## Preprocessing

In [None]:
# !pip install optuna

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from transformers import AlbertTokenizer, AlbertForSequenceClassification
from transformers import ElectraTokenizer, ElectraForSequenceClassification, AdamW,DebertaV2Tokenizer, DebertaV2ForSequenceClassification
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import MobileBertTokenizer, MobileBertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from tqdm import tqdm
import optuna
import time
import datetime

In [None]:
# Load the dataset from a hard-coded CSV path into a DataFrame.
csv_file = "/content/filtered_bulk_r.csv"
data = pd.read_csv(csv_file)

In [None]:
# Split the frame by class using the human-readable label column.
data_benign = data[data['Label_Desc'] == 'Benign']
data_malicious = data[data['Label_Desc'] == 'Attack']

# Draw a fixed 2:1 benign/attack subset. The draws are seeded so that the
# subset (and everything derived from it) is the same on every run.
# DataFrame.sample raises ValueError if a class has fewer rows than requested.
data_benign = data_benign.sample(n=54000, random_state=42)
data_malicious = data_malicious.sample(n=27000, random_state=42)
data_combined = pd.concat([data_benign, data_malicious])
# Reports the class counts of the full, un-sampled frame (not data_combined).
print(data['Label_Desc'].value_counts())

Label_Desc
Benign    54960
Attack    27388
Name: count, dtype: int64


In [None]:
# Model inputs: six numeric columns of the sampled frame as a NumPy array
# (one row per sample, one column per feature).
numeric_data = data_combined[['fw_fl_byt_s', 'bw_fl_byt_s', 'fw_fl_pkt_s', 'bw_fl_pkt_s', 'fw_pkt_s', 'bw_pkt_s']].values
# Targets are read from the Label_code column, row-aligned with numeric_data.
labels = data_combined['Label_code'].values

def normalize_data(data):
    """Standardize ``data`` with a freshly fitted ``StandardScaler``.

    The scaler is fitted on exactly the array passed in and then discarded,
    so the fitted statistics are not available to the caller afterwards.

    Args:
        data: Array-like accepted by ``StandardScaler.fit_transform``.

    Returns:
        The transformed array returned by ``fit_transform``.
    """
    # NOTE(review): callers that pass the full dataset here fit the scaler on
    # rows that later end up in validation/test splits - confirm that is intended.
    return StandardScaler().fit_transform(data)

# Scale the feature columns, then round to 4 decimal places
# (presumably to keep the text rendering of each value short).
normalized_data = normalize_data(numeric_data)
normalized_data = np.round(normalized_data,4)

In [None]:
def convert_numeric_to_text(row):
    """Render a sequence of values as ``"feature_0: v0 feature_1: v1 ..."``.

    Args:
        row: Iterable of feature values; each one is formatted with ``str()``
            semantics via an f-string.

    Returns:
        A single space-separated string, or ``""`` for an empty ``row``.
    """
    parts = []
    for position, value in enumerate(row):
        parts.append(f"feature_{position}: {value}")
    return " ".join(parts)

# One text string per sample row, in the same order as normalized_data.
text_data = [convert_numeric_to_text(sample) for sample in normalized_data]
# Show the first rendered sample as a quick sanity check.
print(f"Sample text representation: {text_data[0]}")

Sample text representation: feature_0: -0.533 feature_1: -0.684 feature_2: -0.734 feature_3: -0.753 feature_4: 0.661 feature_5: -0.829


In [None]:
few_shot_size = 500
# Seed the draw. The splits below are seeded, but that only makes the
# experiment reproducible if the subset feeding them is reproducible too.
rng = np.random.default_rng(42)
few_shot_indices = rng.choice(len(text_data), size=few_shot_size, replace=False)
few_shot_texts = [text_data[i] for i in few_shot_indices]
few_shot_labels = [labels[i] for i in few_shot_indices]

# First hold out 20% of the few-shot subset as the test split ...
train_texts, test_texts, train_labels, test_labels = train_test_split(
    few_shot_texts, few_shot_labels, test_size=0.2, random_state=42, shuffle=True
)
# ... then carve 10% of the remainder off as the validation split.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.1, random_state=42, shuffle=True
)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset that tokenizes one text sample per lookup.

    Args:
        texts: Indexable collection of input strings.
        labels: Indexable collection of integer class labels, aligned with
            ``texts``.
        tokenizer: Callable tokenizer (e.g. a Hugging Face tokenizer) that
            accepts the keyword arguments used in ``__getitem__`` and returns
            a mapping with ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every sample is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its model inputs and label.

        Returns:
            A dict with 1-D ``'input_ids'`` and ``'attention_mask'`` tensors
            and a scalar ``torch.long`` ``'labels'`` tensor.
        """
        # Call the tokenizer directly: ``encode_plus`` is deprecated in
        # transformers in favour of ``__call__``, which takes the same
        # keyword arguments for a single text.
        encoding = self.tokenizer(
            self.texts[idx],
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # flatten() collapses the tokenizer output to 1-D so that the
        # DataLoader's default collation stacks samples into (batch, length).
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

## TinyBert

In [None]:
def objective(trial):
    """Optuna objective that fine-tunes TinyBERT on the few-shot split.

    Draws the learning rate, batch size, epoch count and weight decay from
    ``trial``, fine-tunes a two-label sequence classifier on the module-level
    ``train_texts``/``train_labels`` and evaluates it on the validation and
    test splits after every epoch. Only the validation accuracy is reported
    to Optuna; test metrics are printed for information.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and query the pruner.

    Returns:
        The best validation accuracy seen across the trial's epochs.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Single-choice categorical: nothing is searched over, but the model name
    # is stored alongside the trial's other parameters.
    model_name = trial.suggest_categorical("model_name", ["huawei-noah/TinyBERT_General_4L_312D"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 4, 10)
    weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-3, log=True)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")

    print(f"Initializing tokenizer and model: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training loader is shuffled; evaluation order is fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # keeps the default of the implementation it replaces.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay, eps=1e-6
    )
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one training pass; return ``(accuracy, mean per-batch loss)``."""
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)
            loss = loss_fn(outputs.logits, labels)

            # Accuracy is taken from the same training-mode forward pass
            # that produces the loss.
            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Evaluate without gradients; return ``(accuracy, mean per-batch loss)``.

        ``phase`` is only used to label the printed output.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Running {phase} evaluation...")

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its metrics
        # were printed as "Validation".
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Only the validation accuracy drives pruning and the study value.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

print("\nStarting Optuna hyperparameter optimization...")
# Maximize the value returned by the `objective` defined in this section;
# the MedianPruner acts on the per-epoch values the objective reports.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20) 

print("Hyperparameter optimization completed!")
print("Best trial:")
# Rebinds the module-level name `trial` to the best finished trial.
trial = study.best_trial
print(f"  Value (Validation Accuracy): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


## DeBERTa-v3 (DV3)

In [None]:
def objective(trial):
    """Optuna objective that fine-tunes DeBERTa-v3-small on the few-shot split.

    Draws the learning rate, batch size, epoch count and weight decay from
    ``trial``, fine-tunes a two-label sequence classifier on the module-level
    ``train_texts``/``train_labels`` and evaluates it on the validation and
    test splits after every epoch. Only the validation accuracy is reported
    to Optuna; test metrics are printed for information.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and query the pruner.

    Returns:
        The best validation accuracy seen across the trial's epochs.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Single-choice categorical: nothing is searched over, but the model name
    # is stored alongside the trial's other parameters.
    model_name = trial.suggest_categorical("model_name", ["microsoft/deberta-v3-small"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 3, 7)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")

    print(f"Initializing tokenizer and model: {model_name}")
    if model_name == "microsoft/deberta-v3-small":
        tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
        model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        # Generic fallback, only reached if other checkpoints are added to
        # the choices above.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training loader is shuffled; evaluation order is fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # keeps the default of the implementation it replaces.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay, eps=1e-6
    )
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one training pass; return ``(accuracy, mean per-batch loss)``."""
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)
            loss = loss_fn(outputs.logits, labels)

            # Accuracy is taken from the same training-mode forward pass
            # that produces the loss.
            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Evaluate without gradients; return ``(accuracy, mean per-batch loss)``.

        ``phase`` is only used to label the printed output.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its metrics
        # were printed as "Validation".
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Only the validation accuracy drives pruning and the study value.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

print("\nStarting Optuna hyperparameter optimization...")
# Maximize the value returned by the `objective` defined in this section;
# the MedianPruner acts on the per-epoch values the objective reports.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)  

print("Hyperparameter optimization completed!")
print("Best trial:")
# Rebinds the module-level name `trial` to the best finished trial.
trial = study.best_trial
print(f"  Value (Validation Accuracy): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


## Albert

In [None]:
def objective(trial):
    """Optuna objective that fine-tunes ALBERT-base-v2 on the few-shot split.

    Draws the learning rate, batch size, epoch count and weight decay from
    ``trial``, fine-tunes a two-label sequence classifier on the module-level
    ``train_texts``/``train_labels`` and evaluates it on the validation and
    test splits after every epoch. Only the validation accuracy is reported
    to Optuna; test metrics are printed for information.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and query the pruner.

    Returns:
        The best validation accuracy seen across the trial's epochs.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Single-choice categorical: nothing is searched over, but the model name
    # is stored alongside the trial's other parameters.
    model_name = trial.suggest_categorical("model_name", ["albert-base-v2"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 3, 10)
    weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-3, log=True)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")

    print(f"Initializing tokenizer and model: {model_name}")
    if model_name == "albert-base-v2":
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
        model = AlbertForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        # Generic fallback, only reached if other checkpoints are added to
        # the choices above.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training loader is shuffled; evaluation order is fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # keeps the default of the implementation it replaces.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay, eps=1e-6
    )
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one training pass; return ``(accuracy, mean per-batch loss)``."""
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)
            loss = loss_fn(outputs.logits, labels)

            # Accuracy is taken from the same training-mode forward pass
            # that produces the loss.
            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Evaluate without gradients; return ``(accuracy, mean per-batch loss)``.

        ``phase`` is only used to label the printed output.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its metrics
        # were printed as "Validation".
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Only the validation accuracy drives pruning and the study value.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

print("\nStarting Optuna hyperparameter optimization...")
# Maximize the value returned by the `objective` defined in this section;
# the MedianPruner acts on the per-epoch values the objective reports.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)  # Adjust n_trials as needed

print("Hyperparameter optimization completed!")
print("Best trial:")
# Rebinds the module-level name `trial` to the best finished trial.
trial = study.best_trial
print(f"  Value (Validation Accuracy): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


## Distilbert

In [None]:
def objective(trial):
    """Optuna objective that fine-tunes DistilBERT on the few-shot split.

    Draws the learning rate, batch size, epoch count and weight decay from
    ``trial``, fine-tunes a two-label sequence classifier on the module-level
    ``train_texts``/``train_labels`` and evaluates it on the validation and
    test splits after every epoch. Only the validation accuracy is reported
    to Optuna; test metrics are printed for information.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and query the pruner.

    Returns:
        The best validation accuracy seen across the trial's epochs.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Single-choice categorical: nothing is searched over, but the model name
    # is stored alongside the trial's other parameters.
    model_name = trial.suggest_categorical("model_name", ["distilbert-base-uncased"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 3, 7)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")

    print(f"Initializing tokenizer and model: {model_name}")
    if model_name == "distilbert-base-uncased":
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        # Generic fallback, only reached if other checkpoints are added to
        # the choices above.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training loader is shuffled; evaluation order is fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # keeps the default of the implementation it replaces.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay, eps=1e-6
    )
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one training pass; return ``(accuracy, mean per-batch loss)``."""
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)
            loss = loss_fn(outputs.logits, labels)

            # Accuracy is taken from the same training-mode forward pass
            # that produces the loss.
            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Evaluate without gradients; return ``(accuracy, mean per-batch loss)``.

        ``phase`` is only used to label the printed output.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its metrics
        # were printed as "Validation".
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Only the validation accuracy drives pruning and the study value.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

print("\nStarting Optuna hyperparameter optimization...")
# Maximize the value returned by the `objective` defined in this section;
# the MedianPruner acts on the per-epoch values the objective reports.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)  

print("Hyperparameter optimization completed!")
print("Best trial:")
# Rebinds the module-level name `trial` to the best finished trial.
trial = study.best_trial
print(f"  Value (Validation Accuracy): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


## Mobilebert

In [None]:
def objective(trial):
    """Optuna objective that fine-tunes MobileBERT on the few-shot split.

    Draws the learning rate, batch size, epoch count, weight decay and the
    two dropout probabilities from ``trial``, fine-tunes a two-label sequence
    classifier on the module-level ``train_texts``/``train_labels`` and
    evaluates it on the validation and test splits after every epoch. Only
    the validation accuracy is reported to Optuna; test metrics are printed
    for information.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and query the pruner.

    Returns:
        The best validation accuracy seen across the trial's epochs.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Single-choice categorical: nothing is searched over, but the model name
    # is stored alongside the trial's other parameters.
    model_name = trial.suggest_categorical("model_name", ["google/mobilebert-uncased"])
    learning_rate = trial.suggest_float("learning_rate", 5e-6, 1e-5, log=True)
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 3, 10)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-4, log=True)
    hidden_dropout = trial.suggest_float("hidden_dropout", 0.1, 0.5)
    attention_dropout = trial.suggest_float("attention_dropout", 0.1, 0.5)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")
    print(f"  Hidden dropout: {hidden_dropout}")
    print(f"  Attention dropout: {attention_dropout}")

    print(f"Initializing tokenizer and model: {model_name}")
    if model_name == 'google/mobilebert-uncased':
        tokenizer = MobileBertTokenizer.from_pretrained(model_name)
        # The sampled dropout values are passed through from_pretrained as
        # configuration overrides.
        model = MobileBertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            hidden_dropout_prob=hidden_dropout,
            attention_probs_dropout_prob=attention_dropout,
        )
    else:
        # Generic fallback, only reached if other checkpoints are added to
        # the choices above; the sampled dropout values are not applied here.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training loader is shuffled; evaluation order is fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    # torch.optim.AdamW replaces the deprecated transformers.AdamW; eps=1e-6
    # keeps the default of the implementation it replaces.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay, eps=1e-6
    )
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one training pass; return ``(accuracy, mean per-batch loss)``."""
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)
            loss = loss_fn(outputs.logits, labels)

            # Accuracy is taken from the same training-mode forward pass
            # that produces the loss.
            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Evaluate without gradients; return ``(accuracy, mean per-batch loss)``.

        ``phase`` is only used to label the printed output.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its metrics
        # were printed as "Validation".
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Only the validation accuracy drives pruning and the study value.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

print("\nStarting Optuna hyperparameter optimization...")
# Maximize the value returned by the `objective` defined in this section;
# the MedianPruner acts on the per-epoch values the objective reports.
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)  # Adjust n_trials as needed

print("Hyperparameter optimization completed!")
print("Best trial:")
# Rebinds the module-level name `trial` to the best finished trial.
trial = study.best_trial
print(f"  Value (Validation Accuracy): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


## Electra

In [None]:
def objective(trial):
    """Optuna objective: fine-tune a sequence classifier and score it on validation data.

    Samples a learning rate, batch size, epoch count and weight decay from
    ``trial``, fine-tunes the selected pretrained model on the train split and
    evaluates it on the validation and test splits after every epoch. The
    validation accuracy is reported to Optuna each epoch so the study's pruner
    can stop the trial early.

    Relies on names defined elsewhere in this file: ``CustomDataset`` and the
    ``train_*`` / ``val_*`` / ``test_*`` text and label collections.

    Args:
        trial: The ``optuna`` trial that supplies the hyperparameters and
            receives the intermediate reports.

    Returns:
        The highest validation accuracy observed over the epochs that ran.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch's report.

    Side effects:
        Stores the test accuracy measured in the best-validation epoch on the
        trial as the user attribute ``"best_test_acc"`` so it can be read
        back later (for example from ``study.best_trial.user_attrs``). The
        test accuracy is never used for model selection or pruning.
    """
    print(f"\n{'='*80}")
    print(f"Starting trial {trial.number}")
    print(f"{'='*80}")

    # Search space. The parameter names are the keys under which Optuna
    # records each value, so they must stay stable across trials.
    model_name = trial.suggest_categorical("model_name", ["google/electra-small-discriminator"])
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    num_epochs = trial.suggest_int("num_epochs", 3, 10)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)

    print(f"Trial {trial.number} hyperparameters:")
    print(f"  Model: {model_name}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Weight decay: {weight_decay}")

    print(f"Initializing tokenizer and model: {model_name}")
    # Only the ELECTRA checkpoint is currently in the search space; the other
    # branches take effect if more names are added to the categorical above.
    if model_name == "albert-base-v2":
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
        model = AlbertForSequenceClassification.from_pretrained(model_name, num_labels=2)
    elif model_name == "google/electra-small-discriminator":
        tokenizer = ElectraTokenizer.from_pretrained(model_name)
        model = ElectraForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

    # Only the training data is shuffled; evaluation order does not matter.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    loss_fn = torch.nn.CrossEntropyLoss()

    def train_epoch(model, data_loader, loss_fn, optimizer, device, epoch):
        """Run one optimisation pass over ``data_loader``.

        Returns:
            ``(accuracy, mean_batch_loss)`` measured on the training batches
            while the weights were being updated.
        """
        model = model.train()
        losses = []
        correct_predictions = 0
        total_samples = 0

        print(f"  Training epoch {epoch}...")

        for batch in tqdm(data_loader, desc='Training Progress'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Predicted class = index of the largest logit.
            _, preds = torch.max(outputs.logits, dim=1)
            loss = loss_fn(outputs.logits, labels)

            correct_predictions += torch.sum(preds == labels).item()
            total_samples += len(labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch.
            optimizer.zero_grad()

        epoch_acc = correct_predictions / total_samples
        epoch_loss = np.mean(losses)
        print(f"  Training - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        return epoch_acc, epoch_loss

    def eval_model(model, data_loader, loss_fn, device, phase="Validation"):
        """Score ``model`` on ``data_loader`` without updating its weights.

        ``phase`` is only a label for the printed summary line.

        Returns:
            ``(accuracy, mean_batch_loss)``.
        """
        model = model.eval()
        losses = []
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for batch in tqdm(data_loader, desc='Evaluation Progress'):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                _, preds = torch.max(outputs.logits, dim=1)
                loss = loss_fn(outputs.logits, labels)

                correct_predictions += torch.sum(preds == labels).item()
                total_samples += len(labels)
                losses.append(loss.item())

        eval_acc = correct_predictions / total_samples
        eval_loss = np.mean(losses)

        print(f"  {phase} - Loss: {eval_loss:.4f}, Accuracy: {eval_acc:.4f}")

        return eval_acc, eval_loss

    best_val_acc = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs} - Trial {trial.number}")
        print("-" * 60)

        train_acc, train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, epoch + 1)
        val_acc, val_loss = eval_model(model, val_loader, loss_fn, device)
        # Label the test pass explicitly; with the default phase its summary
        # line would be printed as "Validation" and be indistinguishable from
        # the real validation line above it.
        test_acc, test_loss = eval_model(model, test_loader, loss_fn, device, phase="Test")

        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # Intermediate value for the pruner; only validation accuracy is used.
        trial.report(val_acc, epoch)

        if trial.should_prune():
            print(f"Trial {trial.number} pruned at epoch {epoch + 1}")
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Keep the matching test accuracy on the trial instead of in a
            # local that is discarded when the function returns.
            trial.set_user_attr("best_test_acc", test_acc)
            print(f"  New best validation accuracy: {best_val_acc:.4f}")

    print(f"\nTrial {trial.number} completed with best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

# Launch the ELECTRA hyperparameter search: Optuna maximises the value returned
# by objective() (validation accuracy); the median pruner may stop trials early.
print("\nStarting Optuna hyperparameter optimization...")
study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=20)

# Report the winning trial. Only the validation accuracy is shown because that
# is the single value objective() returns to the study.
print("Hyperparameter optimization completed!", "Best trial:", sep="\n")
trial = study.best_trial
print(
    f"  Value (Validation Accuracy): {trial.value:.4f}",
    "  Params: ",
    sep="\n",
)
for key, value in trial.params.items():
    print(f"    {key}: {value}")
