In [2]:
%%capture
!pip install deepchem transformers peft

In [3]:
import warnings
import logging

warnings.filterwarnings("ignore", category=DeprecationWarning)
logging.getLogger("deepchem").setLevel(logging.ERROR)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import deepchem as dc
from peft import get_peft_model, LoraConfig, TaskType, AdaLoraConfig
import time
from sklearn.metrics import accuracy_score
from rdkit import Chem
import pandas as pd
import matplotlib.pyplot as plt

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [1]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
# Define paths and parameters
MODEL_NAME = "seyonec/ChemBERTa-zinc-base-v1"
MAX_LENGTH = 128
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 2e-5

# LoRA Configuration
LORA_R = 8  # Rank of LoRA
LORA_ALPHA = 16
LORA_DROPOUT = 0.1

In [6]:
class ClinToxDataset(Dataset):
    def __init__(self, data_path, tokenizer, split='train', max_length=128):
        """
        Custom PyTorch Dataset for the ClinTox dataset.
        """
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.split = split

        # Load ClinTox dataset from DeepChem
        tasks, datasets, transformers = dc.molnet.load_clintox()
        train_dataset, valid_dataset, test_dataset = datasets

        # Convert to SMILES and labels
        self.smiles_train, self.labels_train = self.remove_invalid_smiles(train_dataset.ids, train_dataset.y)
        self.smiles_valid, self.labels_valid = self.remove_invalid_smiles(valid_dataset.ids, valid_dataset.y)
        self.smiles_test, self.labels_test = self.remove_invalid_smiles(test_dataset.ids, test_dataset.y)

        # Set active data split based on input parameter
        if split == 'train':
            self.smiles = self.smiles_train
            self.labels = self.labels_train
        elif split == 'valid':
            self.smiles = self.smiles_valid
            self.labels = self.labels_valid
        elif split == 'test':
            self.smiles = self.smiles_test
            self.labels = self.labels_test
        else:
            raise ValueError("Invalid split. Use 'train', 'valid', or 'test'.")

    def __len__(self):
        return len(self.smiles)

    def __getitem__(self, idx):
        smiles = self.smiles[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            smiles,
            return_tensors="pt",
            max_length=self.max_length,
            padding="max_length",
            truncation=True
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.float)
        }

    def remove_invalid_smiles(self, smiles, labels):
        """
        Filters out invalid SMILES strings using RDKit validation.
        """
        valid_indices = []
        for i, smile in enumerate(smiles):
            try:
                mol = Chem.MolFromSmiles(smile)
                if mol is not None:
                    valid_indices.append(i)
            except:
                pass

        return smiles[valid_indices], labels[valid_indices]

In [7]:
def evaluate_model(model, dataloader, device):
    """
    Evaluate the model on the validation or test set.
    """
    model.eval()
    total_loss = 0
    all_labels = []
    all_preds = []
    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits.squeeze(-1)

            loss = criterion(logits, labels)
            total_loss += loss.item()

            probs = torch.sigmoid(logits)
            all_preds.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Binary predictions for accuracy
    bin_preds = (all_preds > 0.5).astype(int)
    accuracy = accuracy_score(all_labels, bin_preds)

    avg_loss = total_loss / len(dataloader)
    return avg_loss, accuracy

In [8]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in model.parameters())
    percentage_trained = round(100*trainable_params/all_params, 2)
    print(f"Trainable: {trainable_params} | All: {all_params} | % Trained: {percentage_trained}")
    return trainable_params, all_params, percentage_trained

In [19]:
def train_and_profile(model, tokenizer, optimization_name, lr=LEARNING_RATE):
    """
    Training Loop with Profiling
    """

    print(f"\n--- {optimization_name} - Trainable Parameters ---")
    trainable_params, all_params, percentage_trained = print_trainable_parameters(model)

    run = wandb.init(
        project="testing",
        entity="hpml-proj-deepchem",
        name=f"{optimization_name}_lr{lr}",
        config={
            "model_name": MODEL_NAME,
            "optimization": optimization_name,
            "learning_rate": lr,
            "batch_size": BATCH_SIZE,
            "epochs": EPOCHS,
            "max_length": MAX_LENGTH,
            "lora_r": LORA_R,
            "lora_alpha": LORA_ALPHA,
            "lora_dropout": LORA_DROPOUT,
        }
    )

    wandb.log({
        "trainable_parameters": trainable_params,
        "total_parameters": all_params,
        "parameter_efficiency": percentage_trained
    })

    # Create datasets for each split
    train_dataset = ClinToxDataset("clintox", tokenizer, split="train", max_length=MAX_LENGTH)
    val_dataset = ClinToxDataset("clintox", tokenizer, split="valid", max_length=MAX_LENGTH)
    test_dataset = ClinToxDataset("clintox", tokenizer, split="test", max_length=MAX_LENGTH)

    # Create dataloaders
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses, val_losses, val_accuracies = [], [], []

    print(f"\n--- {optimization_name} ---")

    # Start timing
    start_time = time.time()

    for epoch in range(EPOCHS):
        epoch_start_time = time.time()
        model.train()
        total_train_loss = 0
        progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}")

        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits.squeeze(-1)

            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            progress_bar.set_postfix({"loss": loss.item()})

        epoch_time = time.time() - epoch_start_time

        avg_train_loss = total_train_loss / len(train_dataloader)
        val_loss, val_acc = evaluate_model(model, val_dataloader, device)

        train_losses.append(avg_train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "val_loss": val_loss,
            "val_accuracy": val_acc,
            "epoch_time": epoch_time,
            "model_type": optimization_name
        })


        print(f"Epoch {epoch+1} | Training Time: {epoch_time:.2f} s | Train Loss: {avg_train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Accuracy: {val_acc:.4f}")

    end_time = time.time()

    test_loss, test_acc = evaluate_model(model, test_dataloader, device)
    training_time = round(end_time - start_time, 2)
    print(f"\n--- {optimization_name} ---")
    print(f"Training time: {training_time} seconds")
    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")

    wandb.log({
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "training_time": training_time,
        "trainable_parameters": trainable_params,
        "total_parameters": all_params,
        "parameter_efficiency": percentage_trained,
        "model_type": optimization_name
    })

    wandb.log({
        f"{optimization_name}_train_loss_curve": wandb.plot.line_series(
            xs=list(range(1, EPOCHS+1)),
            ys=[train_losses],
            keys=[f"{optimization_name}_train_loss"],
            title=f"{optimization_name} Training Loss",
            xname="Epoch"
        ),
        f"{optimization_name}_val_loss_curve": wandb.plot.line_series(
            xs=list(range(1, EPOCHS+1)),
            ys=[val_losses],
            keys=[f"{optimization_name}_val_loss"],
            title=f"{optimization_name} Validation Loss",
            xname="Epoch"
        )
    })

    wandb.finish()

    return {
        "train_losses": train_losses,
        "val_losses": val_losses,
        "val_accuracies": val_accuracies,
        "training_time": training_time,
        "test_accuracy": test_acc}

In [29]:
def hyperparameter_tuning(model_type="LoRA", learning_rates=[1e-5, 2e-5, 3e-5, 5e-5, 1e-4, 3e-4]):
    """
    Run hyperparameter tuning  with wandb tracking
    """

    # Initialize the main sweep run
    parent_run = wandb.init(project="testing", entity="hpml-proj-deepchem", name=f"{model_type}_Hyperparameter_Sweep", job_type="sweep")

    results = {
        "learning_rate": [],
        "test_accuracy": [],
        "training_time": [],
    }

    # Run experiment for each learning rate
    for lr in learning_rates:
        print(f"\n--- {model_type} with learning_rate={lr} ---")

        # new model for each run
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = None

        if model_type.lower() == "lora":
            model = setup_lora_model(MODEL_NAME)
        elif model_type.lower() == "adalora":
            model = setup_adalora_model(MODEL_NAME)

        with wandb.init(
            project="testing",
            entity="hpml-proj-deepchem",
            name=f"{model_type}_lr_{lr}",
            group=f"{model_type}_Sweep",
            job_type="run",
            config={"learning_rate": lr},
            reinit=True  # Allows multiple init calls
        ):

            metrics = train_and_profile(model, tokenizer, f"{model_type}_lr_{lr}", lr=lr)

            results["learning_rate"].append(lr)
            results["test_accuracy"].append(metrics["test_accuracy"])
            results["training_time"].append(metrics["training_time"])

        # Clean up memory
        del model, tokenizer
        torch.cuda.empty_cache()
    wandb.finish()
    # Create results dataframe
    results_df = pd.DataFrame(results)

    # Create and log summary visualization
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(results_df["learning_rate"], results_df["test_accuracy"], 'bo-')
    ax.set_xscale('log')
    ax.set_xlabel('Learning Rate')
    ax.set_ylabel('Test Accuracy')
    ax.set_title(f'{model_type} Performance vs Learning Rate')
    ax.grid(True)

    # Add text annotations for each point
    for i, lr in enumerate(results_df["learning_rate"]):
        ax.annotate(
            f"Time: {results_df['training_time'][i]:.1f}s\nAcc: {results_df['test_accuracy'][i]:.4f}",
            (lr, results_df["test_accuracy"][i]),
            textcoords="offset points",
            xytext=(0, 10),
            ha='center'
        )

    second_run = wandb.init(project="testing", entity="hpml-proj-deepchem", name=f"{model_type}_Hyperparameter_GraphTable")

    wandb.log({f"{model_type}_lr_tuning_curve": wandb.Image(fig),
               f"{model_type}_lr_tuning_table": wandb.Table(dataframe=results_df)})

    plt.close(fig)

    wandb.finish()

    return results_df


## Base ChemBeRTa Model Without Any Additional Fine-Tuning

In [17]:
baseline = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
baseline.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
test_dataset = ClinToxDataset("clintox", tokenizer, split="test", max_length=MAX_LENGTH)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


_, test_acc = evaluate_model(baseline, test_dataloader, device)

print(f"Baseline (Not Fine-Tuned) Test Accuracy: {test_acc:.4f}")


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 21.62it/s]

Baseline (Not Fine-Tuned) Test Accuracy: 0.0946





## Base ChemBeRTa Model with Full Parameter Fine-Tuning


In [22]:
def setup_baseline_model(model_name):
    """
    Set up a baseline model
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )
    return model

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_baseline = setup_baseline_model(MODEL_NAME)

baseline_metrics = train_and_profile(model_baseline, tokenizer, "Model_Baseline_Finetuned")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- Model_Baseline_Finetuned - Trainable Parameters ---
Trainable: 44105474 | All: 44105474 | % Trained: 100.0


0,1
epoch,▁
epoch_time,▁
parameter_efficiency,▁
total_parameters,▁
train_loss,▁
trainable_parameters,▁
val_accuracy,▁
val_loss,▁

0,1
epoch,1
epoch_time,5.5166
model_type,Model_Baseline_Finet...
parameter_efficiency,100
total_parameters,44105474
train_loss,0.25829
trainable_parameters,44105474
val_accuracy,0.96622
val_loss,0.09241



--- Model_Baseline_Finetuned ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.76it/s, loss=0.0674]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.11it/s]


Epoch 1 | Training Time: 5.47 s | Train Loss: 0.2631 | Val Loss: 0.0949 | Val Accuracy: 0.9730


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.69it/s, loss=0.0308]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.56it/s]


Epoch 2 | Training Time: 5.53 s | Train Loss: 0.0805 | Val Loss: 0.0953 | Val Accuracy: 0.9730


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.67it/s, loss=0.0129]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.51it/s]


Epoch 3 | Training Time: 5.55 s | Train Loss: 0.0534 | Val Loss: 0.1127 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.73it/s, loss=0.0978]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.55it/s]


Epoch 4 | Training Time: 5.50 s | Train Loss: 0.0492 | Val Loss: 0.0665 | Val Accuracy: 0.9865


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.74it/s, loss=0.0265]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.74it/s]


Epoch 5 | Training Time: 5.49 s | Train Loss: 0.0350 | Val Loss: 0.0665 | Val Accuracy: 0.9865


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.82it/s]



--- Model_Baseline_Finetuned ---
Training time: 28.79 seconds
Test Loss: 0.0504 | Test Accuracy: 0.9865


0,1
epoch,▁▃▅▆█
epoch_time,▁▇█▃▃
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▂▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▅▅▁██

0,1
epoch,5
epoch_time,5.49387
model_type,Model_Baseline_Finet...
parameter_efficiency,100
test_accuracy,0.98649
test_loss,0.05038
total_parameters,44105474
train_loss,0.03498
trainable_parameters,44105474
training_time,28.79


## ChemBeRTa With LoRA Fine-Tuning

In [23]:
def setup_lora_model(model_name):
    """
    Set up a model with LoRA configuration
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=["query", "value"]  # Target attention modules
    )

    model = get_peft_model(model, peft_config)
    return model

In [30]:
hyperparameter_tuning("LoRA")


--- LoRA with learning_rate=1e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_1e-05 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_1e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.35it/s, loss=0.355]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.31it/s]


Epoch 1 | Training Time: 3.96 s | Train Loss: 0.5158 | Val Loss: 0.3309 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:04<00:00,  9.24it/s, loss=0.297]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.34it/s]


Epoch 2 | Training Time: 4.01 s | Train Loss: 0.3263 | Val Loss: 0.2262 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:04<00:00,  9.15it/s, loss=0.601]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.06it/s]


Epoch 3 | Training Time: 4.05 s | Train Loss: 0.2668 | Val Loss: 0.1929 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:04<00:00,  9.17it/s, loss=0.305]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.16it/s]


Epoch 4 | Training Time: 4.04 s | Train Loss: 0.2411 | Val Loss: 0.1777 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:04<00:00,  9.18it/s, loss=0.479]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.24it/s]


Epoch 5 | Training Time: 4.04 s | Train Loss: 0.2240 | Val Loss: 0.1679 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.70it/s]



--- LoRA_lr_1e-05 ---
Training time: 21.42 seconds
Test Loss: 0.1781 | Test Accuracy: 0.9324


0,1
epoch,▁▃▅▆█
epoch_time,▁▅█▇▇
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5
epoch_time,4.03599
model_type,LoRA_lr_1e-05
parameter_efficiency,1.65
test_accuracy,0.93243
test_loss,0.1781
total_parameters,44845060
train_loss,0.224
trainable_parameters,739586
training_time,21.42



--- LoRA with learning_rate=2e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_2e-05 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_2e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.38it/s, loss=0.25]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.45it/s]


Epoch 1 | Training Time: 3.95 s | Train Loss: 0.4945 | Val Loss: 0.2421 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.35it/s, loss=0.223]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.39it/s]


Epoch 2 | Training Time: 3.96 s | Train Loss: 0.2646 | Val Loss: 0.1810 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.47it/s, loss=0.31]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.67it/s]


Epoch 3 | Training Time: 3.91 s | Train Loss: 0.2242 | Val Loss: 0.1643 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.44it/s, loss=0.12]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.58it/s]


Epoch 4 | Training Time: 3.92 s | Train Loss: 0.1966 | Val Loss: 0.1500 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.45it/s, loss=0.161]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.58it/s]


Epoch 5 | Training Time: 3.92 s | Train Loss: 0.1683 | Val Loss: 0.1336 | Val Accuracy: 0.9595


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.97it/s]



--- LoRA_lr_2e-05 ---
Training time: 20.97 seconds
Test Loss: 0.1236 | Test Accuracy: 0.9527


0,1
epoch,▁▃▅▆█
epoch_time,▆█▁▃▂
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▂▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁█

0,1
epoch,5
epoch_time,3.91844
model_type,LoRA_lr_2e-05
parameter_efficiency,1.65
test_accuracy,0.9527
test_loss,0.1236
total_parameters,44845060
train_loss,0.16826
trainable_parameters,739586
training_time,20.97



--- LoRA with learning_rate=3e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_3e-05 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_3e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.50it/s, loss=0.325]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.94it/s]


Epoch 1 | Training Time: 3.90 s | Train Loss: 0.3248 | Val Loss: 0.1776 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.52it/s, loss=0.241]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.63it/s]


Epoch 2 | Training Time: 3.89 s | Train Loss: 0.2128 | Val Loss: 0.1559 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.47it/s, loss=0.169]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.49it/s]


Epoch 3 | Training Time: 3.91 s | Train Loss: 0.1750 | Val Loss: 0.1360 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.45it/s, loss=0.128]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.74it/s]


Epoch 4 | Training Time: 3.92 s | Train Loss: 0.1416 | Val Loss: 0.1148 | Val Accuracy: 0.9662


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.41it/s, loss=0.0553]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.64it/s]


Epoch 5 | Training Time: 3.94 s | Train Loss: 0.1151 | Val Loss: 0.1022 | Val Accuracy: 0.9797


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.01it/s]



--- LoRA_lr_3e-05 ---
Training time: 20.86 seconds
Test Loss: 0.0899 | Test Accuracy: 0.9662


0,1
epoch,▁▃▅▆█
epoch_time,▂▁▄▆█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▃▂▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▅█

0,1
epoch,5
epoch_time,3.93504
model_type,LoRA_lr_3e-05
parameter_efficiency,1.65
test_accuracy,0.96622
test_loss,0.0899
total_parameters,44845060
train_loss,0.1151
trainable_parameters,739586
training_time,20.86



--- LoRA with learning_rate=5e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_5e-05 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_5e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.47it/s, loss=0.343]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.82it/s]


Epoch 1 | Training Time: 3.91 s | Train Loss: 0.2888 | Val Loss: 0.1651 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.46it/s, loss=0.152]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.72it/s]


Epoch 2 | Training Time: 3.91 s | Train Loss: 0.1770 | Val Loss: 0.1313 | Val Accuracy: 0.9595


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.39it/s, loss=0.284]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.63it/s]


Epoch 3 | Training Time: 3.95 s | Train Loss: 0.1270 | Val Loss: 0.1017 | Val Accuracy: 0.9662


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.37it/s, loss=0.0269]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.36it/s]


Epoch 4 | Training Time: 3.95 s | Train Loss: 0.0961 | Val Loss: 0.0899 | Val Accuracy: 0.9730


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.31it/s, loss=0.0267]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.40it/s]


Epoch 5 | Training Time: 3.98 s | Train Loss: 0.0705 | Val Loss: 0.0962 | Val Accuracy: 0.9662


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.58it/s]



--- LoRA_lr_5e-05 ---
Training time: 21.01 seconds
Test Loss: 0.0820 | Test Accuracy: 0.9662


0,1
epoch,▁▃▅▆█
epoch_time,▁▁▄▅█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▃▂▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▃▆█▆

0,1
epoch,5
epoch_time,3.97946
model_type,LoRA_lr_5e-05
parameter_efficiency,1.65
test_accuracy,0.96622
test_loss,0.08198
total_parameters,44845060
train_loss,0.07049
trainable_parameters,739586
training_time,21.01



--- LoRA with learning_rate=0.0001 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_0.0001 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_0.0001 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.47it/s, loss=0.136]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.72it/s]


Epoch 1 | Training Time: 3.91 s | Train Loss: 0.2770 | Val Loss: 0.1400 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.40it/s, loss=0.0768]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.61it/s]


Epoch 2 | Training Time: 3.94 s | Train Loss: 0.1395 | Val Loss: 0.1051 | Val Accuracy: 0.9662


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.47it/s, loss=0.0708]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.56it/s]


Epoch 3 | Training Time: 3.91 s | Train Loss: 0.0855 | Val Loss: 0.1195 | Val Accuracy: 0.9459


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.42it/s, loss=0.142]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.45it/s]


Epoch 4 | Training Time: 3.93 s | Train Loss: 0.0670 | Val Loss: 0.0988 | Val Accuracy: 0.9662


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.35it/s, loss=0.121]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.69it/s]


Epoch 5 | Training Time: 3.96 s | Train Loss: 0.0568 | Val Loss: 0.0907 | Val Accuracy: 0.9797


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.02it/s]



--- LoRA_lr_0.0001 ---
Training time: 20.96 seconds
Test Loss: 0.0740 | Test Accuracy: 0.9730


0,1
epoch,▁▃▅▆█
epoch_time,▁▅▁▄█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▂▅▁▅█

0,1
epoch,5
epoch_time,3.96077
model_type,LoRA_lr_0.0001
parameter_efficiency,1.65
test_accuracy,0.97297
test_loss,0.07398
total_parameters,44845060
train_loss,0.05678
trainable_parameters,739586
training_time,20.96



--- LoRA with learning_rate=0.0003 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA_lr_0.0003 - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA_lr_0.0003 ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.49it/s, loss=0.168]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.41it/s]


Epoch 1 | Training Time: 3.90 s | Train Loss: 0.1946 | Val Loss: 0.0884 | Val Accuracy: 0.9662


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.49it/s, loss=0.258]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 18.73it/s]


Epoch 2 | Training Time: 3.90 s | Train Loss: 0.0776 | Val Loss: 0.1014 | Val Accuracy: 0.9662


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.44it/s, loss=0.164]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.60it/s]


Epoch 3 | Training Time: 3.92 s | Train Loss: 0.0538 | Val Loss: 0.0812 | Val Accuracy: 0.9865


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.38it/s, loss=0.0548]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.56it/s]


Epoch 4 | Training Time: 3.95 s | Train Loss: 0.0498 | Val Loss: 0.0746 | Val Accuracy: 0.9797


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.40it/s, loss=0.0231]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.44it/s]


Epoch 5 | Training Time: 3.94 s | Train Loss: 0.0426 | Val Loss: 0.1310 | Val Accuracy: 0.9324


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.84it/s]



--- LoRA_lr_0.0003 ---
Training time: 20.93 seconds
Test Loss: 0.0746 | Test Accuracy: 0.9797


0,1
epoch,▁▃▅▆█
epoch_time,▁▁▄█▇
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▅▅█▇▁

0,1
epoch,5
epoch_time,3.93989
model_type,LoRA_lr_0.0003
parameter_efficiency,1.65
test_accuracy,0.97973
test_loss,0.07459
total_parameters,44845060
train_loss,0.04258
trainable_parameters,739586
training_time,20.93


Unnamed: 0,learning_rate,test_accuracy,training_time
0,1e-05,0.932432,21.42
1,2e-05,0.952703,20.97
2,3e-05,0.966216,20.86
3,5e-05,0.966216,21.01
4,0.0001,0.972973,20.96
5,0.0003,0.97973,20.93


In [31]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_lora = setup_lora_model(MODEL_NAME)

lora_metrics = train_and_profile(model_lora, tokenizer, "LoRA")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65



--- LoRA ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.41it/s, loss=0.312]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.60it/s]


Epoch 1 | Training Time: 3.93 s | Train Loss: 0.4095 | Val Loss: 0.2168 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:03<00:00,  9.42it/s, loss=0.402]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.62it/s]


Epoch 2 | Training Time: 3.93 s | Train Loss: 0.2480 | Val Loss: 0.1750 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:03<00:00,  9.37it/s, loss=0.147]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.43it/s]


Epoch 3 | Training Time: 3.95 s | Train Loss: 0.2111 | Val Loss: 0.1588 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:03<00:00,  9.32it/s, loss=0.237]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.31it/s]


Epoch 4 | Training Time: 3.97 s | Train Loss: 0.1818 | Val Loss: 0.1437 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.30it/s, loss=0.195]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.28it/s]


Epoch 5 | Training Time: 3.98 s | Train Loss: 0.1572 | Val Loss: 0.1276 | Val Accuracy: 0.9595


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.61it/s]



--- LoRA ---
Training time: 21.09 seconds
Test Loss: 0.1195 | Test Accuracy: 0.9527


0,1
epoch,▁▃▅▆█
epoch_time,▁▁▄▇█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▂▂▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁█

0,1
epoch,5
epoch_time,3.98083
model_type,LoRA
parameter_efficiency,1.65
test_accuracy,0.9527
test_loss,0.11947
total_parameters,44845060
train_loss,0.15718
trainable_parameters,739586
training_time,21.09


## ChemBeRTa With AdaLoRA Fine-Tuning

In [32]:
def setup_adalora_model(model_name):
    """
    Set up a model with AdaLoRA configuration
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

    train_dataset = ClinToxDataset("clintox", tokenizer, split="train", max_length=MAX_LENGTH)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    ADA_TOTALSTEP = EPOCHS * len(train_dataloader)

    peft_config = AdaLoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        total_step = ADA_TOTALSTEP
    )

    model = get_peft_model(model, peft_config)
    return model

In [33]:
hyperparameter_tuning("AdaLoRA")


--- AdaLoRA with learning_rate=1e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_1e-05 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_1e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.454]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.49it/s]


Epoch 1 | Training Time: 5.93 s | Train Loss: 0.5482 | Val Loss: 0.3565 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.396]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.47it/s]


Epoch 2 | Training Time: 5.93 s | Train Loss: 0.3393 | Val Loss: 0.2361 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.369]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.40it/s]


Epoch 3 | Training Time: 5.92 s | Train Loss: 0.2702 | Val Loss: 0.1987 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.269]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.55it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.2403 | Val Loss: 0.1824 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.215]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.56it/s]


Epoch 5 | Training Time: 5.92 s | Train Loss: 0.2253 | Val Loss: 0.1720 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.85it/s]



--- AdaLoRA_lr_1e-05 ---
Training time: 31.28 seconds
Test Loss: 0.1818 | Test Accuracy: 0.9324


0,1
epoch,▁▃▅▆█
epoch_time,█▆▄▆▁
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5
epoch_time,5.91913
model_type,AdaLoRA_lr_1e-05
parameter_efficiency,3.51
test_accuracy,0.93243
test_loss,0.1818
total_parameters,45730290
train_loss,0.22534
trainable_parameters,1606335
training_time,31.28



--- AdaLoRA with learning_rate=2e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_2e-05 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_2e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.32it/s, loss=0.254]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.64it/s]


Epoch 1 | Training Time: 5.86 s | Train Loss: 0.4113 | Val Loss: 0.2201 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.25]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.63it/s]


Epoch 2 | Training Time: 5.89 s | Train Loss: 0.2527 | Val Loss: 0.1799 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.0989]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 3 | Training Time: 5.91 s | Train Loss: 0.2225 | Val Loss: 0.1664 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.175]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.58it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.1976 | Val Loss: 0.1569 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.306]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.16it/s]


Epoch 5 | Training Time: 5.91 s | Train Loss: 0.1853 | Val Loss: 0.1492 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.64it/s]



--- AdaLoRA_lr_2e-05 ---
Training time: 31.16 seconds
Test Loss: 0.1439 | Test Accuracy: 0.9459


0,1
epoch,▁▃▅▆█
epoch_time,▁▄▆█▅
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5
epoch_time,5.90522
model_type,AdaLoRA_lr_2e-05
parameter_efficiency,3.51
test_accuracy,0.94595
test_loss,0.14386
total_parameters,45730290
train_loss,0.18532
trainable_parameters,1606335
training_time,31.16



--- AdaLoRA with learning_rate=3e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_3e-05 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_3e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.182]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.38it/s]


Epoch 1 | Training Time: 5.91 s | Train Loss: 0.3818 | Val Loss: 0.1929 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.217]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.35it/s]


Epoch 2 | Training Time: 5.93 s | Train Loss: 0.2297 | Val Loss: 0.1687 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.284]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.25it/s]


Epoch 3 | Training Time: 5.93 s | Train Loss: 0.1976 | Val Loss: 0.1554 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.267]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.52it/s]


Epoch 4 | Training Time: 5.92 s | Train Loss: 0.1758 | Val Loss: 0.1454 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.276]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


Epoch 5 | Training Time: 5.93 s | Train Loss: 0.1632 | Val Loss: 0.1379 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.55it/s]



--- AdaLoRA_lr_3e-05 ---
Training time: 31.28 seconds
Test Loss: 0.1256 | Test Accuracy: 0.9459


0,1
epoch,▁▃▅▆█
epoch_time,▁▆█▄▇
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5
epoch_time,5.9289
model_type,AdaLoRA_lr_3e-05
parameter_efficiency,3.51
test_accuracy,0.94595
test_loss,0.12557
total_parameters,45730290
train_loss,0.16315
trainable_parameters,1606335
training_time,31.28



--- AdaLoRA with learning_rate=5e-05 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_5e-05 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_5e-05 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.0596]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.68it/s]


Epoch 1 | Training Time: 5.90 s | Train Loss: 0.3224 | Val Loss: 0.1688 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.173]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.27it/s]


Epoch 2 | Training Time: 5.91 s | Train Loss: 0.1955 | Val Loss: 0.1484 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.116]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.30it/s]


Epoch 3 | Training Time: 5.90 s | Train Loss: 0.1624 | Val Loss: 0.1367 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.14]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.58it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.1437 | Val Loss: 0.1279 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.258]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.52it/s]


Epoch 5 | Training Time: 5.93 s | Train Loss: 0.1307 | Val Loss: 0.1205 | Val Accuracy: 0.9595


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.63it/s]



--- AdaLoRA_lr_5e-05 ---
Training time: 31.22 seconds
Test Loss: 0.1070 | Test Accuracy: 0.9459


0,1
epoch,▁▃▅▆█
epoch_time,▁▄▂██
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁█

0,1
epoch,5
epoch_time,5.93041
model_type,AdaLoRA_lr_5e-05
parameter_efficiency,3.51
test_accuracy,0.94595
test_loss,0.10697
total_parameters,45730290
train_loss,0.13065
trainable_parameters,1606335
training_time,31.22



--- AdaLoRA with learning_rate=0.0001 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_0.0001 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_0.0001 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.426]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.38it/s]


Epoch 1 | Training Time: 5.91 s | Train Loss: 0.2548 | Val Loss: 0.1478 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.0596]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.27it/s]


Epoch 2 | Training Time: 5.93 s | Train Loss: 0.1558 | Val Loss: 0.1298 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.0502]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.46it/s]


Epoch 3 | Training Time: 5.92 s | Train Loss: 0.1305 | Val Loss: 0.1172 | Val Accuracy: 0.9662


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.149]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.59it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.1155 | Val Loss: 0.1043 | Val Accuracy: 0.9662


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.23it/s, loss=0.0337]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.51it/s]


Epoch 5 | Training Time: 5.94 s | Train Loss: 0.0970 | Val Loss: 0.0941 | Val Accuracy: 0.9662


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.83it/s]



--- AdaLoRA_lr_0.0001 ---
Training time: 31.28 seconds
Test Loss: 0.0832 | Test Accuracy: 0.9595


0,1
epoch,▁▃▅▆█
epoch_time,▁▅▅▇█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▂▂▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁███

0,1
epoch,5
epoch_time,5.9399
model_type,AdaLoRA_lr_0.0001
parameter_efficiency,3.51
test_accuracy,0.95946
test_loss,0.08322
total_parameters,45730290
train_loss,0.09696
trainable_parameters,1606335
training_time,31.28



--- AdaLoRA with learning_rate=0.0003 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_lr_0.0003 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA_lr_0.0003 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.141]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.61it/s]


Epoch 1 | Training Time: 5.90 s | Train Loss: 0.2132 | Val Loss: 0.1242 | Val Accuracy: 0.9595


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.0983]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.28it/s]


Epoch 2 | Training Time: 5.91 s | Train Loss: 0.1220 | Val Loss: 0.1129 | Val Accuracy: 0.9662


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.111]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.62it/s]


Epoch 3 | Training Time: 5.92 s | Train Loss: 0.0970 | Val Loss: 0.0778 | Val Accuracy: 0.9797


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.0748]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.42it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.0629 | Val Loss: 0.0793 | Val Accuracy: 0.9797


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.00538]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 5 | Training Time: 5.93 s | Train Loss: 0.0560 | Val Loss: 0.0789 | Val Accuracy: 0.9730


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.51it/s]



--- AdaLoRA_lr_0.0003 ---
Training time: 31.24 seconds
Test Loss: 0.0597 | Test Accuracy: 0.9797


0,1
epoch,▁▃▅▆█
epoch_time,▁▄▆█▇
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▄▃▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▃██▆

0,1
epoch,5
epoch_time,5.92741
model_type,AdaLoRA_lr_0.0003
parameter_efficiency,3.51
test_accuracy,0.97973
test_loss,0.05974
total_parameters,45730290
train_loss,0.05596
trainable_parameters,1606335
training_time,31.24


Unnamed: 0,learning_rate,test_accuracy,training_time
0,1e-05,0.932432,31.28
1,2e-05,0.945946,31.16
2,3e-05,0.945946,31.28
3,5e-05,0.945946,31.22
4,0.0001,0.959459,31.28
5,0.0003,0.97973,31.24


In [34]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_adalora = setup_adalora_model(MODEL_NAME)

adalora_metrics = train_and_profile(model_adalora, tokenizer, "AdaLoRA")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51



--- AdaLoRA ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.308]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.61it/s]


Epoch 1 | Training Time: 5.90 s | Train Loss: 0.4922 | Val Loss: 0.2395 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.148]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 14.80it/s]


Epoch 2 | Training Time: 5.92 s | Train Loss: 0.2626 | Val Loss: 0.1835 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.139]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.41it/s]


Epoch 3 | Training Time: 5.93 s | Train Loss: 0.2281 | Val Loss: 0.1679 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.18]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.28it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.2046 | Val Loss: 0.1585 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.23it/s, loss=0.202]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.36it/s]


Epoch 5 | Training Time: 5.94 s | Train Loss: 0.1893 | Val Loss: 0.1510 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.59it/s]



--- AdaLoRA ---
Training time: 31.29 seconds
Test Loss: 0.1446 | Test Accuracy: 0.9459


0,1
epoch,▁▃▅▆█
epoch_time,▁▄▇▆█
parameter_efficiency,▁▁
test_accuracy,▁
test_loss,▁
total_parameters,▁▁
train_loss,█▃▂▁▁
trainable_parameters,▁▁
training_time,▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5
epoch_time,5.94015
model_type,AdaLoRA
parameter_efficiency,3.51
test_accuracy,0.94595
test_loss,0.14459
total_parameters,45730290
train_loss,0.1893
trainable_parameters,1606335
training_time,31.29


In [35]:
def compare_models(baseline_metrics, lora_metrics, adalora_metrics):
    """
    Log a bar chart comparing training time, test accuracy, and parameter count for all models.
    """
    wandb.init(project="testing", entity="hpml-proj-deepchem", name="Model_Comparison")
    models = ["Baseline", "LoRA", "AdaLoRA"]
    test_accuracies = [baseline_metrics["test_accuracy"], lora_metrics["test_accuracy"], adalora_metrics["test_accuracy"]]
    training_times = [baseline_metrics["training_time"], lora_metrics["training_time"], adalora_metrics["training_time"]]
    param_counts = [baseline_metrics["trainable_parameters"], lora_metrics["trainable_parameters"], adalora_metrics["trainable_parameters"]]

    fig, axs = plt.subplots(1, 3, figsize=(18, 5))
    axs[0].bar(models, test_accuracies, color=["blue", "green", "red"])
    axs[0].set_title("Test Accuracy")
    axs[0].set_ylabel("Accuracy")
    axs[1].bar(models, training_times, color=["blue", "green", "red"])
    axs[1].set_title("Training Time (s)")
    axs[1].set_ylabel("Seconds")
    axs[2].bar(models, param_counts, color=["blue", "green", "red"])
    axs[2].set_title("Trainable Parameters")
    axs[2].set_ylabel("Count")
    axs[2].set_yscale("log")
    plt.tight_layout()
    wandb.log({"model_comparison": wandb.Image(fig)})
    plt.close(fig)
    wandb.finish()
