In [1]:
!!pip install deepchem transformers peft

['Collecting deepchem',
 '  Downloading deepchem-2.8.0-py3-none-any.whl.metadata (2.0 kB)',
 'Collecting rdkit (from deepchem)',
 '  Downloading rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)',
 'Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)',
 '  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)',
 'Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)',
 '  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)',
 'Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)',
 '  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)',
 'Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)',
 '  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)',
 'Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)',
 '  Downl

In [192]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import deepchem as dc
from peft import get_peft_model, LoraConfig, TaskType, AdaLoraConfig
import time
from sklearn.metrics import accuracy_score
from rdkit import Chem
import warnings
import logging

In [193]:
warnings.filterwarnings("ignore", category=DeprecationWarning)
logging.getLogger("deepchem").setLevel(logging.ERROR)

In [194]:
# Define paths and parameters
MODEL_NAME = "seyonec/ChemBERTa-zinc-base-v1"
MAX_LENGTH = 128
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 2e-5

# LoRA Configuration
LORA_R = 8  # Rank of LoRA
LORA_ALPHA = 16
LORA_DROPOUT = 0.1
#ADA_TOTALSTEP = 1184

In [195]:
class ClinToxDataset(Dataset):
    def __init__(self, data_path, tokenizer, split='train', max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.split = split

        # Load ClinTox dataset from DeepChem
        tasks, datasets, transformers = dc.molnet.load_clintox()
        train_dataset, valid_dataset, test_dataset = datasets

        # Convert to SMILES and labels
        self.smiles_train, self.labels_train = self.remove_invalid_smiles(train_dataset.ids, train_dataset.y)
        self.smiles_valid, self.labels_valid = self.remove_invalid_smiles(valid_dataset.ids, valid_dataset.y)
        self.smiles_test, self.labels_test = self.remove_invalid_smiles(test_dataset.ids, test_dataset.y)

        # Set active split
        if split == 'train':
            self.smiles = self.smiles_train
            self.labels = self.labels_train
        elif split == 'valid':
            self.smiles = self.smiles_valid
            self.labels = self.labels_valid
        elif split == 'test':
            self.smiles = self.smiles_test
            self.labels = self.labels_test
        else:
            raise ValueError("Invalid split. Use 'train', 'valid', or 'test'.")

    def __len__(self):
        return len(self.smiles)

    def __getitem__(self, idx):
        smiles = self.smiles[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            smiles,
            return_tensors="pt",
            max_length=self.max_length,
            padding="max_length",
            truncation=True
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.float)
        }

    def remove_invalid_smiles(self, smiles, labels):
        valid_indices = []
        for i, smile in enumerate(smiles):
            try:
                mol = Chem.MolFromSmiles(smile)
                if mol is not None:
                    valid_indices.append(i)
            except:
                pass

        return smiles[valid_indices], labels[valid_indices]

In [196]:
def evaluate_model(model, dataloader, device):
    model.eval()
    total_loss = 0
    all_labels = []
    all_preds = []
    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits.squeeze(-1)

            loss = criterion(logits, labels)
            total_loss += loss.item()

            probs = torch.sigmoid(logits)
            all_preds.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Binary predictions for accuracy
    bin_preds = (all_preds > 0.5).astype(int)
    accuracy = accuracy_score(all_labels, bin_preds)

    avg_loss = total_loss / len(dataloader)
    return avg_loss, accuracy

In [197]:
def print_trainable_parameters(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in model.parameters())
    print(f"Trainable: {trainable_params} | All: {all_params} | % Trained: {100*trainable_params/all_params:.2f}")

In [180]:
# === Training Loop with Profiling ===
def train_and_profile(model, tokenizer, optimization_name):

    print(f"\n--- {optimization_name} - Trainable Parameters ---")
    print_trainable_parameters(model)

    # Create datasets for each split
    train_dataset = ClinToxDataset("clintox", tokenizer, split="train", max_length=MAX_LENGTH)
    val_dataset = ClinToxDataset("clintox", tokenizer, split="valid", max_length=MAX_LENGTH)
    test_dataset = ClinToxDataset("clintox", tokenizer, split="test", max_length=MAX_LENGTH)

    # Create dataloaders
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses, val_losses, val_accuracies = [], [], []

    print(f"\n--- {optimization_name} ---")

    # === Start timing
    start_time = time.time()

    for epoch in range(EPOCHS):
        epoch_start_time = time.time()
        model.train()
        total_train_loss = 0
        progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}")

        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits.squeeze(-1)

            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            progress_bar.set_postfix({"loss": loss.item()})

        epoch_time = time.time() - epoch_start_time

        avg_train_loss = total_train_loss / len(train_dataloader)
        val_loss, val_acc = evaluate_model(model, val_dataloader, device)

        train_losses.append(avg_train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1} | Training Time: {epoch_time:.2f} s | Train Loss: {avg_train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Accuracy: {val_acc:.4f}")

    end_time = time.time()

    test_loss, test_acc = evaluate_model(model, test_dataloader, device)
    print(f"\n--- {optimization_name} ---")
    print(f"Training time: {end_time - start_time:.2f} seconds")
    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")

In [181]:
def baseline_eval(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    test_dataset = ClinToxDataset("clintox", tokenizer, split="test", max_length=MAX_LENGTH)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits.squeeze(-1)

            probs = torch.sigmoid(logits)
            all_preds.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Binary predictions for accuracy
    bin_preds = (all_preds > 0.5).astype(int)
    accuracy = accuracy_score(all_labels, bin_preds)

    print(f"Test Accuracy: {accuracy:.4f}")

In [182]:
baseline = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

baseline_eval(baseline)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 20.71it/s]

Test Accuracy: 0.1216





In [183]:
def setup_lora_model(model_name):
    """
    Set up a model with LoRA configuration
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=["query", "value"]  # Target attention modules
    )

    model = get_peft_model(model, peft_config)
    return model

In [184]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_lora = setup_lora_model(MODEL_NAME)

train_and_profile(model_lora, tokenizer, "LoRA")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- LoRA - Trainable Parameters ---
Trainable: 739586 | All: 44845060 | % Trained: 1.65

--- LoRA ---


Epoch 1/5: 100%|██████████| 37/37 [00:03<00:00,  9.34it/s, loss=0.251]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.04it/s]


Epoch 1 | Training Time: 3.96 s | Train Loss: 0.3775 | Val Loss: 0.2009 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:04<00:00,  9.22it/s, loss=0.265]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.17it/s]


Epoch 2 | Training Time: 4.02 s | Train Loss: 0.2368 | Val Loss: 0.1685 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:04<00:00,  9.18it/s, loss=0.313]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.15it/s]


Epoch 3 | Training Time: 4.03 s | Train Loss: 0.2068 | Val Loss: 0.1540 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:04<00:00,  9.23it/s, loss=0.207]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.20it/s]


Epoch 4 | Training Time: 4.01 s | Train Loss: 0.1793 | Val Loss: 0.1384 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:03<00:00,  9.33it/s, loss=0.0848]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.48it/s]


Epoch 5 | Training Time: 3.97 s | Train Loss: 0.1516 | Val Loss: 0.1224 | Val Accuracy: 0.9595


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 19.74it/s]


--- LoRA ---
Training time: 21.32 seconds
Test Loss: 0.1185 | Test Accuracy: 0.9527





In [187]:
def setup_adalora_model(model_name):
    """
    Set up a model with AdaLoRA configuration
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

    train_dataset = ClinToxDataset("clintox", tokenizer, split="train", max_length=MAX_LENGTH)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    ADA_TOTALSTEP = EPOCHS * len(train_dataloader)

    peft_config = AdaLoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        total_step = ADA_TOTALSTEP
    )

    model = get_peft_model(model, peft_config)
    return model

In [188]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_adalora = setup_adalora_model(MODEL_NAME)

train_and_profile(model_adalora, tokenizer, "AdaLoRA")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.34it/s, loss=0.308]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.56it/s]


Epoch 1 | Training Time: 5.84 s | Train Loss: 0.4420 | Val Loss: 0.2314 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.243]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.18it/s]


Epoch 2 | Training Time: 5.90 s | Train Loss: 0.2545 | Val Loss: 0.1830 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.306]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.35it/s]


Epoch 3 | Training Time: 5.93 s | Train Loss: 0.2264 | Val Loss: 0.1683 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.24it/s, loss=0.278]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.42it/s]


Epoch 4 | Training Time: 5.93 s | Train Loss: 0.1992 | Val Loss: 0.1585 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.17]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.39it/s]


Epoch 5 | Training Time: 5.92 s | Train Loss: 0.1870 | Val Loss: 0.1510 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.60it/s]


--- AdaLoRA ---
Training time: 31.18 seconds
Test Loss: 0.1396 | Test Accuracy: 0.9527





In [189]:
def setup_adalora_model(model_name, tinit=0, tfinal=0):
    """
    Set up a model with AdaLoRA configuration
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,  # Binary classification for ClinTox
        return_dict=True
    )

    train_dataset = ClinToxDataset("clintox", tokenizer, split="train", max_length=MAX_LENGTH)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    ADA_TOTALSTEP = EPOCHS * len(train_dataloader)

    peft_config = AdaLoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        total_step = ADA_TOTALSTEP,
        target_r=LORA_R//2,
        tinit=tinit,
        tfinal=tfinal
    )

    model = get_peft_model(model, peft_config)
    return model

In [190]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_adalora = setup_adalora_model(MODEL_NAME)

train_and_profile(model_adalora, tokenizer, "AdaLoRA")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.16]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.35it/s]


Epoch 1 | Training Time: 5.91 s | Train Loss: 0.3989 | Val Loss: 0.2130 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.124]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.40it/s]


Epoch 2 | Training Time: 5.93 s | Train Loss: 0.2357 | Val Loss: 0.1734 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.143]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.45it/s]


Epoch 3 | Training Time: 5.91 s | Train Loss: 0.2073 | Val Loss: 0.1616 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.241]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.52it/s]


Epoch 4 | Training Time: 5.90 s | Train Loss: 0.1908 | Val Loss: 0.1530 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.0889]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.50it/s]


Epoch 5 | Training Time: 5.89 s | Train Loss: 0.1781 | Val Loss: 0.1464 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.93it/s]


--- AdaLoRA ---
Training time: 31.19 seconds
Test Loss: 0.1357 | Test Accuracy: 0.9527





In [191]:
# Hyperparameter grid
tinit_values = [0.05, 0.1, 0.2]  # 5%, 10%, 20% of steps
tfinal_values = [0.1, 0.2, 0.3]  # 10%, 20%, 30% of steps


for tinit_factor in tinit_values:
    for tfinal_factor in tfinal_values:
        print(f"\n--- Testing AdaLoRA with tinit = {tinit_factor}, tfinal = {tfinal_factor} ---")
        model = setup_adalora_model(MODEL_NAME, tinit_factor, tfinal_factor)

        # Train model with these hyperparameters
        train_and_profile(model, tokenizer, f"AdaLoRA_tinit={tinit_factor}_tfinal={tfinal_factor}")




--- Testing AdaLoRA with tinit = 0.05, tfinal = 0.1 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.05_tfinal=0.1 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.05_tfinal=0.1 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.33it/s, loss=0.223]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.63it/s]


Epoch 1 | Training Time: 5.85 s | Train Loss: 0.4215 | Val Loss: 0.2188 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.32it/s, loss=0.219]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.69it/s]


Epoch 2 | Training Time: 5.86 s | Train Loss: 0.2474 | Val Loss: 0.1755 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.112]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.55it/s]


Epoch 3 | Training Time: 5.87 s | Train Loss: 0.2164 | Val Loss: 0.1627 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.152]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.67it/s]


Epoch 4 | Training Time: 5.90 s | Train Loss: 0.1917 | Val Loss: 0.1530 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.251]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.35it/s]


Epoch 5 | Training Time: 5.90 s | Train Loss: 0.1769 | Val Loss: 0.1462 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.78it/s]



--- AdaLoRA_tinit=0.05_tfinal=0.1 ---
Training time: 31.03 seconds
Test Loss: 0.1384 | Test Accuracy: 0.9459

--- Testing AdaLoRA with tinit = 0.05, tfinal = 0.2 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.05_tfinal=0.2 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.05_tfinal=0.2 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.261]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.49it/s]


Epoch 1 | Training Time: 5.89 s | Train Loss: 0.4217 | Val Loss: 0.2150 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.27]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.40it/s]


Epoch 2 | Training Time: 5.89 s | Train Loss: 0.2457 | Val Loss: 0.1751 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.299]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 3 | Training Time: 5.92 s | Train Loss: 0.2142 | Val Loss: 0.1626 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.0874]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.44it/s]


Epoch 4 | Training Time: 5.92 s | Train Loss: 0.1955 | Val Loss: 0.1538 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.176]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.43it/s]


Epoch 5 | Training Time: 5.92 s | Train Loss: 0.1792 | Val Loss: 0.1460 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.80it/s]



--- AdaLoRA_tinit=0.05_tfinal=0.2 ---
Training time: 31.19 seconds
Test Loss: 0.1378 | Test Accuracy: 0.9527

--- Testing AdaLoRA with tinit = 0.05, tfinal = 0.3 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.05_tfinal=0.3 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.05_tfinal=0.3 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.309]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.48it/s]


Epoch 1 | Training Time: 5.89 s | Train Loss: 0.4427 | Val Loss: 0.2302 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.322]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


Epoch 2 | Training Time: 5.89 s | Train Loss: 0.2595 | Val Loss: 0.1815 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.254]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.64it/s]


Epoch 3 | Training Time: 5.89 s | Train Loss: 0.2248 | Val Loss: 0.1666 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.0723]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


Epoch 4 | Training Time: 5.88 s | Train Loss: 0.2026 | Val Loss: 0.1570 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.212]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.40it/s]


Epoch 5 | Training Time: 5.88 s | Train Loss: 0.1824 | Val Loss: 0.1488 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.65it/s]



--- AdaLoRA_tinit=0.05_tfinal=0.3 ---
Training time: 31.06 seconds
Test Loss: 0.1393 | Test Accuracy: 0.9527

--- Testing AdaLoRA with tinit = 0.1, tfinal = 0.1 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.1_tfinal=0.1 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.1_tfinal=0.1 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.307]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.50it/s]


Epoch 1 | Training Time: 5.91 s | Train Loss: 0.4088 | Val Loss: 0.2181 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.258]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.55it/s]


Epoch 2 | Training Time: 5.89 s | Train Loss: 0.2514 | Val Loss: 0.1793 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.277]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.50it/s]


Epoch 3 | Training Time: 5.89 s | Train Loss: 0.2198 | Val Loss: 0.1652 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.212]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.42it/s]


Epoch 4 | Training Time: 5.89 s | Train Loss: 0.1988 | Val Loss: 0.1558 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.22]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.51it/s]


Epoch 5 | Training Time: 5.91 s | Train Loss: 0.1817 | Val Loss: 0.1481 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.77it/s]



--- AdaLoRA_tinit=0.1_tfinal=0.1 ---
Training time: 31.13 seconds
Test Loss: 0.1405 | Test Accuracy: 0.9527

--- Testing AdaLoRA with tinit = 0.1, tfinal = 0.2 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.1_tfinal=0.2 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.1_tfinal=0.2 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.281]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.43it/s]


Epoch 1 | Training Time: 5.88 s | Train Loss: 0.4353 | Val Loss: 0.2227 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.232]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.48it/s]


Epoch 2 | Training Time: 5.91 s | Train Loss: 0.2515 | Val Loss: 0.1772 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.165]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.36it/s]


Epoch 3 | Training Time: 5.91 s | Train Loss: 0.2172 | Val Loss: 0.1639 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.251]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.43it/s]


Epoch 4 | Training Time: 5.92 s | Train Loss: 0.1996 | Val Loss: 0.1547 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.09]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.44it/s]


Epoch 5 | Training Time: 5.91 s | Train Loss: 0.1828 | Val Loss: 0.1476 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.57it/s]



--- AdaLoRA_tinit=0.1_tfinal=0.2 ---
Training time: 31.18 seconds
Test Loss: 0.1398 | Test Accuracy: 0.9527

--- Testing AdaLoRA with tinit = 0.1, tfinal = 0.3 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.1_tfinal=0.3 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.1_tfinal=0.3 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.31it/s, loss=0.249]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.52it/s]


Epoch 1 | Training Time: 5.87 s | Train Loss: 0.4105 | Val Loss: 0.2172 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.309]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.56it/s]


Epoch 2 | Training Time: 5.90 s | Train Loss: 0.2440 | Val Loss: 0.1751 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.245]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.42it/s]


Epoch 3 | Training Time: 5.90 s | Train Loss: 0.2130 | Val Loss: 0.1616 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.176]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.66it/s]


Epoch 4 | Training Time: 5.89 s | Train Loss: 0.1959 | Val Loss: 0.1525 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.256]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.62it/s]


Epoch 5 | Training Time: 5.89 s | Train Loss: 0.1779 | Val Loss: 0.1450 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.86it/s]



--- AdaLoRA_tinit=0.1_tfinal=0.3 ---
Training time: 31.08 seconds
Test Loss: 0.1341 | Test Accuracy: 0.9527

--- Testing AdaLoRA with tinit = 0.2, tfinal = 0.1 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.2_tfinal=0.1 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.2_tfinal=0.1 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.302]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 1 | Training Time: 5.88 s | Train Loss: 0.4053 | Val Loss: 0.2100 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.191]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.46it/s]


Epoch 2 | Training Time: 5.89 s | Train Loss: 0.2415 | Val Loss: 0.1757 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.131]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.58it/s]


Epoch 3 | Training Time: 5.88 s | Train Loss: 0.2126 | Val Loss: 0.1632 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.167]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


Epoch 4 | Training Time: 5.91 s | Train Loss: 0.1915 | Val Loss: 0.1542 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.225]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.47it/s]


Epoch 5 | Training Time: 5.91 s | Train Loss: 0.1749 | Val Loss: 0.1474 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.71it/s]



--- AdaLoRA_tinit=0.2_tfinal=0.1 ---
Training time: 31.12 seconds
Test Loss: 0.1349 | Test Accuracy: 0.9459

--- Testing AdaLoRA with tinit = 0.2, tfinal = 0.2 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.2_tfinal=0.2 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.2_tfinal=0.2 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.446]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.41it/s]


Epoch 1 | Training Time: 5.88 s | Train Loss: 0.3920 | Val Loss: 0.2111 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.16]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.56it/s]


Epoch 2 | Training Time: 5.90 s | Train Loss: 0.2443 | Val Loss: 0.1756 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.138]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.47it/s]


Epoch 3 | Training Time: 5.90 s | Train Loss: 0.2151 | Val Loss: 0.1639 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.101]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.45it/s]


Epoch 4 | Training Time: 5.90 s | Train Loss: 0.1960 | Val Loss: 0.1544 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.29it/s, loss=0.106]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.59it/s]


Epoch 5 | Training Time: 5.89 s | Train Loss: 0.1814 | Val Loss: 0.1470 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.78it/s]



--- AdaLoRA_tinit=0.2_tfinal=0.2 ---
Training time: 31.11 seconds
Test Loss: 0.1357 | Test Accuracy: 0.9459

--- Testing AdaLoRA with tinit = 0.2, tfinal = 0.3 ---


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- AdaLoRA_tinit=0.2_tfinal=0.3 - Trainable Parameters ---
Trainable: 1606335 | All: 45730290 | % Trained: 3.51

--- AdaLoRA_tinit=0.2_tfinal=0.3 ---


Epoch 1/5: 100%|██████████| 37/37 [00:05<00:00,  6.30it/s, loss=0.353]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.47it/s]


Epoch 1 | Training Time: 5.88 s | Train Loss: 0.3759 | Val Loss: 0.2058 | Val Accuracy: 0.9527


Epoch 2/5: 100%|██████████| 37/37 [00:05<00:00,  6.27it/s, loss=0.187]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 2 | Training Time: 5.90 s | Train Loss: 0.2390 | Val Loss: 0.1748 | Val Accuracy: 0.9527


Epoch 3/5: 100%|██████████| 37/37 [00:05<00:00,  6.26it/s, loss=0.229]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.44it/s]


Epoch 3 | Training Time: 5.91 s | Train Loss: 0.2119 | Val Loss: 0.1629 | Val Accuracy: 0.9527


Epoch 4/5: 100%|██████████| 37/37 [00:05<00:00,  6.28it/s, loss=0.15]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


Epoch 4 | Training Time: 5.90 s | Train Loss: 0.1899 | Val Loss: 0.1537 | Val Accuracy: 0.9527


Epoch 5/5: 100%|██████████| 37/37 [00:05<00:00,  6.25it/s, loss=0.301]
Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.53it/s]


Epoch 5 | Training Time: 5.92 s | Train Loss: 0.1785 | Val Loss: 0.1464 | Val Accuracy: 0.9527


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 15.85it/s]


--- AdaLoRA_tinit=0.2_tfinal=0.3 ---
Training time: 31.16 seconds
Test Loss: 0.1351 | Test Accuracy: 0.9527



