In [None]:
#======GPU assign=====#

import os

# These variables are set before torch is imported so they are already in place
# when CUDA gets initialised.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"


import torch

# Fall back to the CPU when no GPU is visible.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)
# torch.cuda.current_device() raises when CUDA is unavailable, so only query it
# when a GPU is actually present.
if torch.cuda.is_available():
    print('Current cuda device: ', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

In [None]:
#====Packages====#

import csv
import os
import random

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, PeftConfig
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.tensorboard import SummaryWriter
from transformers import (
    AutoModel,
    AutoModelForCausalLM, #(Automatically loads a model for causal language modeling)
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    HfArgumentParser,
    TrainingArguments,
    Trainer,
    pipeline,
    logging,
    TrainerCallback
)

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN to avoid non-deterministic
    algorithm selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NumPy has its own global RNG; without this call NumPy-based randomness
    # would differ from run to run.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Disable the cuDNN auto-tuner and request deterministic kernels so repeated
    # runs pick the same algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    #torch.use_deterministic_algorithms(True)
    #os.environ["PYTHONHASHSEED"] = str(seed)
    #os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

seed_everything()

In [None]:
#======Check compatibility for precision training=======#

# gpu_name = torch.cuda.get_device_name(0)
# compute_capability = torch.cuda.get_device_capability(0)
# print(f"GPU Name: {gpu_name}")
# print(f"Compute Capability: {compute_capability}")

"""
If the major version of the compute capability is 7 or higher, fp16 can be set to True.
"""

In [None]:
#======Model info======#

# Base checkpoint and task label for this experiment.
model_name = "roberta-base"
task_name = "SST"
# Directory the Trainer writes checkpoints to. It has to be defined because
# TrainingArguments(output_dir=output_dir) reads it further down; adjust as needed.
output_dir = f"./results/{model_name}-{task_name}"


In [None]:
#=======Load dataset=======#

# Upper bound on the number of training examples; lower it if your GPU cannot
# handle the full subset.
MAX_TRAIN_SAMPLES = 50000

dataset = load_dataset("glue", "sst2", trust_remote_code=True)
# Reuse the checkpoint name from the "Model info" cell instead of repeating the literal.
tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

# Shuffle with a fixed seed, then keep at most MAX_TRAIN_SAMPLES rows. The min()
# guard avoids an out-of-range selection when the split is smaller than the cap.
shuffled_train = dataset["train"].shuffle(seed=42)
train_dataset = shuffled_train.select(range(min(MAX_TRAIN_SAMPLES, len(shuffled_train))))
validation_dataset = dataset["validation"]

def preprocess_function(examples):
    """Tokenize a batch of SST-2 examples and attach their labels.

    Args:
        examples: Batch mapping with a "sentence" entry (the input text) and a
            "label" entry.

    Returns:
        The tokenizer output for the sentences, truncated and padded to 128
        tokens, with the labels stored under the "labels" key.
    """
    encoded = tokenizer(
        examples["sentence"],  # SST-2 keeps its text in the "sentence" column
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    encoded["labels"] = examples["label"]
    return encoded

# Tokenize both splits in batches.
train_dataset = train_dataset.map(preprocess_function, batched=True)
validation_dataset = validation_dataset.map(preprocess_function, batched=True)

# Drop the raw columns the model does not consume.
train_dataset = train_dataset.remove_columns(["idx", "sentence"])
validation_dataset = validation_dataset.remove_columns(["idx", "sentence"])

# Return PyTorch tensors, kept on the CPU. The Trainer moves every batch to the
# training device itself, so pinning the dataset tensors to "cuda:0" is not
# needed and would make data access depend on a GPU being present. The format
# is set once per split.
train_dataset.set_format("torch")
validation_dataset.set_format("torch")


In [None]:
#====== Load LLM for SST-2 ======#

# Binary sequence-classification head on top of the base checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
# RoBERTa already defines its own <pad> token, so it is NOT replaced with the
# EOS token here (that trick is only needed for checkpoints without a pad
# token). Overriding it would make the tokenizer's pad id disagree with
# model.config.pad_token_id.
tokenizer.padding_side = "right"


# LoRA adapter configuration for sequence classification.
peft_config = LoraConfig(
    r=128,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS",
)

model = get_peft_model(model, peft_config)
# `device` comes from the GPU-assign cell and falls back to the CPU when no GPU is visible.
model = model.to(device)


print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

# Check if LoRA layers are correctly loaded
# print("LoRA layers in the model:")
# for name, param in model.named_parameters():
#     if "lora" in name.lower():
#         print(f"{name}: Trainable = {param.requires_grad}")


In [None]:
#=======Set training parameters=======#

training_arguments = TrainingArguments(
    # --- output / logging ---
    output_dir=output_dir,
    logging_dir="------",
    report_to="tensorboard",
    # --- evaluation / checkpointing: both run once per epoch ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    # --- schedule ---
    num_train_epochs=4,
    max_steps=-1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    # --- optimisation ---
    optim="adamw_hf",
    learning_rate=3e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    lr_scheduler_type="cosine",
    warmup_ratio=0.06,
    # --- precision: plain fp32 ---
    fp16=False,
    bf16=False,
)


class CustomTrainer(Trainer):
    """Trainer that places every batch tensor on the training device before each step."""

    def training_step(self, model, inputs, *args, **kwargs):
        """Move the batch to the training device, then run the regular training step.

        Extra positional/keyword arguments are forwarded untouched so the
        override keeps working when the base ``Trainer.training_step`` is called
        with additional parameters (e.g. ``num_items_in_batch``).

        Args:
            model: The model being trained.
            inputs: Mapping of input names to batch values.

        Returns:
            Whatever ``Trainer.training_step`` returns for the moved batch.
        """
        # Use the device chosen by the training arguments rather than a
        # hard-coded "cuda:0".
        target_device = self.args.device
        inputs = {
            key: value.to(target_device) if hasattr(value, "to") else value
            for key, value in inputs.items()
        }
        return super().training_step(model, inputs, *args, **kwargs)


In [None]:
# ======Start Training======#

# Collect the trainer inputs in one place, then build the trainer from them.
trainer_kwargs = {
    "model": model,
    "args": training_arguments,
    "train_dataset": train_dataset,
    "eval_dataset": validation_dataset,
    "tokenizer": tokenizer,
}
trainer = CustomTrainer(**trainer_kwargs)

# Run fine-tuning; the training summary is shown as the cell output.
trainer.train()

In [None]:
#=========In/OOD benchmark eval=========#

# The SST-2 validation split is stored under the "sst2_test" key; the IMDB test
# split is stored under "imdb_test".
_sst2_eval_split = load_dataset("glue", "sst2", split="validation")
_imdb_eval_split = load_dataset("imdb", split="test")

benchmarks = {
    "sst2_test": _sst2_eval_split,
    "imdb_test": _imdb_eval_split,
}

def preprocess_sst2(examples, tokenizer):
    """Tokenize a batch of SST-2 examples with the given tokenizer.

    Args:
        examples: Batch mapping with "sentence" (input text) and "label" entries.
        tokenizer: Callable tokenizer applied to the sentences.

    Returns:
        The tokenizer output (truncated and padded to 128 tokens) with the
        labels stored under "labels".
    """
    sentences = examples["sentence"]
    encoded = tokenizer(
        sentences,
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    encoded["labels"] = examples["label"]
    return encoded

def preprocess_imdb(examples, tokenizer):
    """Tokenize a batch of IMDB examples with the given tokenizer.

    Args:
        examples: Batch mapping with "text" (the review) and "label" entries.
        tokenizer: Callable tokenizer applied to the reviews.

    Returns:
        The tokenizer output (truncated and padded to 128 tokens) with the
        labels stored under "labels".
    """
    reviews = examples["text"]
    encoded = tokenizer(
        reviews,
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    encoded["labels"] = examples["label"]
    return encoded

def evaluate_model_from_path(model_path, benchmarks, batch_size=32):
    """Evaluate a saved LoRA adapter on in- and out-of-distribution benchmarks.

    Loads the adapter at ``model_path`` on top of the base model named in its
    PEFT config, runs it over every dataset in ``benchmarks`` and prints the
    accuracy and a classification report for each one.

    Args:
        model_path: Location of the saved adapter, as accepted by
            ``PeftConfig.from_pretrained`` / ``PeftModel.from_pretrained``.
        benchmarks: Mapping of benchmark name to a dataset with a "label"
            column and either a "sentence" or a "text" column.
        batch_size: Number of examples per evaluation batch.

    Returns:
        Dict with "avg_in_distribution_accuracy" and
        "avg_out_of_distribution_accuracy". Each is the mean accuracy over the
        benchmarks of that group, or 0.0 if the group is empty. Benchmarks whose
        name is in neither group are still evaluated and printed but do not
        contribute to the averages.
    """
    # Use the GPU when one is available, otherwise fall back to the CPU instead
    # of failing on a hard-coded "cuda:0".
    eval_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    adapter_config = PeftConfig.from_pretrained(model_path)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        adapter_config.base_model_name_or_path,
        num_labels=2,
    )
    model = PeftModel.from_pretrained(base_model, model_path).to(eval_device).eval()
    tokenizer = AutoTokenizer.from_pretrained(adapter_config.base_model_name_or_path, add_prefix_space=True)

    in_distribution = ["sst2_test"]
    out_of_distribution = ["imdb_test"]
    model_input_keys = ["input_ids", "attention_mask"]

    def collate(examples):
        # Stack the per-example tensors into a batch on the evaluation device.
        return {
            key: torch.stack([example[key] for example in examples]).to(eval_device)
            for key in model_input_keys + ["labels"]
        }

    in_acc_total, out_acc_total = 0.0, 0.0
    in_count, out_count = 0, 0

    for dataset_name, dataset in benchmarks.items():
        print(f"Evaluating on {dataset_name}...")

        # "sst2_test" always uses the SST-2 preprocessing. Any other benchmark
        # is treated like IMDB when it has a "text" column; if it has none, the
        # "sentence"-based preprocessing is used instead.
        if dataset_name == "sst2_test" or "text" not in dataset.column_names:
            preprocess = preprocess_sst2
        else:
            preprocess = preprocess_imdb

        tokenized_dataset = dataset.map(
            lambda batch: preprocess(batch, tokenizer),
            batched=True,
            remove_columns=[col for col in dataset.column_names if col != "label"],
        )
        tokenized_dataset.set_format("torch")

        data_loader = torch.utils.data.DataLoader(
            tokenized_dataset,
            batch_size=batch_size,
            collate_fn=collate,
        )

        all_predictions, all_labels = [], []

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for batch in data_loader:
                outputs = model(**{key: batch[key] for key in model_input_keys})
                all_predictions.extend(outputs.logits.argmax(dim=-1).tolist())
                all_labels.extend(batch["labels"].tolist())

        acc = accuracy_score(all_labels, all_predictions)
        print(f"{dataset_name} Accuracy: {acc:.4f}")

        report = classification_report(all_labels, all_predictions, zero_division=0)
        print(f"Classification Report:\n{report}")

        if dataset_name in in_distribution:
            in_acc_total += acc
            in_count += 1
        elif dataset_name in out_of_distribution:
            out_acc_total += acc
            out_count += 1

    avg_in_acc = in_acc_total / in_count if in_count > 0 else 0.0
    avg_out_acc = out_acc_total / out_count if out_count > 0 else 0.0

    print(f"Average In-Distribution Accuracy: {avg_in_acc:.4f}")
    print(f"Average Out-of-Distribution Accuracy: {avg_out_acc:.4f}")

    return {
        "avg_in_distribution_accuracy": avg_in_acc,
        "avg_out_of_distribution_accuracy": avg_out_acc
    }


In [None]:

# Location of the fine-tuned adapter to evaluate (placeholder value).
fine_tuned_model_path = "---------------"

benchmark_scores = evaluate_model_from_path(
    model_path=fine_tuned_model_path,
    benchmarks=benchmarks,
    batch_size=32,
)
# Show the averaged accuracies as the cell output.
benchmark_scores