Part 1 Data Preprocessing

In [None]:
from datasets import load_dataset
from transformers import RobertaTokenizer
from datasets import DatasetDict

# Step 1: Fetch the IMDb corpus; only 5000 reviews of it are kept below.
dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed, then keep the first 3000 training
# reviews and the first 2000 test reviews.
small_dataset = DatasetDict({
    split: dataset[split].shuffle(seed=42).select(range(size))
    for split, size in (("train", 3000), ("test", 2000))
})

# Step 2: Instantiate the tokenizer of the roberta-base checkpoint.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Step 3: Tokenization function
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch with the module-level tokenizer.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw reviews.

    Returns:
        The tokenizer's output for the batch, truncated and padded to a
        fixed length of 512 tokens.
    """
    encoding_options = {
        "padding": "max_length",   # always pad up to max_length
        "truncation": True,        # cut reviews that are too long
        "max_length": 512,         # RoBERTa limit
    }
    return tokenizer(examples["text"], **encoding_options)

# Tokenize every split in batches, then drop the raw "text" column, which is
# not used by the model.
tokenized_datasets = (
    small_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(["text"])
)

# Switch the output format to PyTorch.
tokenized_datasets.set_format("torch")

# Quick check
print(tokenized_datasets)


Part 2 Model Implementation

Part 3 Evaluation Metrics

Method 1: Full Fine-Tuning

In [None]:
import os
import torch
import numpy as np
import time
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import load_dataset
import evaluate
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo

# Function to measure GPU memory usage
def get_gpu_memory_usage():
    """Return the memory NVML reports as used on GPU 0, in MB.

    The measurement is best effort: a failed NVML query must not be mistaken
    for a training failure by callers that wrap training in ``try``.

    Returns:
        float: ``info.used`` converted from bytes to MB, or ``0`` when pynvml
        cannot be imported or the NVML query fails. Callers in this file
        only record values greater than 0.
    """
    try:
        # Imported locally so nvmlShutdown is in scope alongside the others.
        from pynvml import (
            nvmlDeviceGetHandleByIndex,
            nvmlDeviceGetMemoryInfo,
            nvmlInit,
            nvmlShutdown,
        )

        nvmlInit()
        try:
            handle = nvmlDeviceGetHandleByIndex(0)  # Assuming we're using GPU 0
            info = nvmlDeviceGetMemoryInfo(handle)
            return info.used / 1024**2  # Return in MB
        finally:
            # Pair every nvmlInit() with a shutdown so sessions do not pile up.
            nvmlShutdown()
    except Exception:
        return 0

# Load dataset
def load_imdb_dataset():
    """Load IMDb and return fixed-seed subsets of its train and test splits.

    Returns:
        tuple: ``(train_dataset, test_dataset)`` with 3000 training and 2000
        test examples, each selected after shuffling with seed 42.
    """
    imdb = load_dataset("imdb")
    subset_sizes = (("train", 3000), ("test", 2000))
    train_dataset, test_dataset = (
        imdb[split].shuffle(seed=42).select(range(count))
        for split, count in subset_sizes
    )
    return train_dataset, test_dataset

# Tokenize function
def tokenize_function(examples, tokenizer):
    """Encode the ``"text"`` field of a batch with the supplied tokenizer.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw reviews.
        tokenizer: Callable tokenizer; invoked with truncation and padding
            both enabled.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    """Score evaluation predictions with the ``accuracy`` metric.

    Args:
        eval_pred: Object exposing ``predictions`` (one row of per-class
            scores per example) and ``label_ids`` (the reference labels).

    Returns:
        The result of the metric's ``compute`` call.
    """
    accuracy_metric = evaluate.load("accuracy")
    # The predicted class is the highest-scoring column of each row.
    predicted_labels = np.argmax(eval_pred.predictions, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_labels,
        references=eval_pred.label_ids,
    )

def run_full_finetuning():
    """Fine-tune all weights of roberta-base on the IMDb subset and record statistics.

    The function tokenizes the subsets returned by load_imdb_dataset(),
    trains once for the epochs configured in TrainingArguments, evaluates on
    the test subset, prints a summary and writes it to
    ``stats/full_finetuning_stats.txt`` and ``results/full_finetuning_eval.txt``.

    Returns:
        tuple: ``(model, eval_results, stats)`` - the model, the dict returned
        by ``trainer.evaluate()``, and the summary statistics dict.
    """
    method_name = "full_finetuning"
    print(f"\n{'=' * 50}")
    print(f"Running Method 1: Full Fine-Tuning")
    print(f"{'=' * 50}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained("roberta-base")

    # Load and prepare datasets
    train_dataset, test_dataset = load_imdb_dataset()

    # Tokenize datasets
    tokenized_train = train_dataset.map(
        lambda examples: tokenize_function(examples, tokenizer),
        batched=True
    )
    tokenized_test = test_dataset.map(
        lambda examples: tokenize_function(examples, tokenizer),
        batched=True
    )

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Load model
    model = AutoModelForSequenceClassification.from_pretrained(
        "roberta-base",
        num_labels=2  # Binary classification: positive or negative
    )

    # Setup trainer
    training_args = TrainingArguments(
        output_dir=f"output/{method_name}",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="none",  # Disable wandb, tensorboard etc.
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Train the model
    max_gpu_memory = 0
    start_time = time.time()

    try:
        # trainer.train() already runs every epoch in num_train_epochs, so it
        # is called exactly once. Calling it once per epoch would repeat the
        # whole schedule and inflate the measured training time.
        trainer.train()

        # Record GPU memory usage
        if torch.cuda.is_available():
            current_memory = get_gpu_memory_usage()
            max_gpu_memory = max(max_gpu_memory, current_memory)
            print(f"GPU Memory after training: {current_memory:.2f} MB")

    except Exception as e:
        # Best effort: report the failure, then still evaluate and save below.
        print(f"Training error: {e}")

    end_time = time.time()

    # Evaluate the model
    eval_results = trainer.evaluate()
    print(f"Evaluation Results: {eval_results}")

    # Record and save statistics
    training_time = end_time - start_time

    # Count trainable parameters
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    stats = {
        "method": method_name,
        "training_time": training_time,
        "trainable_parameters": trainable_params,
        "total_parameters": total_params,
        "trainable_percentage": (trainable_params / total_params) * 100,
        "accuracy": eval_results["eval_accuracy"]
    }

    # A value of 0 means no GPU measurement was taken, so it is not recorded.
    if max_gpu_memory > 0:
        stats["max_gpu_memory_mb"] = max_gpu_memory

    print(f"\n{method_name} Statistics:")
    print(f"  Training Time: {training_time:.2f} seconds")
    print(f"  Trainable Parameters: {trainable_params:,}")
    print(f"  Total Parameters: {total_params:,}")
    print(f"  Trainable Parameters %: {stats['trainable_percentage']:.2f}%")
    print(f"  Accuracy: {stats['accuracy']:.4f}")
    if max_gpu_memory > 0:
        print(f"  Max GPU Memory Usage: {max_gpu_memory:.2f} MB")

    # Save statistics and results
    os.makedirs("stats", exist_ok=True)
    os.makedirs("results", exist_ok=True)

    # Save statistics as "key: value" lines
    with open(f"stats/{method_name}_stats.txt", "w") as f:
        for key, value in stats.items():
            f.write(f"{key}: {value}\n")

    # Save evaluation results in the same format
    with open(f"results/{method_name}_eval.txt", "w") as f:
        for key, value in eval_results.items():
            f.write(f"{key}: {value}\n")

    return model, eval_results, stats

# Entry point: run the full fine-tuning experiment when this code is executed
# as the main program; the returned (model, eval_results, stats) is discarded.
if __name__ == "__main__":
    run_full_finetuning()

Method 2: LoRA Fine-Tuning using PEFT

In [None]:
import os
import torch
import numpy as np
import time
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import load_dataset
import evaluate
from peft import get_peft_model, LoraConfig, TaskType
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo

# Function to measure GPU memory usage
def get_gpu_memory_usage():
    """Return the memory NVML reports as used on GPU 0, in MB.

    The measurement is best effort: a failed NVML query must not be mistaken
    for a training failure by callers that wrap training in ``try``.

    Returns:
        float: ``info.used`` converted from bytes to MB, or ``0`` when pynvml
        cannot be imported or the NVML query fails. Callers in this file
        only record values greater than 0.
    """
    try:
        # Imported locally so nvmlShutdown is in scope alongside the others.
        from pynvml import (
            nvmlDeviceGetHandleByIndex,
            nvmlDeviceGetMemoryInfo,
            nvmlInit,
            nvmlShutdown,
        )

        nvmlInit()
        try:
            handle = nvmlDeviceGetHandleByIndex(0)  # Assuming we're using GPU 0
            info = nvmlDeviceGetMemoryInfo(handle)
            return info.used / 1024**2  # Return in MB
        finally:
            # Pair every nvmlInit() with a shutdown so sessions do not pile up.
            nvmlShutdown()
    except Exception:
        return 0

# Load dataset
def load_imdb_dataset():
    """Load IMDb and return fixed-seed subsets of its train and test splits.

    Returns:
        tuple: ``(train_dataset, test_dataset)`` with 3000 training and 2000
        test examples, each selected after shuffling with seed 42.
    """
    imdb = load_dataset("imdb")
    subset_sizes = (("train", 3000), ("test", 2000))
    train_dataset, test_dataset = (
        imdb[split].shuffle(seed=42).select(range(count))
        for split, count in subset_sizes
    )
    return train_dataset, test_dataset

# Tokenize function
def tokenize_function(examples, tokenizer):
    """Encode the ``"text"`` field of a batch with the supplied tokenizer.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw reviews.
        tokenizer: Callable tokenizer; invoked with truncation and padding
            both enabled.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    """Score evaluation predictions with the ``accuracy`` metric.

    Args:
        eval_pred: Object exposing ``predictions`` (one row of per-class
            scores per example) and ``label_ids`` (the reference labels).

    Returns:
        The result of the metric's ``compute`` call.
    """
    accuracy_metric = evaluate.load("accuracy")
    # The predicted class is the highest-scoring column of each row.
    predicted_labels = np.argmax(eval_pred.predictions, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_labels,
        references=eval_pred.label_ids,
    )

def run_lora_finetuning():
    """Fine-tune roberta-base on the IMDb subset with LoRA adapters and record statistics.

    LoRA adapters (rank 8, alpha 16, dropout 0.1) are attached to the
    ``query``, ``key`` and ``value`` modules via PEFT. The model is trained
    once for the epochs configured in TrainingArguments, evaluated on the
    test subset, and the summary is written to
    ``stats/lora_finetuning_stats.txt`` and ``results/lora_finetuning_eval.txt``.

    Returns:
        tuple: ``(model, eval_results, stats)`` - the PEFT-wrapped model, the
        dict returned by ``trainer.evaluate()``, and the summary statistics.
    """
    method_name = "lora_finetuning"
    print(f"\n{'=' * 50}")
    print(f"Running Method 2: LoRA Fine-Tuning using PEFT")
    print(f"{'=' * 50}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained("roberta-base")

    # Load and prepare datasets
    train_dataset, test_dataset = load_imdb_dataset()

    # Tokenize datasets
    tokenized_train = train_dataset.map(
        lambda examples: tokenize_function(examples, tokenizer),
        batched=True
    )
    tokenized_test = test_dataset.map(
        lambda examples: tokenize_function(examples, tokenizer),
        batched=True
    )

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Load the base model
    base_model = AutoModelForSequenceClassification.from_pretrained(
        "roberta-base",
        num_labels=2  # Binary classification: positive or negative
    )

    # Define LoRA configuration
    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,  # Rank
        lora_alpha=16,  # Alpha scaling
        lora_dropout=0.1,
        target_modules=["query", "key", "value"],  # Target attention modules
    )

    # Apply LoRA adapters to the model
    model = get_peft_model(base_model, lora_config)
    print("Model with LoRA adapters:")
    model.print_trainable_parameters()  # Print % of trainable parameters

    # Setup trainer
    training_args = TrainingArguments(
        output_dir=f"output/{method_name}",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="none",  # Disable wandb, tensorboard etc.
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Train the model
    max_gpu_memory = 0
    start_time = time.time()

    try:
        # trainer.train() already runs every epoch in num_train_epochs, so it
        # is called exactly once. Calling it once per epoch would repeat the
        # whole schedule and inflate the measured training time.
        trainer.train()

        # Record GPU memory usage
        if torch.cuda.is_available():
            current_memory = get_gpu_memory_usage()
            max_gpu_memory = max(max_gpu_memory, current_memory)
            print(f"GPU Memory after training: {current_memory:.2f} MB")

    except Exception as e:
        # Best effort: report the failure, then still evaluate and save below.
        print(f"Training error: {e}")

    end_time = time.time()

    # Evaluate the model
    eval_results = trainer.evaluate()
    print(f"Evaluation Results: {eval_results}")

    # Record and save statistics
    training_time = end_time - start_time

    # Count trainable parameters
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())

    stats = {
        "method": method_name,
        "training_time": training_time,
        "trainable_parameters": trainable_params,
        "total_parameters": total_params,
        "trainable_percentage": (trainable_params / total_params) * 100,
        "accuracy": eval_results["eval_accuracy"]
    }

    # A value of 0 means no GPU measurement was taken, so it is not recorded.
    if max_gpu_memory > 0:
        stats["max_gpu_memory_mb"] = max_gpu_memory

    print(f"\n{method_name} Statistics:")
    print(f"  Training Time: {training_time:.2f} seconds")
    print(f"  Trainable Parameters: {trainable_params:,}")
    print(f"  Total Parameters: {total_params:,}")
    print(f"  Trainable Parameters %: {stats['trainable_percentage']:.2f}%")
    print(f"  Accuracy: {stats['accuracy']:.4f}")
    if max_gpu_memory > 0:
        print(f"  Max GPU Memory Usage: {max_gpu_memory:.2f} MB")

    # Save statistics and results
    os.makedirs("stats", exist_ok=True)
    os.makedirs("results", exist_ok=True)

    # Save statistics as "key: value" lines
    with open(f"stats/{method_name}_stats.txt", "w") as f:
        for key, value in stats.items():
            f.write(f"{key}: {value}\n")

    # Save evaluation results in the same format
    with open(f"results/{method_name}_eval.txt", "w") as f:
        for key, value in eval_results.items():
            f.write(f"{key}: {value}\n")

    return model, eval_results, stats

# Entry point: run the LoRA experiment when this code is executed as the main
# program; the returned (model, eval_results, stats) is discarded.
if __name__ == "__main__":
    run_lora_finetuning()

Method 3: QLoRA Fine-Tuning

In [None]:
import os
import torch
import numpy as np
import time
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import load_dataset
import evaluate
from peft import get_peft_model, LoraConfig, TaskType

# Function to measure GPU memory usage
def get_gpu_memory_usage():
    """Return the memory NVML reports as used on GPU 0, in MB.

    The measurement is best effort: ordinary failures yield ``0`` instead of
    an exception, while ``KeyboardInterrupt`` and ``SystemExit`` still
    propagate so the user can stop the run.

    Returns:
        float: ``info.used`` converted from bytes to MB, or ``0`` when pynvml
        is not available or the NVML query fails.
    """
    try:
        from pynvml import (
            nvmlDeviceGetHandleByIndex,
            nvmlDeviceGetMemoryInfo,
            nvmlInit,
            nvmlShutdown,
        )

        nvmlInit()
        try:
            handle = nvmlDeviceGetHandleByIndex(0)  # Assuming we're using GPU 0
            info = nvmlDeviceGetMemoryInfo(handle)
            return info.used / 1024**2  # Return in MB
        finally:
            # Pair every nvmlInit() with a shutdown so sessions do not pile up.
            nvmlShutdown()
    except Exception:
        # Covers a missing pynvml (ImportError) and NVML errors alike.
        return 0  # Return 0 if pynvml is not available

# Load dataset
def load_imdb_dataset():
    """Load IMDb and return fixed-seed subsets of its train and test splits.

    Returns:
        tuple: ``(train_dataset, test_dataset)`` with 3000 training and 2000
        test examples, each selected after shuffling with seed 42.
    """
    imdb = load_dataset("imdb")
    subset_sizes = (("train", 3000), ("test", 2000))
    train_dataset, test_dataset = (
        imdb[split].shuffle(seed=42).select(range(count))
        for split, count in subset_sizes
    )
    return train_dataset, test_dataset

# Tokenize function
def tokenize_function(examples, tokenizer):
    """Encode the ``"text"`` field of a batch at a fixed length of 512 tokens.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw reviews.
        tokenizer: Callable tokenizer; invoked with truncation enabled and
            padding set to ``"max_length"`` with ``max_length=512``.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(examples["text"], **encoding_options)

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    """Score evaluation predictions with the ``accuracy`` metric.

    Args:
        eval_pred: Object exposing ``predictions`` (one row of per-class
            scores per example) and ``label_ids`` (the reference labels).

    Returns:
        The result of the metric's ``compute`` call.
    """
    accuracy_metric = evaluate.load("accuracy")
    # The predicted class is the highest-scoring column of each row.
    predicted_labels = np.argmax(eval_pred.predictions, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_labels,
        references=eval_pred.label_ids,
    )

def run_lora_finetuning():
    """
    Standard LoRA fine-tuning (no quantization) of roberta-base on the IMDb
    subset; run_qlora_finetuning() calls this as its fallback path.

    Returns:
        tuple: ``(model, eval_results, stats)`` - the PEFT-wrapped model, the
        dict returned by ``trainer.evaluate()``, and the summary statistics.
    """
    method_name = "lora_finetuning"
    banner = "=" * 50
    print("\n" + banner)
    print("Running Method: LoRA Fine-Tuning (Standard)")
    print(banner)

    # Tokenizer plus a batch encoder bound to it
    tokenizer = AutoTokenizer.from_pretrained("roberta-base")

    def encode_batch(batch):
        return tokenize_function(batch, tokenizer)

    # Subsampled IMDb splits, tokenized in batches
    train_dataset, test_dataset = load_imdb_dataset()
    tokenized_train = train_dataset.map(encode_batch, batched=True)
    tokenized_test = test_dataset.map(encode_batch, batched=True)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Two-label classification head on top of roberta-base
    base_model = AutoModelForSequenceClassification.from_pretrained(
        "roberta-base", num_labels=2
    )

    # LoRA settings: rank 8, alpha 16, adapters on the attention projections
    lora_settings = dict(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        target_modules=["query", "key", "value"],
    )
    model = get_peft_model(base_model, LoraConfig(**lora_settings))
    print("Model with LoRA adapters:")
    model.print_trainable_parameters()

    # Evaluate and checkpoint once per epoch, then reload the most accurate
    # checkpoint; external loggers are disabled.
    trainer_settings = dict(
        output_dir=f"output/{method_name}",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="none",
    )
    training_args = TrainingArguments(**trainer_settings)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Timed training; a failure is reported and the run carries on.
    max_gpu_memory = 0
    start_time = time.time()
    try:
        trainer.train()
        if torch.cuda.is_available():
            current_memory = get_gpu_memory_usage()
            max_gpu_memory = max(max_gpu_memory, current_memory)
            print(f"GPU Memory after training: {current_memory:.2f} MB")
    except Exception as e:
        print(f"Training error: {e}")
    training_time = time.time() - start_time

    # Final evaluation on the test subset
    eval_results = trainer.evaluate()
    print(f"Evaluation Results: {eval_results}")

    # Single pass over the parameters for both counts
    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    stats = {
        "method": method_name,
        "training_time": training_time,
        "trainable_parameters": trainable_params,
        "total_parameters": total_params,
        "trainable_percentage": (trainable_params / total_params) * 100,
        "accuracy": eval_results["eval_accuracy"],
    }
    has_gpu_reading = max_gpu_memory > 0
    if has_gpu_reading:
        stats["max_gpu_memory_mb"] = max_gpu_memory

    print(f"\n{method_name} Statistics:")
    print(f"  Training Time: {training_time:.2f} seconds")
    print(f"  Trainable Parameters: {trainable_params:,}")
    print(f"  Total Parameters: {total_params:,}")
    print(f"  Trainable Parameters %: {stats['trainable_percentage']:.2f}%")
    print(f"  Accuracy: {stats['accuracy']:.4f}")
    if has_gpu_reading:
        print(f"  Max GPU Memory Usage: {max_gpu_memory:.2f} MB")

    # Persist both dicts as "key: value" text files
    for folder in ("stats", "results"):
        os.makedirs(folder, exist_ok=True)

    reports = (
        (f"stats/{method_name}_stats.txt", stats),
        (f"results/{method_name}_eval.txt", eval_results),
    )
    for path, payload in reports:
        with open(path, "w") as f:
            f.writelines(f"{key}: {value}\n" for key, value in payload.items())

    return model, eval_results, stats

def run_qlora_finetuning():
    """
    Try to run QLoRA with quantization, falling back to standard LoRA
    if bitsandbytes is not available.

    The base model is loaded in 4-bit NF4 precision, prepared for k-bit
    training, wrapped with LoRA adapters and trained with fp16 mixed
    precision. Statistics are written to ``stats/qlora_finetuning_stats.txt``
    and ``results/qlora_finetuning_eval.txt``.

    An error inside the training call is printed and the run continues to
    evaluation. Any other exception (including a failed import) switches to
    run_lora_finetuning().

    Returns:
        tuple: ``(model, eval_results, stats)`` from the QLoRA run, or the
        return value of run_lora_finetuning() when falling back.
    """
    try:
        # Check if bitsandbytes is properly installed
        import bitsandbytes
        from transformers import BitsAndBytesConfig
        # Imported with the other QLoRA-only names so that a PEFT build
        # without it also takes the LoRA fallback.
        from peft import prepare_model_for_kbit_training
        print(f"bitsandbytes version: {bitsandbytes.__version__}")

        method_name = "qlora_finetuning"
        print(f"\n{'=' * 50}")
        print(f"Running Method: QLoRA Fine-Tuning")
        print(f"{'=' * 50}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained("roberta-base")

        # Load and prepare datasets
        train_dataset, test_dataset = load_imdb_dataset()

        # Tokenize datasets
        tokenized_train = train_dataset.map(
            lambda examples: tokenize_function(examples, tokenizer),
            batched=True
        )
        tokenized_test = test_dataset.map(
            lambda examples: tokenize_function(examples, tokenizer),
            batched=True
        )

        data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

        # Configure quantization
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,  # Quantize to 4-bit precision
            bnb_4bit_compute_dtype=torch.float16,  # Use float16 for computation
            bnb_4bit_quant_type="nf4",  # Normal Float 4
            bnb_4bit_use_double_quant=True,  # Use double quantization for more efficiency
        )

        # Load the quantized base model
        base_model = AutoModelForSequenceClassification.from_pretrained(
            "roberta-base",
            num_labels=2,  # Binary classification
            quantization_config=quantization_config,
            device_map="auto"  # Automatically decide device mapping
        )

        # Standard PEFT preparation step for a quantized model before
        # adapters are attached. Gradient checkpointing is left off so the
        # time/memory figures stay comparable with the other methods, which
        # do not use it.
        base_model = prepare_model_for_kbit_training(
            base_model, use_gradient_checkpointing=False
        )

        # Define LoRA configuration for the quantized model
        lora_config = LoraConfig(
            task_type=TaskType.SEQ_CLS,
            r=8,  # Rank
            lora_alpha=16,  # Alpha scaling
            lora_dropout=0.1,
            target_modules=["query", "key", "value"],  # Target attention modules
        )

        # Apply LoRA adapters to the quantized model
        model = get_peft_model(base_model, lora_config)
        print("Model with QLoRA adapters:")
        model.print_trainable_parameters()  # Print % of trainable parameters

        # Setup trainer
        training_args = TrainingArguments(
            output_dir=f"output/{method_name}",
            eval_strategy="epoch",
            save_strategy="epoch",
            learning_rate=2e-5,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=3,
            weight_decay=0.01,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            push_to_hub=False,
            report_to="none",  # Disable wandb, tensorboard etc.
            # Important for 4-bit quantization compatibility
            fp16=True,  # Use mixed precision
            bf16=False,  # Don't use bfloat16
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_test,
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )

        # Train the model
        max_gpu_memory = 0
        start_time = time.time()

        try:
            # Use built-in training loop
            trainer.train()

            # Record GPU memory usage
            if torch.cuda.is_available():
                current_memory = get_gpu_memory_usage()
                max_gpu_memory = max(max_gpu_memory, current_memory)
                print(f"GPU Memory after training: {current_memory:.2f} MB")

        except Exception as e:
            # Best effort: report the failure, then still evaluate and save.
            print(f"Training error: {e}")

        end_time = time.time()

        # Evaluate the model
        eval_results = trainer.evaluate()
        print(f"Evaluation Results: {eval_results}")

        # Record and save statistics
        training_time = end_time - start_time

        # Count parameters through PEFT rather than numel(): bitsandbytes
        # stores 4-bit weights packed, so numel() on those tensors
        # under-reports the size of the base model.
        trainable_params, total_params = model.get_nb_trainable_parameters()

        stats = {
            "method": method_name,
            "training_time": training_time,
            "trainable_parameters": trainable_params,
            "total_parameters": total_params,
            "trainable_percentage": (trainable_params / total_params) * 100,
            "accuracy": eval_results["eval_accuracy"]
        }

        # A value of 0 means no GPU measurement was taken.
        if max_gpu_memory > 0:
            stats["max_gpu_memory_mb"] = max_gpu_memory

        print(f"\n{method_name} Statistics:")
        print(f"  Training Time: {training_time:.2f} seconds")
        print(f"  Trainable Parameters: {trainable_params:,}")
        print(f"  Total Parameters: {total_params:,}")
        print(f"  Trainable Parameters %: {stats['trainable_percentage']:.2f}%")
        print(f"  Accuracy: {stats['accuracy']:.4f}")
        if max_gpu_memory > 0:
            print(f"  Max GPU Memory Usage: {max_gpu_memory:.2f} MB")

        # Save statistics and results
        os.makedirs("stats", exist_ok=True)
        os.makedirs("results", exist_ok=True)

        # Save statistics as "key: value" lines
        with open(f"stats/{method_name}_stats.txt", "w") as f:
            for key, value in stats.items():
                f.write(f"{key}: {value}\n")

        # Save evaluation results in the same format
        with open(f"results/{method_name}_eval.txt", "w") as f:
            for key, value in eval_results.items():
                f.write(f"{key}: {value}\n")

        return model, eval_results, stats

    except ImportError as e:
        print(f"Error: {e}")
        print("bitsandbytes package not found or incompatible version.")
        print("Falling back to standard LoRA finetuning without quantization.")
        return run_lora_finetuning()
    except Exception as e:
        print(f"Unexpected error during QLoRA setup: {e}")
        print("Falling back to standard LoRA finetuning without quantization.")
        return run_lora_finetuning()

# Entry point: make sure bitsandbytes is importable (installing it if needed),
# then run QLoRA, which itself falls back to standard LoRA on failure.
if __name__ == "__main__":
    # First, try to install bitsandbytes if it's not available
    try:
        import bitsandbytes
        print(f"bitsandbytes is already installed (version {bitsandbytes.__version__})")
    except ImportError:
        print("bitsandbytes not found. Attempting to install...")
        import subprocess
        import sys
        try:
            # Invoke pip through the running interpreter so the package is
            # installed into this environment, not whichever `pip` is on PATH.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", "bitsandbytes>=0.39.0"]
            )
            print("bitsandbytes installation successful!")
        except (subprocess.CalledProcessError, OSError):
            # OSError covers an interpreter/pip executable that cannot be launched.
            print("Failed to install bitsandbytes. Will use standard LoRA instead of QLoRA.")

    # Run QLoRA (with fallback to standard LoRA if necessary)
    run_qlora_finetuning()

Method 4: Adapter Tuning (IA3)

In [None]:
import os
import torch
import numpy as np
import time
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import load_dataset
import evaluate
from peft import get_peft_model, IA3Config, TaskType
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo

# Function to measure GPU memory usage
def get_gpu_memory_usage():
    """Return the memory NVML reports as used on GPU 0, in MB.

    The measurement is best effort: a failed NVML query must not be mistaken
    for a training failure by callers that wrap training in ``try``.

    Returns:
        float: ``info.used`` converted from bytes to MB, or ``0`` when pynvml
        cannot be imported or the NVML query fails. Callers in this file
        only record values greater than 0.
    """
    try:
        # Imported locally so nvmlShutdown is in scope alongside the others.
        from pynvml import (
            nvmlDeviceGetHandleByIndex,
            nvmlDeviceGetMemoryInfo,
            nvmlInit,
            nvmlShutdown,
        )

        nvmlInit()
        try:
            handle = nvmlDeviceGetHandleByIndex(0)  # Assuming we're using GPU 0
            info = nvmlDeviceGetMemoryInfo(handle)
            return info.used / 1024**2  # Return in MB
        finally:
            # Pair every nvmlInit() with a shutdown so sessions do not pile up.
            nvmlShutdown()
    except Exception:
        return 0

# Load dataset
def load_imdb_dataset():
    """Load IMDb and return fixed-seed subsets of its train and test splits.

    Only a subset is used, to speed up training and testing.

    Returns:
        tuple: ``(train_dataset, test_dataset)`` with 3000 training and 2000
        test examples, each selected after shuffling with seed 42.
    """
    imdb = load_dataset("imdb")
    subset_sizes = (("train", 3000), ("test", 2000))
    train_dataset, test_dataset = (
        imdb[split].shuffle(seed=42).select(range(count))
        for split, count in subset_sizes
    )
    return train_dataset, test_dataset

# Tokenize function
def tokenize_function(examples, tokenizer):
    """Encode the ``"text"`` field of a batch at a fixed length of 512 tokens.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw reviews.
        tokenizer: Callable tokenizer; invoked with truncation enabled and
            padding set to ``"max_length"`` with ``max_length=512``.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(examples["text"], **encoding_options)

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    """Score evaluation predictions with the ``accuracy`` metric.

    Args:
        eval_pred: Object exposing ``predictions`` (one row of per-class
            scores per example) and ``label_ids`` (the reference labels).

    Returns:
        The result of the metric's ``compute`` call.
    """
    accuracy_metric = evaluate.load("accuracy")
    # The predicted class is the highest-scoring column of each row.
    predicted_labels = np.argmax(eval_pred.predictions, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_labels,
        references=eval_pred.label_ids,
    )

# Main IA3 finetuning function
def run_ia3_finetuning():
    """Train IA3 adapters on roberta-base for the IMDb subset and log the outcome.

    The summary is printed and written to
    ``stats/ia3_adapter_tuning_stats.txt`` and
    ``results/ia3_adapter_tuning_eval.txt``.

    Returns:
        tuple: ``(model, eval_results, stats)`` - the PEFT-wrapped model, the
        dict returned by ``trainer.evaluate()``, and the summary statistics.
    """
    method_name = "ia3_adapter_tuning"
    separator = "=" * 50
    print("\n" + separator)
    print("Running Method: IA3 Adapter Tuning")
    print(separator)

    # Tokenizer plus a batch encoder bound to it
    tokenizer = AutoTokenizer.from_pretrained("roberta-base")

    def encode(batch):
        return tokenize_function(batch, tokenizer)

    # Subsampled IMDb splits, tokenized in batches
    train_dataset, test_dataset = load_imdb_dataset()
    tokenized_train = train_dataset.map(encode, batched=True)
    tokenized_test = test_dataset.map(encode, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Two-label classification head on top of roberta-base
    base_model = AutoModelForSequenceClassification.from_pretrained(
        "roberta-base",
        num_labels=2,
    )

    # IA3 is applied to the attention projections and the dense layers; the
    # dense layers are additionally declared as feed-forward modules.
    targeted = ["self.query", "self.key", "self.value", "output.dense", "intermediate.dense"]
    feedforward = ["intermediate.dense", "output.dense"]
    ia3_config = IA3Config(
        task_type=TaskType.SEQ_CLS,
        target_modules=targeted,
        feedforward_modules=feedforward,
        inference_mode=False,
    )

    model = get_peft_model(base_model, ia3_config)
    print("Model with IA3 adapters:")
    model.print_trainable_parameters()

    # Evaluate and checkpoint once per epoch, then reload the most accurate
    # checkpoint; external loggers are disabled.
    trainer_settings = dict(
        output_dir=f"output/{method_name}",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="none",
    )
    training_args = TrainingArguments(**trainer_settings)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Timed training; a failure is reported and the run carries on.
    max_gpu_memory = 0
    start_time = time.time()
    try:
        trainer.train()
        if torch.cuda.is_available():
            current_memory = get_gpu_memory_usage()
            max_gpu_memory = max(max_gpu_memory, current_memory)
            print(f"GPU Memory after training: {current_memory:.2f} MB")
    except Exception as e:
        print(f"Training error: {e}")
    training_time = time.time() - start_time

    # Final evaluation on the test subset
    eval_results = trainer.evaluate()

    # Single pass over the parameters for both counts
    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    stats = {
        "method": method_name,
        "training_time": training_time,
        "trainable_parameters": trainable_params,
        "total_parameters": total_params,
        "trainable_percentage": (trainable_params / total_params) * 100,
        "accuracy": eval_results["eval_accuracy"],
    }
    has_gpu_reading = max_gpu_memory > 0
    if has_gpu_reading:
        stats["max_gpu_memory_mb"] = max_gpu_memory

    # Print stats
    print(f"\n{method_name} Statistics:")
    print(f"  Training Time: {training_time:.2f} seconds")
    print(f"  Trainable Parameters: {trainable_params:,}")
    print(f"  Total Parameters: {total_params:,}")
    print(f"  Trainable Parameters %: {stats['trainable_percentage']:.2f}%")
    print(f"  Accuracy: {stats['accuracy']:.4f}")
    if has_gpu_reading:
        print(f"  Max GPU Memory Usage: {max_gpu_memory:.2f} MB")

    # Persist both dicts as "key: value" text files
    for folder in ("stats", "results"):
        os.makedirs(folder, exist_ok=True)

    reports = (
        (f"stats/{method_name}_stats.txt", stats),
        (f"results/{method_name}_eval.txt", eval_results),
    )
    for path, payload in reports:
        with open(path, "w") as f:
            f.writelines(f"{key}: {value}\n" for key, value in payload.items())

    return model, eval_results, stats

# Entry point: run the IA3 experiment when this code is executed as the main
# program; the returned (model, eval_results, stats) is discarded.
if __name__ == "__main__":
    run_ia3_finetuning()

Part 4 Visualization

Generate comparative bar charts illustrating:
● Accuracy
● Training time
● Number of trainable parameters
● GPU memory usage

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob

def _parse_stat_value(raw):
    """Convert a stats-file value to ``int`` or ``float``, else return it as-is.

    ``int`` is tried first so whole numbers stay integers. ``float`` is tried
    second so values without a decimal point that are still floats (for
    example exponent notation such as ``5e-05``) are parsed as numbers
    instead of being left as strings.
    """
    for convert in (int, float):
        try:
            return convert(raw)
        except ValueError:
            continue
    return raw


def read_stats_files(directory="stats"):
    """Read every ``*_stats.txt`` file in ``directory`` into one DataFrame.

    Each file is expected to contain ``key: value`` lines. Lines without a
    colon are ignored; only the first colon splits key from value, so values
    may themselves contain colons.

    Args:
        directory: Folder searched for files matching ``*_stats.txt``.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully read file, in
        sorted file-name order, or ``None`` when no file was found or none
        could be read. The ``method`` column defaults to the file name without
        the ``_stats.txt`` suffix and is overridden by a ``method:`` line in
        the file; it is always kept as a string.
    """
    # Sort so row order (and therefore bar order in plots) does not depend on
    # the order in which the filesystem happens to list the files.
    stats_files = sorted(glob.glob(f"{directory}/*_stats.txt"))

    if not stats_files:
        print(f"No stats files found in {directory}")
        return None

    print(f"Found {len(stats_files)} stats files: {stats_files}")

    all_stats = []
    for file_path in stats_files:
        method_name = os.path.basename(file_path).replace("_stats.txt", "")
        stats = {"method": method_name}

        try:
            with open(file_path, 'r') as f:
                for line in f:
                    if ':' not in line:
                        continue
                    key, value = (part.strip() for part in line.split(':', 1))
                    # The method name is an identifier: never coerce it to a
                    # number, even if it happens to consist of digits.
                    stats[key] = value if key == "method" else _parse_stat_value(value)

            all_stats.append(stats)
            print(f"Loaded stats from {file_path}: {stats}")

        except FileNotFoundError:
            # The file can disappear between the glob and the open.
            print(f"Warning: File {file_path} not found")
        except Exception as e:
            # Best effort: report the unreadable file and keep the others.
            print(f"Error reading {file_path}: {e}")

    if all_stats:
        return pd.DataFrame(all_stats)
    return None

def _save_annotated_barplot(df, column, title, ylabel, label_format, output_path, ylim=None):
    """Draw one bar per method for ``column``, label each bar and save the figure.

    Args:
        df: DataFrame with a ``display_name`` column (x axis) and ``column``.
        column: Name of the numeric column plotted on the y axis.
        title: Figure title.
        ylabel: y-axis label.
        label_format: ``str.format`` template applied to each bar height,
            e.g. ``"{:.1f}s"``.
        output_path: File the figure is written to (300 dpi).
        ylim: Optional ``(low, high)`` pair for the y axis.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    # NOTE(review): newer seaborn releases reportedly warn when `palette` is
    # passed without `hue` — confirm against the installed version.
    sns.barplot(x='display_name', y=column, data=df, palette='viridis', ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Method")
    ax.set_ylabel(ylabel)
    if ylim is not None:
        ax.set_ylim(*ylim)

    # Write the value above each bar.
    for bar in ax.patches:
        height = bar.get_height()
        # A method with a missing value has no finite bar height to label.
        if not np.isfinite(height):
            continue
        ax.annotate(label_format.format(height),
                    (bar.get_x() + bar.get_width() / 2., height),
                    ha='center', va='bottom', rotation=0)

    fig.tight_layout()
    fig.savefig(output_path, dpi=300)


def create_visualizations(stats_dir="stats", output_dir="plots"):
    """Create bar charts comparing the different fine-tuning methods.

    Stats are loaded with ``read_stats_files(stats_dir)``. When nothing usable
    is found, a small hard-coded example dataset is plotted instead so the
    charts can still be produced.

    Saved figures (PNG, 300 dpi) in ``output_dir``:
        * ``accuracy_comparison.png``
        * ``training_time_comparison.png``
        * ``trainable_parameters_comparison.png``
        * ``gpu_memory_comparison.png`` (only if ``max_gpu_memory_mb`` exists)
        * ``combined_normalized_comparison.png``

    Args:
        stats_dir: Directory containing the ``*_stats.txt`` files.
        output_dir: Directory the figures are written to; created if missing.

    Returns:
        None.
    """
    df = read_stats_files(stats_dir)

    if df is None or df.empty:
        print("No valid stats data found. Creating example data for visualization.")
        df = pd.DataFrame({
            'method': ['lora_finetuning', 'qlora_finetuning'],
            'accuracy': [0.85, 0.84],
            'training_time': [600, 400],
            'trainable_parameters': [1000000, 300000],
            'trainable_percentage': [5.0, 1.5],
            'max_gpu_memory_mb': [4000, 2500]
        })

    os.makedirs(output_dir, exist_ok=True)

    sns.set_style("whitegrid")
    plt.rcParams.update({'font.size': 14})

    # Derived columns: a short upper-case label for the x axis and the
    # parameter count in millions for readability. str() keeps this working
    # even if a method name was loaded as a number.
    df = df.assign(
        display_name=df['method'].map(lambda name: str(name).replace('_finetuning', '').upper()),
        trainable_parameters_millions=df['trainable_parameters'] / 1000000,
    )

    # 1. Accuracy — y axis zoomed to +/-5% around the data so that small
    #    differences between methods remain visible.
    _save_annotated_barplot(
        df, 'accuracy', "Accuracy Comparison", "Accuracy", "{:.4f}",
        os.path.join(output_dir, "accuracy_comparison.png"),
        ylim=(df['accuracy'].min() * 0.95, df['accuracy'].max() * 1.05),
    )

    # 2. Training time
    _save_annotated_barplot(
        df, 'training_time', "Training Time Comparison", "Training Time (seconds)", "{:.1f}s",
        os.path.join(output_dir, "training_time_comparison.png"),
    )

    # 3. Trainable parameters (in millions)
    _save_annotated_barplot(
        df, 'trainable_parameters_millions', "Trainable Parameters Comparison",
        "Trainable Parameters (millions)", "{:.2f}M",
        os.path.join(output_dir, "trainable_parameters_comparison.png"),
    )

    # 4. GPU memory usage — the column is only present when it was recorded.
    has_gpu_memory = 'max_gpu_memory_mb' in df.columns
    if has_gpu_memory:
        _save_annotated_barplot(
            df, 'max_gpu_memory_mb', "GPU Memory Usage Comparison", "GPU Memory (MB)", "{:.1f} MB",
            os.path.join(output_dir, "gpu_memory_comparison.png"),
        )

    # 5. Combined plot: every metric rescaled so that higher is always better.
    metrics = ['accuracy', 'training_time', 'trainable_parameters']
    if has_gpu_memory:
        metrics.append('max_gpu_memory_mb')

    df_norm = pd.DataFrame({'method': df['display_name']})
    for metric in metrics:
        peak = df[metric].max()
        if not peak > 0:
            # Nothing to scale by (zero or missing maximum): avoid dividing by zero.
            df_norm[metric] = 0
        elif metric == 'accuracy':
            # Higher is better: fraction of the best accuracy.
            df_norm[metric] = df[metric] / peak
        else:
            # Lower is better: the most expensive method scores 0.
            df_norm[metric] = 1 - (df[metric] / peak)

    df_melted = pd.melt(df_norm, id_vars=['method'], var_name='Metric', value_name='Normalized Score')

    # Human-readable metric names for the legend.
    metric_names = {
        'accuracy': 'Accuracy (higher is better)',
        'training_time': 'Training Time (lower is better)',
        'trainable_parameters': 'Trainable Parameters (lower is better)',
        'max_gpu_memory_mb': 'GPU Memory Usage (lower is better)'
    }
    df_melted['Metric'] = df_melted['Metric'].map(lambda x: metric_names.get(x, x))

    fig, ax = plt.subplots(figsize=(14, 8))
    sns.barplot(x='method', y='Normalized Score', hue='Metric', data=df_melted, ax=ax)
    ax.set_title("Normalized Comparison of All Metrics")
    ax.set_xlabel("Method")
    ax.set_ylabel("Normalized Score (higher is better)")
    ax.legend(title='Metric', loc='lower right')

    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, "combined_normalized_comparison.png"), dpi=300)

    print(f"All visualizations have been saved to the '{output_dir}' directory.")

# Build and save the comparison plots only when this code executes as the main
# module, so importing the file does not trigger plotting.
if __name__ == "__main__":
    create_visualizations()