In [1]:
import time
import tracemalloc
import torch
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
import evaluate
from langchain_ollama import OllamaLLM
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ollama_model_name = "llama3.1:latest"
ollama_llm = OllamaLLM(model=ollama_model_name)

In [3]:
# Load dataset (e.g., 'wmt16' for translation)
dataset = load_dataset('wmt16', 'ro-en')

In [6]:
# Load tokenizer for the baseline BART model
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")



In [7]:
# Tokenization function
def tokenize_function(examples):
    source_texts = [example['en'] for example in examples['translation']]
    return tokenizer(source_texts, truncation=True, padding="max_length", max_length=128)

# Apply tokenization to the dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [8]:
def preprocess_function(examples):
    source_texts = [example['en'] for example in examples['translation']]
    target_texts = [example['ro'] for example in examples['translation']]
    
    model_inputs = tokenizer(source_texts, truncation=True, padding="max_length", max_length=128)
    
    # Tokenize Romanian texts as labels
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(target_texts, truncation=True, padding="max_length", max_length=128)
    
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [9]:
# Take 10% of both train and validation datasets
sample_size_train = int(len(tokenized_datasets['train']) * 0.0001)
sample_size_eval = int(len(tokenized_datasets['validation']) * 0.01)

tokenized_train_dataset = tokenized_datasets['train'].shuffle(seed=42).select(range(sample_size_train)).map(preprocess_function, batched=True)
tokenized_eval_dataset = tokenized_datasets['validation'].shuffle(seed=42).select(range(sample_size_eval)).map(preprocess_function, batched=True)

In [23]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import torch.nn.functional as F

def evaluate_model(trainer, eval_dataset):
    # Generate predictions
    predictions = trainer.predict(eval_dataset).predictions
    logits = predictions[0]  # Assuming predictions is a tuple

    # Convert logits to probabilities and then get the predicted token IDs
    predicted_ids = torch.argmax(F.softmax(torch.tensor(logits), dim=-1), dim=-1).tolist()

    # Decode predictions
    decoded_preds = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)

    # Extract references from the evaluation dataset
    decoded_refs = tokenizer.batch_decode(eval_dataset['labels'], skip_special_tokens=True)

    # Initialize BLEU score accumulator
    total_bleu_score = 0
    num_sentences = len(decoded_preds)

    # Smoothing function
    smoothing_function = SmoothingFunction()

    for pred, ref in zip(decoded_preds, decoded_refs):
        # Tokenize predictions and references
        hypothesis = pred.split()  # Tokenization
        reference = ref.split()     # Tokenization

        # Compute BLEU score with smoothing
        BLEUscore = sentence_bleu([reference], hypothesis, smoothing_function=smoothing_function.method1)

        total_bleu_score += BLEUscore

    average_bleu_score = total_bleu_score / num_sentences if num_sentences > 0 else 0

    return average_bleu_score

In [24]:
def perplexity_eval(trainer, eval_dataset):
    # Load the perplexity metric
    perplexity_metric = evaluate.load("perplexity", module_type="metric")

    # Generate predictions using the trainer
    predictions = trainer.predict(eval_dataset).predictions
    logits = predictions[0]  # Assuming predictions is a tuple

    # Decode predictions to text
    predicted_ids = torch.argmax(F.softmax(torch.tensor(logits), dim=-1), dim=-1).tolist()
    decoded_preds = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)

    # Compute perplexity for the decoded predictions
    results = perplexity_metric.compute(model_id='gpt2',
                                        add_start_token=False,
                                        predictions=decoded_preds)

    return results

In [25]:
def normal_fine_tuning():
    tracemalloc.start()
    baseline_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")  # Adjust model name as needed

    training_args = TrainingArguments(
        output_dir="./results_baseline",
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        num_train_epochs=1,
        weight_decay=0.01,
    )

    trainer = Trainer(
        model=baseline_model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_eval_dataset,
    )

    start_training_time = time.time()
    trainer.train()
    end_training_time = time.time()

    # Evaluate and compute metrics
    bleu_score = evaluate_model(trainer, tokenized_eval_dataset)

    perplexity = perplexity_eval(trainer, tokenized_eval_dataset)

    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    return {
        "bleu_score": bleu_score,
        "perplexity": perplexity['mean_perplexity'],
        "training_time": end_training_time - start_training_time,
        "memory_current": current / (1024 * 1024),  # Convert to MB
        "memory_peak": peak / (1024 * 1024),  # Convert to MB
    }


In [31]:
import numpy as np
import torch.nn.functional as F
from torch import nn

# Function to apply low-rank approximation
def low_rank_approximation(layer, rank):
    # Get the weight matrix of the layer
    weight_matrix = layer.weight.data.cpu().numpy()
    
    # Apply SVD
    U, S, Vt = np.linalg.svd(weight_matrix, full_matrices=False)
    
    # Keep only the top `rank` singular values and corresponding vectors
    U_reduced = U[:, :rank]
    S_reduced = S[:rank]
    Vt_reduced = Vt[:rank, :]
    
    # Reconstruct the weight matrix using the reduced components
    low_rank_matrix = np.dot(U_reduced, np.dot(np.diag(S_reduced), Vt_reduced))
    
    # Update the layer's weight
    layer.weight.data = torch.tensor(low_rank_matrix, device=layer.weight.device)
    
# Low-Rank Fine-Tuning Function
def low_rank_fine_tuning(rank=10):  # Default rank value
    tracemalloc.start()
    
    low_rank_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")
    
    # Apply low-rank approximation to all linear layers (or specific layers)
    for name, layer in low_rank_model.named_modules():
        if isinstance(layer, nn.Linear):
            low_rank_approximation(layer, rank)

    training_args = TrainingArguments(
        output_dir="./results_low_rank",
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        num_train_epochs=1,
        weight_decay=0.01,
    )

    trainer = Trainer(
        model=low_rank_model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_eval_dataset,
    )

    start_training_time = time.time()
    trainer.train()
    end_training_time = time.time()

    # Evaluate and compute metrics
    bleu_score = evaluate_model(trainer, tokenized_eval_dataset)

    perplexity = perplexity_eval(trainer, tokenized_eval_dataset)

    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    return {
        "bleu_score": bleu_score,
        "perplexity": perplexity['mean_perplexity'],
        "training_time": end_training_time - start_training_time,
        "memory_current": current / (1024 * 1024),  # Convert to MB
        "memory_peak": peak / (1024 * 1024),  # Convert to MB
    }

In [27]:
# Run normal fine-tuning'
print("Baseline - \n")
baseline_metrics = normal_fine_tuning()
print(f"Baseline BLEU Score: {baseline_metrics['bleu_score']}")
print(f"Perplexity: {baseline_metrics['perplexity']}")
print(f"Training Time: {baseline_metrics['training_time']} seconds")
print(f"Current Memory Usage: {baseline_metrics['memory_current']} MB")
print(f"Peak Memory Usage: {baseline_metrics['memory_peak']} MB")


Baseline - 



Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
                                               
100%|██████████| 31/31 [00:08<00:00,  3.62it/s]


{'eval_loss': 7.929084300994873, 'eval_runtime': 0.3654, 'eval_samples_per_second': 51.998, 'eval_steps_per_second': 27.367, 'epoch': 1.0}
{'train_runtime': 8.5586, 'train_samples_per_second': 7.127, 'train_steps_per_second': 3.622, 'train_loss': 8.867292834866431, 'epoch': 1.0}


100%|██████████| 10/10 [00:00<00:00, 18.81it/s]
100%|██████████| 10/10 [00:00<00:00, 21.09it/s]
100%|██████████| 2/2 [00:01<00:00,  1.82it/s]

Baseline BLEU Score: 0.008674067409273102
Perplexity: 219.19412487431578
Training Time: 8.775947093963623 seconds
Current Memory Usage: 1.4311714172363281 MB
Peak Memory Usage: 533.319130897522 MB





In [32]:
# Run low-rank fine-tuning
print("Low Rank - \n")
low_rank_metrics = low_rank_fine_tuning()
print(f"Low-Rank BLEU Score: {low_rank_metrics['bleu_score']}")
print(f"Perplexity: {low_rank_metrics['perplexity']}")
print(f"Training Time: {low_rank_metrics['training_time']} seconds")
print(f"Current Memory Usage: {low_rank_metrics['memory_current']} MB")
print(f"Peak Memory Usage: {low_rank_metrics['memory_peak']} MB")

Low Rank - 



Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
                                               
100%|██████████| 31/31 [00:08<00:00,  3.60it/s]


{'eval_loss': 11.589853286743164, 'eval_runtime': 0.3694, 'eval_samples_per_second': 51.44, 'eval_steps_per_second': 27.073, 'epoch': 1.0}
{'train_runtime': 8.6156, 'train_samples_per_second': 7.08, 'train_steps_per_second': 3.598, 'train_loss': 11.952506772933468, 'epoch': 1.0}


100%|██████████| 10/10 [00:00<00:00, 18.42it/s]
100%|██████████| 10/10 [00:00<00:00, 20.68it/s]
100%|██████████| 2/2 [00:01<00:00,  1.67it/s]

Low-Rank BLEU Score: 0.0001774546387040829
Perplexity: 40.78178782212107
Training Time: 8.815294027328491 seconds
Current Memory Usage: 1.4161357879638672 MB
Peak Memory Usage: 594.1062326431274 MB





: 

In [20]:
# Create a DataFrame to store the metrics
results_df = pd.DataFrame({
    'Metric': ['BLEU Score', 'Perplexity', 'Training Time (s)', 'Current Memory Usage (MB)', 'Peak Memory Usage (MB)'],
    'Baseline': [baseline_metrics['bleu_score'], baseline_metrics['perplexity'],
                 baseline_metrics['training_time'], baseline_metrics['memory_current'],
                 baseline_metrics['memory_peak']],
    'Low Rank': [low_rank_metrics['bleu_score'], low_rank_metrics['perplexity'],
                 low_rank_metrics['training_time'], low_rank_metrics['memory_current'],
                 low_rank_metrics['memory_peak']]
})

# Export the DataFrame to a CSV file
results_df.to_csv('fine_tuning_metrics.csv', index=False)

print("Metrics exported to fine_tuning_metrics.csv")

Metrics exported to fine_tuning_metrics.csv
