# Model Comparison
This notebook compares 3 models: BERT, RoBERTa, and DeBERTa. The model with the best metrics will be selected as the baseline for the application. I'll use the Stanford Sentiment Treebank (SST-2) as the testing dataset. Because it's a binary dataset, the first iteration will be a 2-class classification: positive sentiments and negative sentiments.

In [None]:
from datasets import load_dataset
# Set up HF token
from huggingface_hub import login
import os

# The token is not loaded with load_dotenv() because this is a testing notebook and doing so
# may cause some issues. Instead it is read from the HF_TOKEN environment variable when that
# is set, so a real token never has to be written into (and committed with) the notebook.
# The placeholder is only the fallback and must be replaced for the login to succeed.
HF_TOKEN = os.environ.get("HF_TOKEN", "your_hf_token")
login(token=HF_TOKEN)

# Load the SST-2 (binary sentiment) configuration of the GLUE benchmark.
# `dataset` is read as a module-level global by `train_evaluate_model`.
dataset = load_dataset("glue", "sst2")

In [None]:
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding


def train_evaluate_model(model: str):
    """
    Fine-tune a Hugging Face checkpoint on the module-level ``dataset`` and evaluate it.

    Steps:
        1. Load the tokenizer and a 2-label sequence-classification model for ``model``.
        2. Tokenize every split of ``dataset`` (the ``sentence`` column), dropping the
           ``sentence`` and ``idx`` columns.
        3. Train for 3 epochs with ``Trainer``, evaluating and checkpointing every epoch and
           reloading the checkpoint with the best ``eval_loss`` at the end.
        4. Save the model to ``./models/<name>`` and write the validation metrics to
           ``./models/<name>/eval_results.txt`` (one ``key: value`` pair per line).

    Args:
        model: Hub identifier of the checkpoint, e.g. ``"org/model-name"``. The part after
            the last ``/`` is used as the directory name for results and saved models.

    Returns:
        A dict with:
            - ``'model'``: the fine-tuned model object.
            - ``'datasets'``: a dict with the tokenized ``'train'``, ``'val'`` and ``'test'``
              splits.
    """
    # Keep the identifier in its own variable: the original code rebound `model` to the
    # loaded network and then called `.split("/")` on it, which raised AttributeError.
    model_name = model
    save_name = model_name.split("/")[-1]  # Standardize the model name

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # SST-2 is a binary problem. Checkpoints that were already fine-tuned for another task
    # may ship a classification head with a different number of classes; without
    # `ignore_mismatched_sizes=True` loading such a checkpoint fails with a size-mismatch
    # error instead of re-initializing the head.
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        ignore_mismatched_sizes=True,
    )
    # Padding is done per batch by the collator, so the tokenizer itself does not pad.
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    def tokenize_function(examples):
        """Tokenize a batch of sentences, truncating to at most 256 tokens."""
        return tokenizer(
            examples["sentence"],
            truncation=True,
            padding=False,
            max_length=256,
        )

    # Tokenize the dataset for the model
    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=["sentence", "idx"],  # Remove unnecessary data
    )

    # Split the dataset
    train_dataset = tokenized_dataset["train"]
    val_dataset = tokenized_dataset["validation"]
    test_dataset = tokenized_dataset["test"]

    print(f"   Train: {len(train_dataset)} samples")
    print(f"   Val: {len(val_dataset)} samples")
    print(f"   Test: {len(test_dataset)} samples")

    output_dir = f"./results/{save_name}"
    model_save_dir = f"./models/{save_name}"

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        learning_rate=2e-5,
        # Evaluation and best saving
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
    )

    trainer = Trainer(
        model=classifier,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    print("Starting training...")
    trainer.train()

    # Make sure the target directory exists before anything is written to it
    os.makedirs(model_save_dir, exist_ok=True)

    # Save model
    trainer.save_model(model_save_dir)
    print(f"Model saved in: {model_save_dir}")

    # Save metrics (computed on the validation split passed as `eval_dataset`)
    eval_results = trainer.evaluate()
    with open(f"{model_save_dir}/eval_results.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{key}: {value}\n" for key, value in eval_results.items())

    # We only return the tokenized dataset and the model.
    # Other values could be added, but for a simple comparison they are not necessary.
    return {
        'model': classifier,
        'datasets': {
            'train': train_dataset,
            'val': val_dataset,
            'test': test_dataset,
        },
    }

In [None]:
# Run the train/evaluate pipeline for the DeBERTa checkpoint; `deberta` holds the returned
# dict with the model and the tokenized train/val/test splits.
deberta = train_evaluate_model("microsoft/deberta-large-mnli")

In [None]:
# Run the train/evaluate pipeline for the BERT checkpoint; `bert` holds the returned
# dict with the model and the tokenized train/val/test splits.
bert = train_evaluate_model("nlptown/bert-base-multilingual-uncased-sentiment")

In [None]:
# Run the train/evaluate pipeline for the RoBERTa checkpoint; `roberta` holds the returned
# dict with the model and the tokenized train/val/test splits.
roberta = train_evaluate_model("cardiffnlp/twitter-roberta-base-sentiment-latest")