In [None]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [None]:
import os
import numpy as np
import random
import torch
import evaluate
from dataclasses import dataclass
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

In [None]:
# --- 1. CONFIGURATION (The Control Center) ---
# Professional engineers keep all "knobs" in one place using dataclasses.
@dataclass
class ModelConfig:
    """Settings that identify the model, the dataset, and where run artifacts go."""
    # Checkpoint identifier handed to both the tokenizer and the model `from_pretrained` calls.
    model_name: str = "distilbert-base-uncased"
    # Dataset identifier handed to `load_dataset`.
    dataset_name: str = "imdb"
    # Number of output classes requested when the classification model is created.
    num_labels: int = 2
    # Directory given to TrainingArguments; the final model is saved in a subfolder of it.
    output_dir: str = "./results_level2"
    # Seed used for the RNG setup and for shuffling the train/eval splits.
    seed: int = 42

@dataclass
class TrainConfig:
    """Optimization hyperparameters forwarded to TrainingArguments."""
    # Passed as `learning_rate`.
    learning_rate: float = 2e-5
    # Used for both the per-device train and per-device eval batch size.
    batch_size: int = 16
    # Passed as `num_train_epochs`.
    epochs: int = 2
    # Passed as `weight_decay`.
    weight_decay: float = 0.01

# Build both config objects with their default values; the training code below
# reads these two module-level instances.
model_cfg, train_cfg = ModelConfig(), TrainConfig()

In [None]:
# --- 2. UTILITIES (Helper Functions) ---
def set_seed(seed_value):
    """Seed every random number generator this notebook relies on.

    Applies `seed_value`, in order, to Python's `random` module, NumPy's
    global generator, PyTorch's default generator, and PyTorch's CUDA
    generators.

    Args:
        seed_value: Integer seed handed unchanged to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_value)

# Loaded metric objects keyed by metric name, so `evaluate.load` runs once per
# metric instead of once per evaluation pass.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the `evaluate` metric called `name`, loading it only on first use."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_pred):
    """Calculates Accuracy and F1 score.

    Args:
        eval_pred: A two-item iterable that unpacks into `(logits, labels)`.
            The predicted class for each row is the argmax over the last
            axis of `logits`.

    Returns:
        A single dict merging the results of the "accuracy" and "f1" metrics.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    acc = _get_metric("accuracy").compute(predictions=predictions, references=labels)
    f1 = _get_metric("f1").compute(predictions=predictions, references=labels)
    return {**acc, **f1}

def get_tokenized_data(config: ModelConfig):
    """Load the configured dataset and tokenize its "text" column.

    Args:
        config: Supplies `dataset_name` (for `load_dataset`) and `model_name`
            (for the tokenizer checkpoint).

    Returns:
        A `(tokenized, tokenizer)` pair: the result of mapping the tokenizer
        over the loaded dataset, and the tokenizer itself so callers can
        reuse it (e.g. to save it next to the model).
    """
    print(f"Loading {config.dataset_name}...")
    raw_splits = load_dataset(config.dataset_name)
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)

    # Tokenizer options applied to every batch: max-length padding with truncation on.
    encode_options = {"padding": "max_length", "truncation": True}

    def _encode_batch(batch):
        return tokenizer(batch["text"], **encode_options)

    print("Tokenizing dataset...")
    return raw_splits.map(_encode_batch, batched=True), tokenizer

In [None]:
# --- 3. MAIN EXECUTION ---
def _take_subset(split, size, seed):
    """Return `size` shuffled rows of `split`, or `split` unchanged when `size` is None."""
    if size is None:
        return split
    # Clamp the request so asking for more rows than exist does not index out of range.
    return split.shuffle(seed=seed).select(range(min(size, len(split))))


def run_training(train_subset_size=1000, eval_subset_size=500):
    """Fine-tune the configured model, evaluate it, and save model + tokenizer.

    Reads the module-level `model_cfg` and `train_cfg` objects for all settings.

    Args:
        train_subset_size: Number of shuffled rows of the "train" split to
            train on. Pass None to use the whole split.
        eval_subset_size: Number of shuffled rows of the "test" split to
            evaluate on. Pass None to use the whole split.

    Returns:
        The metrics dict produced by the final `trainer.evaluate()` call.
    """
    # A. Setup
    set_seed(model_cfg.seed)

    # B. Data
    tokenized_datasets, tokenizer = get_tokenized_data(model_cfg)

    # Small subsets by default so the demo finishes quickly.
    train_dataset = _take_subset(tokenized_datasets["train"], train_subset_size, model_cfg.seed)
    eval_dataset = _take_subset(tokenized_datasets["test"], eval_subset_size, model_cfg.seed)

    # C. Model
    print(f"Initializing {model_cfg.model_name}...")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_cfg.model_name,
        num_labels=model_cfg.num_labels
    )

    # D. Trainer Setup
    training_args = TrainingArguments(
        output_dir=model_cfg.output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        # Log once per epoch: with step-based logging a short run can finish
        # before the first log step, leaving the training loss unreported.
        logging_strategy="epoch",
        learning_rate=train_cfg.learning_rate,
        per_device_train_batch_size=train_cfg.batch_size,
        per_device_eval_batch_size=train_cfg.batch_size,
        num_train_epochs=train_cfg.epochs,
        weight_decay=train_cfg.weight_decay,
        load_best_model_at_end=True,
        # Forward the configured seed so the Trainer does not fall back to its
        # own default and ignore a changed `model_cfg.seed`.
        seed=model_cfg.seed,
        report_to="none", # Keeping WandB off for now
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # E. Train & Evaluate
    print("Starting training...")
    trainer.train()

    print("Evaluating...")
    results = trainer.evaluate()
    print(f"Final Metrics: {results}")

    # F. Save the final artifact (Model + Tokenizer)
    # This is critical for production: always save the tokenizer with the model!
    save_path = f"{model_cfg.output_dir}/final_model"
    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)
    print(f"Model saved to {save_path}")

    # Hand the metrics back so callers can inspect them instead of parsing stdout.
    return results

In [None]:
# Entry point: start training when this code runs as the main module; the guard
# skips the call if the code is imported from elsewhere instead.
if __name__ == "__main__":
    run_training()

Loading imdb...


Access to the secret `HF_TOKEN` has not been granted on this notebook.
You will not be requested again.
Please restart the session if you want to be prompted again.


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Tokenizing dataset...


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Initializing distilbert-base-uncased...


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.415171,0.854,0.848233
2,No log,0.329087,0.864,0.865613


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Evaluating...


Final Metrics: {'eval_loss': 0.32908689975738525, 'eval_accuracy': 0.864, 'eval_f1': 0.8656126482213439, 'eval_runtime': 8.2644, 'eval_samples_per_second': 60.501, 'eval_steps_per_second': 3.872, 'epoch': 2.0}
Model saved to ./results_level2/final_model
