### Hyper-parameter search

In [None]:
from sklearn.model_selection import train_test_split
import random, math
import numpy as np

# Load the labelled training data and convert the label column to integer ids
# through the LABEL2ID lookup.
train_pilot_df = pd.read_csv("data/training.csv")
train_pilot_df["label"] = train_pilot_df["label"].map(LABEL2ID)

# Fail fast if any row ended up without a label id after the mapping
# (assuming LABEL2ID is a dict, unmapped label values become NaN).
assert not train_pilot_df["label"].isna().any()

# Keep a label-stratified 10% slice as the pilot set; the other 90% returned
# by the split is discarded.
pilot_df, _ = train_test_split(
    train_pilot_df,
    train_size=0.1,
    stratify=train_pilot_df["label"],
    random_state=seed,
)

# Build the pilot dataset: run `tokenize` over it in batches, drop the raw
# text column, and expose the model inputs plus the label as torch tensors.
pilot_ds = (
    Dataset.from_pandas(pilot_df[["sentence", "label"]])
    .map(tokenize, batched=True)
    .remove_columns(["sentence"])
)
pilot_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)

# Search space for the random hyper-parameter sweep.
lr_min, lr_max = 5e-6, 3e-5          # learning rate, sampled log-uniformly
wd_min, wd_max = 0.0, 0.05           # weight decay, sampled uniformly
bs_choices = [8, 16]                 # per-device train batch sizes
scheduler_choices = ["cosine", "linear"]
warmup_min, warmup_max = 0.0, 0.2    # warmup ratio, sampled uniformly

# Number of configurations to draw.
N = 30

# Seed both RNGs so the sampled configurations are reproducible.
random.seed(seed)
np.random.seed(seed)

# The log-space bounds of the learning rate do not change between draws.
log_lr_min = math.log10(lr_min)
log_lr_max = math.log10(lr_max)

configs = []
for _ in range(N):
    # The draw order (lr, wd, bs, sched, warm) determines which values each
    # configuration receives for a given seed, so it must not be reordered.
    lr = 10 ** random.uniform(log_lr_min, log_lr_max)
    wd = random.uniform(wd_min, wd_max)
    bs = random.choice(bs_choices)
    sched = random.choice(scheduler_choices)
    warm = random.uniform(warmup_min, warmup_max)
    configs.append(
        dict(
            learning_rate=lr,
            weight_decay=wd,
            per_device_train_batch_size=bs,
            lr_scheduler_type=sched,
            warmup_ratio=warm,
        )
    )

def run_pilot(config, idx):
    """Fine-tune a fresh model for one epoch on the pilot slice and score it.

    Args:
        config: Mapping with the sampled hyper-parameters. Must contain the
            keys ``learning_rate``, ``weight_decay``,
            ``per_device_train_batch_size``, ``lr_scheduler_type`` and
            ``warmup_ratio``.
        idx: Run number; only used to build the output directory
            ``pilot/run_{idx}``.

    Returns:
        A dict with every entry of ``config`` plus ``train_loss``,
        ``eval_loss``, ``eval_accuracy``, ``eval_f1`` and ``eval_mae``.
        The ``eval_*`` entries are ``None`` when the evaluation did not
        report them.
    """
    # Start from the shared base arguments and override the searched values
    # together with the settings specific to a short, checkpoint-free run.
    args = arguments.copy()
    args.update({
        "learning_rate": config["learning_rate"],
        "weight_decay": config["weight_decay"],
        "per_device_train_batch_size": config["per_device_train_batch_size"],
        "lr_scheduler_type": config["lr_scheduler_type"],
        "warmup_ratio": config["warmup_ratio"],
        "num_train_epochs": 1,
        "output_dir": f"pilot/run_{idx}",
        "save_strategy": "no",
        "eval_strategy": "epoch",
        "logging_steps": 100,
        "load_best_model_at_end": False,
    })
    pilot_args = TrainingArguments(**args)

    # A new model per run so that no weights carry over between configs.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=3, id2label=ID2LABEL, label2id=LABEL2ID
    )
    trainer = Trainer(
        model=model,
        args=pilot_args,
        train_dataset=pilot_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        data_collator=collator,
    )

    train_output = trainer.train()
    metrics = trainer.evaluate()

    # Step-level training logs have a "loss" key and no "eval_loss" key.
    history = trainer.state.log_history
    train_losses = [
        log["loss"] for log in history
        if "loss" in log and "eval_loss" not in log
    ]
    # With logging_steps=100 a short pilot run can finish before the first
    # step-level log is written. Fall back to the aggregate training loss
    # returned by train() so callers that format the value never get None.
    train_loss = train_losses[-1] if train_losses else train_output.training_loss

    return {
        **config,
        "train_loss":    train_loss,
        "eval_loss":     metrics.get("eval_loss"),
        "eval_accuracy": metrics.get("eval_accuracy"),
        "eval_f1":       metrics.get("eval_f1"),
        "eval_mae":      metrics.get("eval_mae"),
    }

In [None]:
def _fmt_metric(value):
    """Format a metric with four decimals, or "n/a" when it is missing."""
    return "n/a" if value is None else f"{value:.4f}"


# Run every sampled configuration once and collect one result row per run.
results = []
for idx, cfg in enumerate(configs, 1):
    print(f"→ Pilot {idx}/{len(configs)}: {cfg}")
    res = run_pilot(cfg, idx)
    results.append(res)
    # run_pilot may report None for a metric (e.g. no step-level train loss
    # was logged); formatting None with ":.4f" would raise TypeError and
    # abort the whole sweep, so go through the None-safe formatter.
    print(
        f"   train_loss={_fmt_metric(res['train_loss'])}, "
        f"eval_loss={_fmt_metric(res['eval_loss'])}, "
        f"acc={_fmt_metric(res['eval_accuracy'])}, "
        f"f1={_fmt_metric(res['eval_f1'])}, "
        f"mae={_fmt_metric(res['eval_mae'])}\n"
    )

# Build DataFrame and sort by MAE ascending (rows with a missing MAE go last)
df = pd.DataFrame(results)
df_sorted = df.sort_values("eval_mae").reset_index(drop=True)
df_sorted

In [None]:
# Re-run the five best pilot configurations (lowest eval_mae in df_sorted,
# which must be in scope from the pilot sweep) for two epochs each.
top5 = df_sorted.head(5)

results_2ep = []

for idx, row in top5.iterrows():
    # Extract config; the batch size is cast back to a plain int because the
    # row values come out of a DataFrame.
    config = {
        "learning_rate":               row["learning_rate"],
        "weight_decay":                row["weight_decay"],
        "per_device_train_batch_size": int(row["per_device_train_batch_size"]),
        "lr_scheduler_type":           row["lr_scheduler_type"],
        "warmup_ratio":                row["warmup_ratio"],
    }
    print(f"→ Top-5 run {idx+1}: {config}")

    # Build TrainingArguments for 2 epochs: base arguments, then the searched
    # hyper-parameters, then the run-specific settings.
    args = arguments.copy()
    args.update({
        **config,
        "num_train_epochs":       2,
        "output_dir":             f"pilot/top5_run_{idx+1}",
        "eval_strategy":          "epoch",
        "save_strategy":          "no",
        "load_best_model_at_end": False,
        "logging_steps":          100,
    })
    two_epoch_args = TrainingArguments(**args)

    # Initialize fresh model & trainer
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=3, id2label=ID2LABEL, label2id=LABEL2ID
    )
    trainer = Trainer(
        model=model,
        args=two_epoch_args,
        train_dataset=pilot_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        data_collator=collator,
    )

    # Train for 2 epochs and evaluate
    train_output = trainer.train()
    metrics = trainer.evaluate()

    # Step-level training logs have a "loss" key and no "eval_loss" key.
    history = trainer.state.log_history
    train_losses = [
        log["loss"] for log in history
        if "loss" in log and "eval_loss" not in log
    ]
    # A short run can end before the first step-level log (logging_steps=100).
    # Fall back to the aggregate loss returned by train() so the ":.4f"
    # formatting below never receives None.
    train_loss = train_losses[-1] if train_losses else train_output.training_loss

    # Record results
    results_2ep.append({
        **config,
        "train_loss":    train_loss,
        "eval_loss":     metrics.get("eval_loss"),
        "eval_accuracy": metrics.get("eval_accuracy"),
        "eval_f1":       metrics.get("eval_f1"),
        "eval_mae":      metrics.get("eval_mae"),
    })
    print(
        f"   train_loss={train_loss:.4f}, "
        f"eval_loss={metrics['eval_loss']:.4f}, "
        f"acc={metrics['eval_accuracy']:.4f}, "
        f"f1={metrics['eval_f1']:.4f}, "
        f"mae={metrics['eval_mae']:.4f}\n"
    )

# Display summary table, best (lowest) MAE first
df_2ep = pd.DataFrame(results_2ep)
df_2ep_sorted = df_2ep.sort_values("eval_mae").reset_index(drop=True)
df_2ep_sorted

In [None]:
from transformers import set_seed

# 1) Extract top-2 configs from the 2-epoch results
top2 = df_2ep_sorted.head(2).copy().reset_index(drop=True)

# 2) Define the seeds to test
seeds_to_test = [13, 23]

# 3) Prepare a list to collect all runs (one record per config/seed pair)
stability_results = []

for idx, row in top2.iterrows():
    config = {
        "learning_rate": row["learning_rate"],
        "weight_decay": row["weight_decay"],
        "per_device_train_batch_size": int(row["per_device_train_batch_size"]),
        "lr_scheduler_type": row["lr_scheduler_type"],
        "warmup_ratio": row["warmup_ratio"],
    }
    print(f"→ Config #{idx+1}: {config}")

    for seed_val in seeds_to_test:
        # 4) Seed the RNGs before the model is created so that any weights
        #    from_pretrained initialises randomly depend on seed_val.
        #    set_seed covers Python's random, NumPy and torch (CPU and CUDA).
        set_seed(seed_val)

        # 5) Build TrainingArguments for 2 epochs. The seed is passed
        #    explicitly: TrainingArguments carries its own `seed`, which the
        #    Trainer applies, so without this override every run would train
        #    with the base seed from `arguments` regardless of seed_val.
        args = arguments.copy()
        args.update({
            **config,
            "seed":                   seed_val,
            "num_train_epochs":       2,
            "output_dir":             f"pilot/top2_cfg{idx+1}_seed{seed_val}",
            "eval_strategy":          "epoch",
            "save_strategy":          "no",
            "load_best_model_at_end": False,
            "logging_steps":          100,
        })
        seed_args = TrainingArguments(**args)

        # 6) Init fresh model & trainer
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=3, id2label=ID2LABEL, label2id=LABEL2ID
        )
        trainer = Trainer(
            model=model,
            args=seed_args,
            train_dataset=pilot_ds,
            # Score on the held-out validation set, like the earlier sweep
            # stages; evaluating on pilot_ds would measure fit to the
            # training slice and is not comparable with df_2ep_sorted.
            eval_dataset=val_ds,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            data_collator=collator,
        )

        # 7) Train & evaluate
        train_output = trainer.train()
        metrics = trainer.evaluate()

        # Step-level training logs have a "loss" key and no "eval_loss" key;
        # fall back to the aggregate loss returned by train() when the run
        # ended before the first step-level log (logging_steps=100).
        history = trainer.state.log_history
        train_losses = [
            log["loss"] for log in history
            if "loss" in log and "eval_loss" not in log
        ]
        train_loss = train_losses[-1] if train_losses else train_output.training_loss

        stability_results.append({
            "cfg_idx":       idx+1,
            "seed":          seed_val,
            **config,
            "train_loss":    train_loss,
            "eval_loss":     metrics.get("eval_loss"),
            "eval_accuracy": metrics.get("eval_accuracy"),
            "eval_f1":       metrics.get("eval_f1"),
            "eval_mae":      metrics.get("eval_mae"),
        })
        print(f"   seed={seed_val} → mae={metrics['eval_mae']:.4f}")

# 8) Build a DataFrame and compute mean±std of the MAE per config; the
#    hyper-parameters are constant within a config, so "first" recovers them.
stability_df = pd.DataFrame(stability_results)
summary = stability_df.groupby("cfg_idx").agg(
    lr     = ("learning_rate", "first"),
    wd     = ("weight_decay", "first"),
    bs     = ("per_device_train_batch_size", "first"),
    sched  = ("lr_scheduler_type", "first"),
    warmup = ("warmup_ratio", "first"),
    mae_mean = ("eval_mae", "mean"),
    mae_std  = ("eval_mae", "std"),
).reset_index()

summary

In [None]:
# 1) Hyper-parameters of the chosen configuration, hard-coded here
#    (presumably copied from the earlier sweep results — TODO confirm).
best_cfg = dict(
    learning_rate=2.02e-05,
    weight_decay=0.0191,
    per_device_train_batch_size=8,
    lr_scheduler_type="cosine",
    warmup_ratio=0.10,
)

# 2) Training arguments for a single-epoch sanity run: shared base arguments,
#    overlaid with the chosen hyper-parameters, then the run-specific settings
#    (no checkpoints, evaluation once per epoch).
args = arguments.copy()
args.update(best_cfg)
args.update(
    num_train_epochs=1,
    output_dir="sanity/full_data_cfg2",
    eval_strategy="epoch",
    save_strategy="no",
    load_best_model_at_end=False,
    logging_steps=500,
)
sanity_args = TrainingArguments(**args)

# 3) Fresh model, trained on train_ds (not the pilot slice) and evaluated on
#    val_ds.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
)
trainer = Trainer(
    model=model,
    args=sanity_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=collator,
)

# 4) Run training; no separate evaluate() call is made here, evaluation is
#    configured through eval_strategy="epoch" above.
trainer.train()
