## Setup: CPU/GPU switch, imports, paths, config

In [2]:
# 01 - Setup: CPU/GPU switch, imports, paths, config

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import random
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Tuple

import torch

# ---- Device selection ----
RUN_DEVICE = "gpu"  # "gpu" or "cpu"

# CUDA is used only when it was requested AND is actually available;
# in every other case the notebook runs on CPU.
if RUN_DEVICE.lower() == "gpu" and torch.cuda.is_available():
    device = torch.device("cuda")
    # NOTE(review): cuDNN autotuning may select different kernels between runs,
    # so results may not be bit-for-bit reproducible despite the seeds below - confirm this is acceptable.
    torch.backends.cudnn.benchmark = True
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    torch.backends.cudnn.enabled = False
    # os.cpu_count() returns None when the count cannot be determined;
    # treat that as a single core instead of failing on `None // 2`.
    torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))
    print("Using CPU")

# ---- Seeds ----
# Seed every random number generator the notebook touches (Python, NumPy, PyTorch CPU/GPU).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)

# ---- Metrics ----
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# ---- Transformers ----
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
)

# ---- WSD: NLTK WordNet (custom simple Lesk) ----
import nltk

# Make sure WordNet + stopwords are available
def _ensure_nltk_corpus(label: str, resource: str, packages) -> None:
    """Download ``packages`` unless the corpus ``resource`` is already installed.

    Both ``resource`` and ``resource + ".zip"`` are probed: the recorded run of
    this notebook shows the plain ``corpora/wordnet`` lookup failing even though
    the downloader then reported the package as already up to date, which
    triggered a redundant download attempt on every start.

    Args:
        label: Human-readable corpus name used in the progress message.
        resource: NLTK resource path, e.g. ``"corpora/wordnet"``.
        packages: Iterable of NLTK package ids to download when nothing is found.
    """
    for candidate in (resource, resource + ".zip"):
        try:
            nltk.data.find(candidate)
            return
        except LookupError:
            continue

    print(f"Downloading NLTK {label} data...")
    for package in packages:
        nltk.download(package)


_ensure_nltk_corpus("WordNet", "corpora/wordnet", ("wordnet", "omw-1.4"))
_ensure_nltk_corpus("stopwords", "corpora/stopwords", ("stopwords",))

from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
import string

# English stop words, filtered out of both the context and the gloss signatures.
STOP_WORDS = set(stopwords.words("english"))

# ---- Data paths (SemEval SubTaskA) ----
# All CSV files live in the "Data" folder below the SubTaskA checkout.
DATA_DIR = Path("SemEval_2022_Task2-idiomaticity/SubTaskA")
TRAIN_ONE_SHOT = DATA_DIR.joinpath("Data", "train_one_shot.csv")
TRAIN_ZERO_SHOT = DATA_DIR.joinpath("Data", "train_zero_shot.csv")
DEV = DATA_DIR.joinpath("Data", "dev.csv")
DEV_GOLD = DATA_DIR.joinpath("Data", "dev_gold.csv")
EVAL = DATA_DIR.joinpath("Data", "eval.csv")
EVAL_SUB_FMT = DATA_DIR.joinpath("Data", "eval_submission_format.csv")

# ---- Outputs ----
# Checkpoints, submissions and run metadata are written here; created on first run.
OUT_DIR = Path("outputs_en_xlmr_wsd")
OUT_DIR.mkdir(exist_ok=True, parents=True)

# ---- Model & training config ----
MODEL_NAME = "xlm-roberta-base"
NUM_LABELS = 2

# Which training file to use:
#   "zero_shot" -> train_zero_shot.csv
#   "one_shot"  -> train_one_shot.csv
TRAIN_SETTING = "zero_shot"  # or "one_shot"

# Optimisation hyper-parameters
NUM_EPOCHS = 3
LR = 2e-5
WEIGHT_DECAY = 0.01
WARMUP_RATIO = 0.1

# Batch size follows the device chosen earlier in this cell.
BATCH_SIZE_GPU = 16
BATCH_SIZE_CPU = 8
if device.type == "cuda":
    BATCH_SIZE = BATCH_SIZE_GPU
else:
    BATCH_SIZE = BATCH_SIZE_CPU

# Toggle WSD usage (set False for baseline)
USE_WSD = True

print(f"TRAIN_SETTING={TRAIN_SETTING}, BATCH_SIZE={BATCH_SIZE}, NUM_EPOCHS={NUM_EPOCHS}, LR={LR}")
print("USE_WSD =", USE_WSD)


Using GPU: NVIDIA H100 80GB HBM3 MIG 2g.20gb
Downloading NLTK WordNet data...
Downloading NLTK stopwords data...


[nltk_data] Downloading package wordnet to /home/mhossai6/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/mhossai6/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/mhossai6/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


TRAIN_SETTING=zero_shot, BATCH_SIZE=16, NUM_EPOCHS=3, LR=2e-05
USE_WSD = True


## Data loading, context formatting, WSD helpers

In [3]:
# 02 - Data loading, context formatting, WSD helpers (EN only)

def load_any_csv(path: Path) -> pd.DataFrame:
    """Read a delimited text file into a DataFrame whose columns are all strings.

    ``sep=None`` together with the Python parsing engine lets pandas detect the
    delimiter instead of assuming a comma; ``dtype=str`` keeps every value as
    text so that IDs and labels are not coerced on load.
    """
    reader_options = {"sep": None, "engine": "python", "dtype": str}
    return pd.read_csv(path, **reader_options)

def ensure_label_int(df: pd.DataFrame, col="Label") -> pd.DataFrame:
    """Cast column ``col`` to ``int`` when it exists; otherwise do nothing.

    The cast is written back into ``df`` itself (the caller's frame is
    modified) and the same object is returned for convenient reassignment.
    """
    if col not in df.columns:
        return df
    as_int = df[col].astype(int)
    df[col] = as_int
    return df

def mark_first_case_insensitive(text: str, needle: str, ltag="<mwe>", rtag="</mwe>") -> str:
    """Wrap the first case-insensitive occurrence of ``needle`` in ``text`` with tags.

    Args:
        text: Sentence to search. Returned unchanged when it is not a string.
        needle: Expression to locate (matched literally, ignoring case).
        ltag: Marker inserted immediately before the match.
        rtag: Marker inserted immediately after the match.

    Returns:
        ``text`` with ``ltag``/``rtag`` around the first match, or ``text``
        unchanged when either argument is not a string, ``needle`` is empty,
        or no match exists.
    """
    import re  # local import: `re` is not part of the notebook's import cell

    if not isinstance(text, str) or not isinstance(needle, str):
        return text
    if not needle:
        # An empty needle "matches" at position 0 and would only insert an empty tag pair.
        return text

    # Search the ORIGINAL string: offsets taken from text.lower() can be shifted
    # because lowercasing may change the length of a string (e.g. "\u0130").
    match = re.search(re.escape(needle), text, flags=re.IGNORECASE)
    if match is None:
        return text

    start, end = match.span()
    return text[:start] + ltag + text[start:end] + rtag + text[end:]

def pack_context(prev: str, target: str, nxt: str, mwe: str) -> str:
    """Format the three context sentences as one labelled, newline-separated string.

    Missing sentences (values for which ``pd.isna`` is true) become empty
    strings, and the first occurrence of ``mwe`` inside the target sentence is
    wrapped by ``mark_first_case_insensitive`` before formatting.
    """
    def _blank_if_missing(value):
        return "" if pd.isna(value) else value

    prev = _blank_if_missing(prev)
    nxt = _blank_if_missing(nxt)
    target = _blank_if_missing(target)

    highlighted = mark_first_case_insensitive(target, mwe)
    return f"Previous: {prev}\nTarget: {highlighted}\nNext: {nxt}"

# ---- WSD helpers (custom simple Lesk using NLTK WordNet) ----

def build_simple_sentence(row) -> str:
    """Join the Previous, Target and Next fields of ``row`` into one string.

    ``row`` only needs a ``.get(key, default)`` method. Strings are used as
    they are, missing values (``pd.isna``) count as empty, and anything else is
    converted with ``str``. The pieces are joined with single spaces and the
    result is stripped at both ends.
    """
    def _as_text(value) -> str:
        if isinstance(value, str):
            return value
        return "" if pd.isna(value) else str(value)

    pieces = [_as_text(row.get(field, "")) for field in ("Previous", "Target", "Next")]
    return " ".join(pieces).strip()


def get_mwe_head(mwe: str) -> str:
    """Return the last whitespace-separated token of ``mwe``.

    That token is what the WSD step treats as the expression's head word.
    Non-string input and blank strings give an empty string.
    """
    if isinstance(mwe, str):
        words = mwe.split()
        if words:
            return words[-1]
    return ""


def simple_lesk_nltk(context_sentence: str, ambiguous_word: str):
    """
    Minimal Lesk-style word sense disambiguation on top of NLTK WordNet.

    Every synset of ``ambiguous_word`` is scored by the number of distinct
    content words (punctuation-stripped, lowercased, stop words removed) that
    its definition plus examples share with ``context_sentence``.

    Returns the first synset with the highest strictly positive score, or None
    when an argument is empty, the word has no synsets, or nothing overlaps.
    """
    if not (context_sentence and ambiguous_word):
        return None

    def _content_words(raw_tokens):
        # Normalise tokens, then drop empty strings and stop words.
        normalised = (tok.strip(string.punctuation).lower() for tok in raw_tokens)
        return {tok for tok in normalised if tok and tok not in STOP_WORDS}

    context_words = _content_words(context_sentence.split())

    candidates = wn.synsets(ambiguous_word)
    if not candidates:
        return None

    winner = None
    winner_score = 0
    for candidate in candidates:
        # Signature = gloss definition followed by all usage examples.
        gloss_tokens = candidate.definition().split()
        for example in candidate.examples():
            gloss_tokens.extend(example.split())

        score = len(_content_words(gloss_tokens) & context_words)
        if score > winner_score:
            winner, winner_score = candidate, score

    return winner


def annotate_with_wsd(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with two extra columns, SenseID and SenseGloss.

    For each row the context sentence (``build_simple_sentence``) and the head
    of the MWE (``get_mwe_head``) are passed to ``simple_lesk_nltk``. Rows
    without a sentence, without a head word, or without a selected synset get
    empty strings in both columns. Operates in-memory; no files written.

    Disambiguation stays best-effort: an exception on one row does not abort
    the run. Failures are counted, though, and reported once through a
    ``RuntimeWarning`` so that a systematically failing WSD step (which would
    make the run equivalent to the baseline) does not go unnoticed.
    """
    import warnings  # local import: not part of the notebook's import cell

    df = df.copy()
    sense_ids = []
    sense_glosses = []
    failed_rows = 0
    first_error = None

    for _, row in df.iterrows():
        sent = build_simple_sentence(row)
        mwe = row.get("MWE", "") or ""
        head = get_mwe_head(mwe)

        synset = None
        if sent and head:
            try:
                synset = simple_lesk_nltk(sent, head)
            except Exception as exc:
                # Keep going, but remember that (and why) this row failed.
                failed_rows += 1
                if first_error is None:
                    first_error = exc

        if synset is None:
            sense_ids.append("")
            sense_glosses.append("")
        else:
            sense_ids.append(synset.name())         # e.g. 'bank.n.02'
            sense_glosses.append(synset.definition())

    if failed_rows:
        warnings.warn(
            f"WSD failed on {failed_rows} of {len(df)} rows; "
            f"first error: {first_error!r}",
            RuntimeWarning,
        )

    df["SenseID"] = sense_ids
    df["SenseGloss"] = sense_glosses
    return df


def _select_en_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Strip whitespace from the column names and return a copy of the rows with Language == "EN"."""
    df.columns = [c.strip() for c in df.columns]
    return df[df["Language"] == "EN"].copy()


def _add_text_column(df: pd.DataFrame) -> pd.DataFrame:
    """Add the model input column 'text' to ``df`` (in place) and return ``df``.

    The text is the packed Previous/Target/Next context; when the global
    ``USE_WSD`` is true and the row has a non-empty SenseGloss, the gloss is
    appended after a blank line.
    """
    texts = []
    # A plain loop (instead of DataFrame.apply) also yields a valid, empty
    # column when the frame has no rows.
    for _, row in df.iterrows():
        text = pack_context(
            row.get("Previous", ""),
            row.get("Target", ""),
            row.get("Next", ""),
            row.get("MWE", ""),
        )
        if USE_WSD:
            gloss = (row.get("SenseGloss", "") or "").strip()
            if gloss:
                text = text + f"\n\nSense gloss (WordNet): {gloss}"
        texts.append(text)

    df["text"] = texts
    return df


def prepare_supervised_frame_en(
    train_path: Path,
    dev_path: Path,
    dev_gold_path: Path,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Prepare EN train/dev frames with optional WSD gloss appended to 'text'.

    Args:
        train_path: CSV with the training rows (must contain a Label column).
        dev_path: CSV with the dev rows.
        dev_gold_path: CSV with the dev gold labels, joined to the dev rows on ID.

    Returns:
        ``(train_df, dev_df)`` restricted to Language == "EN", each with an
        integer Label column and a 'text' column; SenseID/SenseGloss are added
        when the global ``USE_WSD`` is true.

    Raises:
        ValueError: if some EN dev row has no gold label after the join, or a
            label cannot be converted to int.
    """
    train_df = _select_en_rows(load_any_csv(train_path))
    dev_df = _select_en_rows(load_any_csv(dev_path))
    gold = _select_en_rows(load_any_csv(dev_gold_path))[["ID", "Label"]].copy()

    gold["ID"] = gold["ID"].astype(str)
    dev_df["ID"] = dev_df["ID"].astype(str)
    dev_lab = dev_df.merge(gold, on="ID", how="left")

    # The left join keeps dev rows without a gold entry; report those clearly
    # instead of failing later inside the integer cast.
    if "Label" in dev_lab.columns:
        unlabeled = int(dev_lab["Label"].isna().sum())
        if unlabeled:
            raise ValueError(
                f"{unlabeled} EN dev row(s) have no gold label in {dev_gold_path}"
            )

    train_df = ensure_label_int(train_df, "Label")
    dev_lab = ensure_label_int(dev_lab, "Label")

    if USE_WSD:
        print("Annotating EN train/dev with WSD (this might take a bit)...")
        train_df = annotate_with_wsd(train_df)
        dev_lab = annotate_with_wsd(dev_lab)

    train_df = _add_text_column(train_df)
    dev_lab = _add_text_column(dev_lab)

    return train_df, dev_lab


def prepare_eval_frame_en(eval_path: Path) -> pd.DataFrame:
    """
    Prepare EN eval frame (no labels) with optional WSD.

    Loads ``eval_path``, keeps the Language == "EN" rows, optionally annotates
    them with WordNet senses (global ``USE_WSD``) and adds the 'text' column in
    the same format as the training frames.
    """
    df = _select_en_rows(load_any_csv(eval_path))

    if USE_WSD:
        print("Annotating EN eval with WSD (this might take a bit)...")
        df = annotate_with_wsd(df)

    return _add_text_column(df)


## Dataset, dataloader, XLM-R model + tokenizer

In [4]:
# 03 - Dataset, dataloader, XLM-R model + tokenizer

class IdiomDataset(torch.utils.data.Dataset):
    """Torch dataset that tokenizes the 'text' column of a frame on access.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (the
    tokenizer output with its leading batch dimension removed) and, when
    labels are requested and a 'Label' column exists, a ``labels`` tensor of
    dtype long.
    """

    def __init__(self, df: pd.DataFrame, tokenizer, max_length: int = 256, with_labels: bool = True):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.with_labels = with_labels
        self.texts = df["text"].tolist()
        # Labels are kept only if they were requested AND the column is present.
        labels_available = with_labels and "Label" in df.columns
        self.labels = df["Label"].tolist() if labels_available else None

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx: int):
        encoded = self.tokenizer(
            self.texts[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        # The tokenizer returns (1, max_length) tensors; drop the batch axis.
        sample = {
            key: encoded[key].squeeze(0)
            for key in ("input_ids", "attention_mask")
        }
        if self.with_labels and self.labels is not None:
            sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample


def build_model_and_tokenizer(move_to_device: bool = True):
    """Load the ``MODEL_NAME`` tokenizer and a fresh sequence classifier.

    The tokenizer gets the two marker tokens ``<mwe>`` / ``</mwe>`` registered
    as additional special tokens, and the model's embedding table is resized to
    the new vocabulary size. With ``move_to_device`` the model is moved to the
    global ``device``.

    Returns:
        ``(model, tokenizer)``.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    tok.add_special_tokens({"additional_special_tokens": ["<mwe>", "</mwe>"]})

    classifier = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS,
    )
    # The vocabulary grew by the marker tokens, so the embeddings must follow.
    classifier.resize_token_embeddings(len(tok))

    if not move_to_device:
        return classifier, tok

    classifier.to(device)
    return classifier, tok


def make_loader(df: pd.DataFrame, tokenizer, batch_size: int, shuffle: bool, max_length: int = 256, with_labels: bool = True):
    """Wrap ``df`` in an ``IdiomDataset`` and return a ``DataLoader`` over it.

    Two worker processes are used when the global ``device`` is CUDA; on CPU
    the data is loaded in the main process.
    """
    worker_count = 2 if device.type == "cuda" else 0
    dataset = IdiomDataset(df, tokenizer, max_length=max_length, with_labels=with_labels)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=worker_count,
    )
    return loader


## Training, prediction utilities

In [5]:
# 04 - Training epoch, prediction utilities

def run_epoch(
    model,
    dataloader,
    optimizer,
    scheduler,
    train: bool = True,
    class_weights: torch.Tensor = None,
) -> Tuple[float, float]:
    """
    Run one epoch (train or eval). Returns (avg_loss, macro_F1).

    Args:
        model: Classifier whose forward output exposes ``.logits`` (and
            ``.loss`` when ``labels`` are passed).
        dataloader: Yields dicts with ``input_ids``, ``attention_mask`` and
            ``labels`` tensors.
        optimizer: Required when ``train`` is true; unused otherwise.
        scheduler: Optional; stepped once per batch while training.
        train: True for a training pass (gradients + parameter updates),
            False for evaluation.
        class_weights: Optional per-class weights. When given, the loss is a
            weighted negative log-likelihood computed from the logits;
            otherwise the model's own loss is used.

    Returns:
        ``(avg_loss, macro_f1)``, where ``avg_loss`` is the batch loss weighted
        by batch size and divided by the number of samples seen.

    Raises:
        ValueError: if ``train`` is true but no optimizer was supplied.
    """
    if train and optimizer is None:
        # Fail before any forward pass instead of on `None.zero_grad()` mid-batch.
        raise ValueError("run_epoch(train=True) requires an optimizer")

    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    all_y_true = []
    all_y_pred = []

    use_weighted_loss = class_weights is not None
    if use_weighted_loss:
        class_weights = class_weights.to(device)

    for batch in dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        with torch.set_grad_enabled(train):
            if use_weighted_loss:
                # The model's built-in loss would be discarded, so the labels are
                # not passed and only the logits are requested.
                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                )
                logits = outputs.logits
                log_probs = torch.log_softmax(logits, dim=-1)
                loss = torch.nn.functional.nll_loss(
                    log_probs,
                    labels,
                    weight=class_weights,
                )
            else:
                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels,
                )
                logits = outputs.logits
                loss = outputs.loss

            if train:
                optimizer.zero_grad()
                loss.backward()
                # Clip the global gradient norm to 1.0 before the update.
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

        total_loss += loss.item() * labels.size(0)
        preds = torch.argmax(logits, dim=-1)
        all_y_true.extend(labels.detach().cpu().tolist())
        all_y_pred.extend(preds.detach().cpu().tolist())

    # max(1, ...) guards the division when the dataloader produced no samples.
    avg_loss = total_loss / max(1, len(all_y_true))
    macro_f1 = f1_score(all_y_true, all_y_pred, average="macro")
    return avg_loss, macro_f1


def predict(model, dataloader) -> Tuple[List[int], List[int]]:
    """
    Run ``model`` in eval mode over a labeled ``dataloader``.

    Returns ``(y_true, y_pred)``: the gold labels and the arg-max class of the
    logits, both as flat Python lists in dataloader order.
    """
    model.eval()
    gold_labels, predicted_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            gold = batch["labels"].to(device)

            batch_logits = model(input_ids=ids, attention_mask=mask).logits
            batch_preds = torch.argmax(batch_logits, dim=-1)

            gold_labels.extend(gold.detach().cpu().tolist())
            predicted_labels.extend(batch_preds.detach().cpu().tolist())

    return gold_labels, predicted_labels


def predict_no_labels(model, dataloader) -> List[int]:
    """
    Run ``model`` in eval mode over an unlabeled ``dataloader`` (eval set).

    Returns the arg-max class of the logits for every sample as a flat Python
    list in dataloader order.
    """
    model.eval()
    predicted_labels = []

    with torch.no_grad():
        for batch in dataloader:
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)

            batch_logits = model(input_ids=ids, attention_mask=mask).logits
            batch_preds = torch.argmax(batch_logits, dim=-1)
            predicted_labels.extend(batch_preds.detach().cpu().tolist())

    return predicted_labels


## Train EN XLM-R (+/- WSD), evaluate on dev + eval

In [6]:
# 05 - Train EN XLM-R (+/- WSD) and evaluate on EN dev + eval

# Resolve the training file for the configured setting; any other value is a
# configuration error.
_train_files = {
    "one_shot": TRAIN_ONE_SHOT,
    "zero_shot": TRAIN_ZERO_SHOT,
}
if TRAIN_SETTING not in _train_files:
    raise ValueError(f"Unknown TRAIN_SETTING: {TRAIN_SETTING}")
train_path = _train_files[TRAIN_SETTING]

# Load the EN train/dev rows and build their 'text' column (with WSD if enabled).
print("\nPreparing EN train/dev frames...")
train_en_df, dev_en_df = prepare_supervised_frame_en(train_path, DEV, DEV_GOLD)
print(f"EN train size: {len(train_en_df)}, EN dev size: {len(dev_en_df)}")

# Class weights for EN training
# Inverse-frequency weighting: weight_i = n_samples / (n_classes * count_i).
# The class count comes from NUM_LABELS rather than a literal 2, so the formula
# stays correct if the number of labels is changed in the config cell.
label_counts = train_en_df["Label"].value_counts().to_dict()
print("EN train label counts:", label_counts)
total = sum(label_counts.values())
# A class that never occurs in the training data is counted as 1 to avoid a division by zero.
class_weights = [total / (NUM_LABELS * label_counts.get(i, 1)) for i in range(NUM_LABELS)]
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
print("EN class weights:", class_weights)

# Build model + tokenizer
# (the model is placed on the selected device right away)
model, tokenizer = build_model_and_tokenizer(move_to_device=True)

# Dataloaders
# Training batches are shuffled; dev batches keep the frame's row order.
train_loader_en = make_loader(
    train_en_df,
    tokenizer,
    batch_size=BATCH_SIZE,
    shuffle=True,
    max_length=256,
    with_labels=True,
)
dev_loader_en = make_loader(
    dev_en_df,
    tokenizer,
    batch_size=BATCH_SIZE,
    shuffle=False,
    max_length=256,
    with_labels=True,
)

# Optimizer & scheduler
# run_epoch steps the scheduler once per batch, so the schedule length is
# (number of epochs) x (batches per epoch); the first WARMUP_RATIO share of
# those steps is warm-up.
num_training_steps = NUM_EPOCHS * len(train_loader_en)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=LR,
    weight_decay=WEIGHT_DECAY,
)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(WARMUP_RATIO * num_training_steps),
    num_training_steps=num_training_steps,
)

# Best-checkpoint bookkeeping: -1.0 guarantees that the first epoch's dev score
# is accepted; `suffix` distinguishes WSD runs from baseline runs in all output
# file names.
best_dev_f1_en = -1.0
suffix = "wsd" if USE_WSD else "baseline"
best_path_en = OUT_DIR / f"xlmr_en_{TRAIN_SETTING}_{suffix}_best.pt"

# Train for NUM_EPOCHS epochs; after each one, score the dev set and keep the
# weights of the epoch with the highest dev macro-F1.
for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n=== Epoch {epoch}/{NUM_EPOCHS} (EN {TRAIN_SETTING}, USE_WSD={USE_WSD}) ===")
    # Training pass: class-weighted loss, optimizer and scheduler active.
    train_loss, train_f1 = run_epoch(
        model,
        train_loader_en,
        optimizer,
        scheduler,
        train=True,
        class_weights=class_weights_tensor,
    )
    print(f"[EN train] loss={train_loss:.4f} macro-F1={train_f1:.4f}")

    # Evaluation pass: no parameter updates and no class weights, so the dev
    # loss is the model's own (unweighted) loss and is not directly comparable
    # to the weighted training loss printed above.
    dev_loss, dev_f1 = run_epoch(
        model,
        dev_loader_en,
        optimizer=None,
        scheduler=None,
        train=False,
        class_weights=None,
    )
    print(f"[EN dev]   loss={dev_loss:.4f} macro-F1={dev_f1:.4f}")

    # Strict improvement only: on a tie the earlier checkpoint is kept.
    if dev_f1 > best_dev_f1_en:
        best_dev_f1_en = dev_f1
        torch.save(model.state_dict(), best_path_en)
        print(f"New best EN dev macro-F1={best_dev_f1_en:.4f} -> saved to {best_path_en}")

print(f"\nBest EN dev macro-F1 ({TRAIN_SETTING}, USE_WSD={USE_WSD}): {best_dev_f1_en:.4f}")

# Reload best checkpoint and evaluate final dev + eval
# A fresh model/tokenizer pair is built the same way as for training (so the
# embedding size matches the saved weights), then the best state dict is loaded.
model_best, tokenizer_best = build_model_and_tokenizer(move_to_device=True)
state = torch.load(best_path_en, map_location=device)
model_best.load_state_dict(state)
model_best.to(device)
model_best.eval()
print(f"Loaded best checkpoint from: {best_path_en}")

# Dev evaluation
# Re-scores the dev set with the reloaded weights and prints macro-F1, the
# per-class report and the confusion matrix.
dev_loader_en_best = make_loader(
    dev_en_df,
    tokenizer_best,
    batch_size=BATCH_SIZE,
    shuffle=False,
    max_length=256,
    with_labels=True,
)
ytrue_dev, ypred_dev = predict(model_best, dev_loader_en_best)
dev_f1_final = f1_score(ytrue_dev, ypred_dev, average="macro")
print(f"\n[EN dev final] macro-F1={dev_f1_final:.4f}")
print(classification_report(ytrue_dev, ypred_dev, digits=4))
print(confusion_matrix(ytrue_dev, ypred_dev))

# Eval predictions
# The eval rows are prepared like the training data (EN only, optional WSD
# gloss) but carry no labels.
eval_en_df = prepare_eval_frame_en(EVAL)
# shuffle=False keeps the predictions in the row order of eval_en_df; the
# submission frame below relies on that when it pairs IDs with `eval_preds`.
eval_loader_en = make_loader(
    eval_en_df,
    tokenizer_best,
    batch_size=BATCH_SIZE,
    shuffle=False,
    max_length=256,
    with_labels=False,
)
eval_preds = predict_no_labels(model_best, eval_loader_en)

# Submission table: one row per EN eval item with its predicted label and the
# training setting used for this run.
setting_str = TRAIN_SETTING
sub_en = pd.DataFrame({
    "ID": eval_en_df["ID"].astype(str),
    "Language": eval_en_df["Language"],
    "Setting": [setting_str] * len(eval_en_df),
    "Label": eval_preds,
})
sub_path_en = OUT_DIR / f"eval_submission_en_xlmr_{setting_str}_{suffix}.csv"
sub_en.to_csv(sub_path_en, index=False)
print(f"\nWrote EN eval submission to: {sub_path_en}")



Preparing EN train/dev frames...
Annotating EN train/dev with WSD (this might take a bit)...
EN train size: 3327, EN dev size: 466
EN train label counts: {0: 1762, 1: 1565}
EN class weights: [0.9440976163450624, 1.0629392971246006]


2025-11-26 22:34:34.700322: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-26 22:34:34.712254: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764225274.721466  482884 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764225274.724153  482884 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1764225274.733398  482884 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 


=== Epoch 1/3 (EN zero_shot, USE_WSD=True) ===
[EN train] loss=0.6191 macro-F1=0.6297
[EN dev]   loss=0.7501 macro-F1=0.7320
New best EN dev macro-F1=0.7320 -> saved to outputs_en_xlmr_wsd/xlmr_en_zero_shot_wsd_best.pt

=== Epoch 2/3 (EN zero_shot, USE_WSD=True) ===
[EN train] loss=0.2104 macro-F1=0.9399
[EN dev]   loss=1.0133 macro-F1=0.7939
New best EN dev macro-F1=0.7939 -> saved to outputs_en_xlmr_wsd/xlmr_en_zero_shot_wsd_best.pt

=== Epoch 3/3 (EN zero_shot, USE_WSD=True) ===
[EN train] loss=0.0755 macro-F1=0.9834
[EN dev]   loss=1.0156 macro-F1=0.8166
New best EN dev macro-F1=0.8166 -> saved to outputs_en_xlmr_wsd/xlmr_en_zero_shot_wsd_best.pt

Best EN dev macro-F1 (zero_shot, USE_WSD=True): 0.8166


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loaded best checkpoint from: outputs_en_xlmr_wsd/xlmr_en_zero_shot_wsd_best.pt

[EN dev final] macro-F1=0.8166
              precision    recall  f1-score   support

           0     0.8072    0.7363    0.7701       182
           1     0.8400    0.8873    0.8630       284

    accuracy                         0.8283       466
   macro avg     0.8236    0.8118    0.8166       466
weighted avg     0.8272    0.8283    0.8267       466

[[134  48]
 [ 32 252]]
Annotating EN eval with WSD (this might take a bit)...

Wrote EN eval submission to: outputs_en_xlmr_wsd/eval_submission_en_xlmr_zero_shot_wsd.csv


## Save run metadata

In [None]:
# 06 - Save run metadata

# Collect the run settings as KEY=value pairs and write them, one per line, to a
# text file next to the other outputs of this run.
_meta_path = OUT_DIR / f"run_en_xlmr_{TRAIN_SETTING}_{suffix}.txt"
_meta_fields = [
    ("MODEL_NAME", MODEL_NAME),
    ("DEVICE", device.type),
    ("TRAIN_SETTING", TRAIN_SETTING),
    ("USE_WSD", USE_WSD),
    ("NUM_EPOCHS", NUM_EPOCHS),
    ("BATCH_SIZE", BATCH_SIZE),
    ("LR", LR),
    ("BEST_EN_DEV_F1", f"{best_dev_f1_en:.4f}"),
]
with open(_meta_path, "w") as f:
    f.writelines(f"{key}={value}\n" for key, value in _meta_fields)

print("Saved run metadata.")
print("Outputs directory:", OUT_DIR)


Saved run metadata.
Outputs directory: outputs_en_xlmr_wsd


: 