## Setup: CPU-only, imports, seeds, paths

In [5]:
# 01 - Setup: CPU-only, imports, seeds, paths

import os
# These environment variables are set before torch / transformers are imported
# further down, so they are already in place when those libraries load.
# Presumably "-1" hides every CUDA device so the notebook stays CPU-only
# (matches the cell title) -- TODO confirm against the CUDA documentation.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Presumably turns off thread parallelism inside the HF tokenizers library --
# TODO confirm against the tokenizers documentation.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import random
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple, Optional

import torch
# The notebook is CPU-only, so the cuDNN backend is switched off explicitly.
torch.backends.cudnn.enabled = False
# Use roughly half of the available cores, but never fewer than one thread.
# os.cpu_count() returns None when the count cannot be determined; fall back
# to 1 so the integer division below cannot raise TypeError.
torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))
# Every model and tensor in the notebook is moved to this device.
device = torch.device("cpu")

from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import train_test_split

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup
)
from torch.utils.data import Dataset, DataLoader

# One seed shared by the Python, NumPy and PyTorch random number generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Input files; every path is resolved relative to DATA_DIR / "Data".
DATA_DIR = Path("SemEval_2022_Task2-idiomaticity/SubTaskA")
TRAIN_ONE_SHOT = DATA_DIR / "Data" / "train_one_shot.csv"
TRAIN_ZERO_SHOT = DATA_DIR / "Data" / "train_zero_shot.csv"
DEV = DATA_DIR / "Data" / "dev.csv"
DEV_GOLD = DATA_DIR / "Data" / "dev_gold.csv"
EVAL = DATA_DIR / "Data" / "eval.csv"
EVAL_SUB_FMT = DATA_DIR / "Data" / "eval_submission_format.csv"

# Root folder for everything the notebook writes (checkpoints, submissions).
# It is created as soon as this cell runs.
OUT_DIR = Path("outputs_en_xlmr")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Model choice and default optimisation hyper-parameters; train_and_eval uses
# these as its keyword defaults.
MODEL_NAME = "xlm-roberta-base"  # change to xlm-roberta-large if you can tolerate slower CPU training
BATCH_SIZE = 8                   # CPU-friendly; lower if memory constrained
MAX_LEN = 256                    # long enough for Prev/Target/Next
EPOCHS = 2                       # keep small for CPU; adjust if you have patience
LR = 2e-5
WARMUP_RATIO = 0.06              # fraction of total optimiser steps spent in linear warm-up
WEIGHT_DECAY = 0.01

## IO helpers and data preparation for Subtask A (EN only)

In [6]:
# 02 - IO helpers and data preparation for Subtask A (EN only)

def load_any_csv(path: Path) -> pd.DataFrame:
    """Read a delimited text file with every column loaded as strings.

    The separator is not fixed: ``sep=None`` together with the Python parsing
    engine asks pandas to detect it from the file.

    Args:
        path: Location of the file to read.

    Returns:
        The file contents as a DataFrame whose columns use ``dtype=str``.
    """
    read_options = {"sep": None, "engine": "python", "dtype": str}
    return pd.read_csv(path, **read_options)

def ensure_label_int(df: pd.DataFrame, col="Label") -> pd.DataFrame:
    """Cast the label column to ``int`` in place, if the column exists.

    Args:
        df: Frame to update; it is modified directly, not copied.
        col: Name of the column to cast.

    Returns:
        The same ``df`` object, left untouched when ``col`` is absent.
    """
    if col not in df.columns:
        return df
    df[col] = df[col].astype(int)
    return df

def mark_first_case_insensitive(text: str, needle: str, ltag: str="<mwe>", rtag: str="</mwe>") -> str:
    """Wrap the first case-insensitive occurrence of ``needle`` in ``text`` with tags.

    Args:
        text: Sentence to search. Non-string values (e.g. NaN from a missing
            cell) are returned unchanged.
        needle: Literal phrase to look for; it is not treated as a regex.
            Non-string or empty values leave ``text`` unchanged.
        ltag: Marker inserted immediately before the match.
        rtag: Marker inserted immediately after the match.

    Returns:
        ``text`` with the first match surrounded by ``ltag``/``rtag``, keeping
        the original casing of the matched span, or ``text`` unchanged when
        there is nothing to mark.
    """
    # Local import keeps this function self-contained; the file's import
    # header does not bring in ``re``.
    import re

    if not isinstance(text, str) or not isinstance(needle, str):
        return text
    if not needle:
        # An empty phrase "matches" at offset 0 and would only insert an
        # empty tag pair in front of the sentence.
        return text

    # Search the ORIGINAL string. Looking the offset up in text.lower() and
    # then slicing text is wrong whenever lowercasing changes the length
    # (e.g. "İ".lower() is two code points), which shifts the tags.
    match = re.search(re.escape(needle), text, flags=re.IGNORECASE)
    if match is None:
        return text
    start, end = match.span()
    return f"{text[:start]}{ltag}{text[start:end]}{rtag}{text[end:]}"

def build_input(prev: str, target: str, nxt: str, mwe: str, sep_token: str) -> str:
    """Assemble the model input ``prev <sep> target <sep> next`` for one example.

    The first case-insensitive occurrence of ``mwe`` inside ``target`` is
    wrapped in ``<mwe>``/``</mwe>`` markers.

    Args:
        prev: Sentence preceding the target; missing values become "".
        target: Sentence containing the expression; missing values become "".
        nxt: Sentence following the target; missing values become "".
        mwe: The multiword expression to mark inside ``target``.
        sep_token: Separator placed between the three segments.

    Returns:
        The concatenated string with leading/trailing whitespace stripped.
    """
    # Empty cells arrive as NaN (or None). Normalise all three segments so a
    # missing target is not interpolated as the literal text "nan"/"None".
    prev = "" if pd.isna(prev) else prev
    target = "" if pd.isna(target) else target
    nxt = "" if pd.isna(nxt) else nxt

    target_marked = mark_first_case_insensitive(target, mwe, "<mwe>", "</mwe>")
    # XLM-R sep token is '</s>'; include context on both sides
    return f"{prev} {sep_token} {target_marked} {sep_token} {nxt}".strip()

def prepare_supervised_frame(train_path: Path, dev_path: Path, dev_gold_path: Path, language="EN") -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and dev splits for one language and attach gold dev labels.

    Args:
        train_path: Training file; must contain "Language" (and normally
            "Label") columns.
        dev_path: Unlabelled dev file with "ID" and "Language" columns.
        dev_gold_path: Gold file with "ID", "Language" and "Label" columns.
        language: Value of the "Language" column to keep.

    Returns:
        ``(train_df, dev_labeled)``, both restricted to ``language`` and with
        an integer "Label" column.

    Raises:
        ValueError: If a dev row has no gold label after the join.
    """
    train_df = load_any_csv(train_path)
    dev_df = load_any_csv(dev_path)
    gold_df = load_any_csv(dev_gold_path)

    # Harmonize columns: headers may carry stray whitespace.
    for frame in (train_df, dev_df, gold_df):
        frame.columns = [c.strip() for c in frame.columns]

    # Keep only the requested language.
    train_df = train_df[train_df["Language"] == language].copy()
    dev_df = dev_df[dev_df["Language"] == language].copy()

    # Attach gold labels to dev, joining on the ID as a string.
    dev_gold = gold_df[gold_df["Language"] == language][["ID", "Label"]].copy()
    dev_gold["ID"] = dev_gold["ID"].astype(str)
    dev_df["ID"] = dev_df["ID"].astype(str)
    dev_labeled = dev_df.merge(dev_gold, on="ID", how="left")

    # A left join leaves NaN for dev rows without a gold entry. The integer
    # cast below would then fail with an opaque NaN conversion error, so
    # report the offending IDs explicitly instead.
    if "Label" in dev_labeled.columns:
        unlabeled = dev_labeled["Label"].isna()
        if unlabeled.any():
            examples = dev_labeled.loc[unlabeled, "ID"].head(5).tolist()
            raise ValueError(
                f"{int(unlabeled.sum())} dev rows for language {language!r} have no gold "
                f"label in {dev_gold_path} (first IDs: {examples})"
            )
    dev_labeled = ensure_label_int(dev_labeled, "Label")

    # Ensure train label int
    train_df = ensure_label_int(train_df, "Label")

    return train_df, dev_labeled

def prepare_eval_frame(eval_path: Path, language="EN") -> pd.DataFrame:
    """Load the evaluation file and keep only the rows of one language.

    Args:
        eval_path: Evaluation file; must contain a "Language" column.
        language: Value of the "Language" column to keep.

    Returns:
        A copy of the matching rows, with whitespace stripped from the
        column names.
    """
    frame = load_any_csv(eval_path)
    frame.columns = [name.strip() for name in frame.columns]
    in_language = frame["Language"] == language
    return frame[in_language].copy()

## Dataset & collate for XLM-R

In [7]:
# 03 - Dataset & collate for XLM-R

class IdiomDataset(Dataset):
    """Dataset of pre-built input strings (and labels) for idiomaticity detection.

    All input strings are assembled once in the constructor from the
    "Previous", "Target", "Next" and "MWE" columns; tokenisation is not done
    here. Items are plain dicts with a "text" key and, unless ``is_infer`` is
    set, a "label" key.
    """

    def __init__(self, df: pd.DataFrame, tokenizer, max_len: int, is_infer: bool=False):
        """Build the text (and label) lists from ``df``.

        Args:
            df: Source rows; a "Label" column is required unless ``is_infer``.
            tokenizer: Only its ``sep_token`` is read here; the object is kept
                on the instance.
            max_len: Stored on the instance; not used by this class itself.
            is_infer: When True, no labels are read or returned.
        """
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.is_infer = is_infer

        # Fall back to the literal "</s>" when the tokenizer defines no separator.
        sep_token = tokenizer.sep_token
        self.sep = "</s>" if sep_token is None else sep_token

        self.texts = []
        self.labels = []
        for _, record in self.df.iterrows():
            # Columns that are absent altogether default to the empty string.
            self.texts.append(
                build_input(
                    record.get("Previous", ""),
                    record.get("Target", ""),
                    record.get("Next", ""),
                    record.get("MWE", ""),
                    self.sep,
                )
            )
            if not is_infer:
                self.labels.append(int(record["Label"]))

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``{"text": ...}`` plus ``"label"`` when not in inference mode."""
        if self.is_infer:
            return {"text": self.texts[idx]}
        return {"text": self.texts[idx], "label": self.labels[idx]}

def collate_fn(batch, tokenizer, max_len: int, is_infer: bool=False):
    """Tokenise a list of dataset items into one padded batch.

    Args:
        batch: Items with a "text" key and, unless ``is_infer``, a "label" key.
        tokenizer: Callable invoked with the list of texts and the padding,
            truncation, ``max_length`` and ``return_tensors="pt"`` options.
        max_len: Passed to the tokenizer as ``max_length``.
        is_infer: When True, labels are not read from the items.

    Returns:
        The tokenizer output alone when ``is_infer`` is True, otherwise a
        ``(encoding, labels)`` pair where ``labels`` is a ``torch.long`` tensor.
    """
    enc = tokenizer(
        [example["text"] for example in batch],
        padding=True,
        truncation=True,
        max_length=max_len,
        return_tensors="pt",
    )
    if is_infer:
        return enc
    labels = torch.tensor([example["label"] for example in batch], dtype=torch.long)
    return enc, labels

## Model, training, evaluation (CPU-only) + weighted-loss variant

In [15]:
# 04 - Model, training, evaluation (CPU-only) + weighted-loss variant

def build_model_and_tokenizer(model_name: str):
    """Load a two-label sequence classifier and its tokenizer.

    The ``<mwe>``/``</mwe>`` markers are registered as additional special
    tokens, and the model's embedding matrix is resized to the new tokenizer
    length.

    Args:
        model_name: Name or path passed to both ``from_pretrained`` calls.

    Returns:
        ``(model, tokenizer)``, with the model moved to the module-level
        ``device``.
    """
    marker_tokens = ["<mwe>", "</mwe>"]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.add_special_tokens({"additional_special_tokens": marker_tokens})

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    # The vocabulary just changed, so the embedding table has to be resized too.
    model.resize_token_embeddings(len(tokenizer))
    model.to(device)
    return model, tokenizer

def run_epoch(model, loader, tokenizer, optimizer, scheduler, train_mode: bool):
    """Run one full pass over ``loader``, optionally updating the model.

    The loss is the one the model computes itself when given ``labels``.

    Args:
        model: Classifier whose forward output exposes ``.loss`` and ``.logits``.
        loader: Yields ``(encoding, labels)`` batches.
        tokenizer: Accepted for signature symmetry; not used in this function.
        optimizer: Stepped after every batch when ``train_mode`` is True; may
            be None otherwise.
        scheduler: Stepped after every optimiser step when not None.
        train_mode: True to train (gradients on), False to evaluate.

    Returns:
        ``(avg_loss, macro_f1, labels, predictions)`` where ``avg_loss`` is the
        per-example mean of the batch losses and the two lists cover every
        example seen, in loader order.
    """
    switch_mode = model.train if train_mode else model.eval
    switch_mode()

    predictions, targets = [], []
    loss_sum = 0.0

    for enc, labels in loader:
        if train_mode:
            optimizer.zero_grad()

        inputs = {name: tensor.to(device) for name, tensor in enc.items()}
        labels = labels.to(device)

        # Gradients are tracked only while training.
        with torch.set_grad_enabled(train_mode):
            output = model(**inputs, labels=labels)
            loss, logits = output.loss, output.logits

            if train_mode:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

        # Weight the batch loss by batch size so the final mean is per example.
        loss_sum += loss.item() * labels.size(0)
        predictions.extend(logits.argmax(dim=-1).tolist())
        targets.extend(labels.tolist())

    mean_loss = loss_sum / len(targets)
    macro_f1 = f1_score(targets, predictions, average="macro")
    return mean_loss, macro_f1, targets, predictions

# ---- weighted-loss version for tiny/imbalanced one-shot ----
import torch.nn.functional as F

def run_epoch_weighted(model, loader, tokenizer, optimizer, scheduler, train_mode: bool, class_weights: Optional[torch.Tensor]=None):
    """Run one full pass over ``loader`` with an externally computed loss.

    Unlike ``run_epoch``, the model is called without labels and the loss is
    ``F.cross_entropy`` on its logits, optionally weighted per class.

    Args:
        model: Classifier whose forward output exposes ``.logits``.
        loader: Yields ``(encoding, labels)`` batches.
        tokenizer: Accepted for signature symmetry; not used in this function.
        optimizer: Stepped after every batch when ``train_mode`` is True; may
            be None otherwise.
        scheduler: Stepped after every optimiser step when not None.
        train_mode: True to train (gradients on), False to evaluate.
        class_weights: Per-class weights for the cross-entropy loss, or None
            for the unweighted loss.

    Returns:
        ``(avg_loss, macro_f1, labels, predictions)`` where ``avg_loss`` is the
        batch losses averaged with batch-size weighting and the two lists
        cover every example seen, in loader order.
    """
    switch_mode = model.train if train_mode else model.eval
    switch_mode()

    predictions, targets = [], []
    loss_sum = 0.0

    for enc, labels in loader:
        if train_mode:
            optimizer.zero_grad()

        inputs = {name: tensor.to(device) for name, tensor in enc.items()}
        labels = labels.to(device)

        # Gradients are tracked only while training.
        with torch.set_grad_enabled(train_mode):
            logits = model(**inputs).logits
            # weight=None is cross_entropy's default, i.e. the unweighted loss.
            loss = F.cross_entropy(logits, labels, weight=class_weights)

            if train_mode:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

        # Weight the batch loss by batch size before averaging.
        loss_sum += loss.item() * labels.size(0)
        predictions.extend(logits.argmax(dim=-1).tolist())
        targets.extend(labels.tolist())

    mean_loss = loss_sum / len(targets)
    macro_f1 = f1_score(targets, predictions, average="macro")
    return mean_loss, macro_f1, targets, predictions

@torch.no_grad()
def predict(model, loader, tokenizer):
    """Return the argmax class index for every example in ``loader``.

    Args:
        model: Classifier whose forward output exposes ``.logits``.
        loader: Yields tokenizer encodings only (no labels).
        tokenizer: Accepted for signature symmetry; not used in this function.

    Returns:
        A flat list of predicted class indices, in loader order.
    """
    model.eval()
    predicted = []
    for enc in loader:
        inputs = {name: tensor.to(device) for name, tensor in enc.items()}
        batch_logits = model(**inputs).logits
        predicted.extend(batch_logits.argmax(dim=-1).tolist())
    return predicted

## Data loaders factory

In [9]:
# 05 - Data loaders factory

def make_loaders(train_df: pd.DataFrame, dev_df: pd.DataFrame, tokenizer, max_len: int, batch_size: int):
    """Create the labelled training and dev DataLoaders.

    Args:
        train_df: Labelled training rows.
        dev_df: Labelled dev rows.
        tokenizer: Used by the datasets and by the collate function.
        max_len: Maximum token length handed to the collate function.
        batch_size: Batch size for both loaders.

    Returns:
        ``(train_loader, dev_loader)``; only the training loader shuffles.
    """
    def _collate_labeled(batch):
        # Binds tokenizer/max_len so DataLoader can call it with one argument.
        return collate_fn(batch, tokenizer, max_len, is_infer=False)

    train_ds, dev_ds = (
        IdiomDataset(frame, tokenizer, max_len, is_infer=False)
        for frame in (train_df, dev_df)
    )

    loader_options = {"batch_size": batch_size, "collate_fn": _collate_labeled}
    train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
    dev_loader = DataLoader(dev_ds, shuffle=False, **loader_options)
    return train_loader, dev_loader

def make_infer_loader(df: pd.DataFrame, tokenizer, max_len: int, batch_size: int):
    """Create an unshuffled, label-free DataLoader for inference.

    Args:
        df: Rows to predict on; no "Label" column is required.
        tokenizer: Used by the dataset and by the collate function.
        max_len: Maximum token length handed to the collate function.
        batch_size: Batch size of the loader.

    Returns:
        A DataLoader whose batches are tokenizer encodings only.
    """
    def _collate_unlabeled(batch):
        # Binds tokenizer/max_len so DataLoader can call it with one argument.
        return collate_fn(batch, tokenizer, max_len, is_infer=True)

    infer_ds = IdiomDataset(df, tokenizer, max_len, is_infer=True)
    return DataLoader(
        infer_ds,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=_collate_unlabeled,
    )


## Training runner

In [10]:
# 06 - Training runner

def train_and_eval(
    train_df: pd.DataFrame,
    dev_df: pd.DataFrame,
    run_name: str,
    model_name: str = MODEL_NAME,
    max_len: int = MAX_LEN,
    batch_size: int = BATCH_SIZE,
    epochs: int = EPOCHS,
    lr: float = LR,
    weight_decay: float = WEIGHT_DECAY,
    warmup_ratio: float = WARMUP_RATIO
):
    """Fine-tune a fresh model and keep the checkpoint with the best dev macro-F1.

    After every epoch the model is evaluated on ``dev_df``; whenever the dev
    macro-F1 improves, the weights are written to
    ``OUT_DIR / f"ckpt_{run_name}" / "best.pt"``.

    Args:
        train_df: Labelled training rows.
        dev_df: Labelled dev rows used for model selection.
        run_name: Names the checkpoint directory and prefixes the log lines.
        model_name: Pretrained model to start from.
        max_len: Maximum token length per example.
        batch_size: Batch size for both loaders.
        epochs: Number of passes over the training data.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        warmup_ratio: Fraction of all optimiser steps used for linear warm-up
            (at least one step).

    Returns:
        ``(best_path, tokenizer)``: the checkpoint path and the tokenizer used
        for training. The file exists only if at least one epoch ran.
    """
    ckpt_dir = OUT_DIR / f"ckpt_{run_name}"
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    model, tokenizer = build_model_and_tokenizer(model_name)
    train_loader, dev_loader = make_loaders(train_df, dev_df, tokenizer, max_len, batch_size)

    # The scheduler is stepped once per batch, so it needs the total batch count.
    total_steps = epochs * len(train_loader)
    warmup_steps = max(1, int(warmup_ratio * total_steps))
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)

    best_f1 = -1.0  # below any real F1, so the first epoch always saves
    best_path = ckpt_dir / "best.pt"

    for epoch in range(1, epochs + 1):
        tr_loss, tr_f1, _, _ = run_epoch(model, train_loader, tokenizer, optimizer, scheduler, train_mode=True)
        dv_loss, dv_f1, y_true, y_pred = run_epoch(model, dev_loader, tokenizer, optimizer=None, scheduler=None, train_mode=False)

        print(f"[{run_name}] Epoch {epoch}/{epochs} | Train loss {tr_loss:.4f} F1 {tr_f1:.4f} | Dev loss {dv_loss:.4f} F1 {dv_f1:.4f}")

        if dv_f1 > best_f1:
            # f1_score may hand back a NumPy scalar depending on the
            # scikit-learn version; store a plain float so the checkpoint
            # holds only tensors and builtin types and can still be read by
            # torch.load(..., weights_only=True).
            best_f1 = float(dv_f1)
            torch.save({"model_state": model.state_dict(), "tokenizer": tokenizer.get_vocab(), "f1": best_f1}, best_path)

    print(f"[{run_name}] Best dev macro-F1: {best_f1:.4f}")
    return best_path, tokenizer


## One-shot (EN) with class-weighted loss, longer training on CPU, and eval submission

In [17]:
# 07 - One-shot (EN) with class-weighted loss, longer training on CPU, and eval submission

from collections import Counter

# Load the EN one-shot training split and the gold-labelled EN dev split.
train_1s_df, dev_1s_df = prepare_supervised_frame(TRAIN_ONE_SHOT, DEV, DEV_GOLD, language="EN")

# Balanced class weights: w_c = total / (2 * count_c), so the rarer class
# contributes more to the loss. A class missing from the data counts as 1 to
# avoid a division by zero.
label_counts = Counter(train_1s_df["Label"].astype(int).tolist())
num0, num1 = label_counts.get(0, 1), label_counts.get(1, 1)
total = num0 + num1
w0 = total / (2.0 * num0)
w1 = total / (2.0 * num1)
class_weights = torch.tensor([w0, w1], dtype=torch.float32, device=device)

model_1s, tok_1s = build_model_and_tokenizer(MODEL_NAME)
train_loader_1s, dev_loader_1s = make_loaders(train_1s_df, dev_1s_df, tok_1s, MAX_LEN, BATCH_SIZE)

# One-shot specific schedule: more epochs, a lower LR and a longer warm-up
# than the notebook-wide defaults.
EPOCHS_1S = 6
LR_1S = 1.5e-5
WARMUP_RATIO_1S = 0.1

# The scheduler is stepped once per batch, so it needs the total batch count.
total_steps = EPOCHS_1S * len(train_loader_1s)
warmup_steps = max(1, int(WARMUP_RATIO_1S * total_steps))
optimizer_1s = torch.optim.AdamW(model_1s.parameters(), lr=LR_1S, weight_decay=WEIGHT_DECAY)
scheduler_1s = get_linear_schedule_with_warmup(optimizer_1s, num_warmup_steps=warmup_steps, num_training_steps=total_steps)

best_f1 = -1.0  # below any real F1, so the first epoch always saves
best_dir_1s = OUT_DIR / "ckpt_oneshot_en_xlmr_weighted"
best_dir_1s.mkdir(parents=True, exist_ok=True)
best_file_1s = best_dir_1s / "best.pt"

for epoch in range(1, EPOCHS_1S + 1):
    # Training uses the class-weighted loss.
    tr_loss, tr_f1, _, _ = run_epoch_weighted(model_1s, train_loader_1s, tok_1s, optimizer_1s, scheduler_1s, train_mode=True, class_weights=class_weights)
    # The dev pass reports the unweighted loss. The evaluation branch never
    # touches the optimiser or scheduler, so None is passed, as in train_and_eval.
    dv_loss, dv_f1, y_true, y_pred = run_epoch_weighted(model_1s, dev_loader_1s, tok_1s, None, None, train_mode=False, class_weights=None)
    print(f"[oneshot_en_weighted] Epoch {epoch}/{EPOCHS_1S} | Train loss {tr_loss:.4f} F1 {tr_f1:.4f} | Dev loss {dv_loss:.4f} F1 {dv_f1:.4f}")
    if dv_f1 > best_f1:
        # f1_score may hand back a NumPy scalar depending on the scikit-learn
        # version; store a plain float so the checkpoint holds only tensors
        # and builtin types and can still be read by
        # torch.load(..., weights_only=True).
        best_f1 = float(dv_f1)
        torch.save({"model_state": model_1s.state_dict(), "f1": best_f1}, best_file_1s)

print(f"[oneshot_en_weighted] Best dev macro-F1: {best_f1:.4f}")

# ---- reload best (CPU) and produce eval predictions ----
# Rebuild the architecture from the pretrained weights, then overwrite every
# parameter with the best one-shot checkpoint saved during training.
model_1s_eval = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
# tok_1s already had the markers registered in build_model_and_tokenizer; the
# call is kept so the embedding resize below always sees the marker tokens.
tok_1s.add_special_tokens({"additional_special_tokens": ["<mwe>", "</mwe>"]})
model_1s_eval.resize_token_embeddings(len(tok_1s))
state = torch.load(best_file_1s, map_location="cpu")
model_1s_eval.load_state_dict(state["model_state"])
model_1s_eval.to(device)
model_1s_eval.eval()

eval_en_df = prepare_eval_frame(EVAL, language="EN")
eval_loader_1s = make_infer_loader(eval_en_df, tok_1s, MAX_LEN, BATCH_SIZE)
eval_preds_1s = predict(model_1s_eval, eval_loader_1s, tok_1s)

# These predictions come from the model trained on the one-shot split, so the
# Setting column must say "one_shot". It previously said "zero_shot", which
# would have the file scored under the wrong setting.
sub_1s = pd.DataFrame({
    "ID": eval_en_df["ID"].astype(str),
    "Language": eval_en_df["Language"],
    "Setting": ["one_shot"] * len(eval_en_df),
    "Label": eval_preds_1s
})
sub_path_1s = OUT_DIR / "eval_submission_en_oneshot_weighted.csv"
sub_1s.to_csv(sub_path_1s, index=False)
print(f"Wrote {sub_path_1s}")

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[oneshot_en_weighted] Epoch 1/6 | Train loss 0.6872 F1 0.5220 | Dev loss 0.6936 F1 0.3155
[oneshot_en_weighted] Epoch 2/6 | Train loss 0.6971 F1 0.3746 | Dev loss 0.6950 F1 0.2809
[oneshot_en_weighted] Epoch 3/6 | Train loss 0.6827 F1 0.5402 | Dev loss 0.6899 F1 0.6680
[oneshot_en_weighted] Epoch 4/6 | Train loss 0.6842 F1 0.5646 | Dev loss 0.6862 F1 0.4116
[oneshot_en_weighted] Epoch 5/6 | Train loss 0.6793 F1 0.5735 | Dev loss 0.6864 F1 0.5660
[oneshot_en_weighted] Epoch 6/6 | Train loss 0.6633 F1 0.5496 | Dev loss 0.6851 F1 0.4906
[oneshot_en_weighted] Best dev macro-F1: 0.6680


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Wrote outputs_en_xlmr/eval_submission_en_oneshot_weighted.csv


## Zero-shot (EN): load, train, eval, and write eval predictions

In [18]:
# 08 - Zero-shot (EN): load, train, eval, and write eval predictions

# Load the EN zero-shot training split and the gold-labelled EN dev split.
train_0s_df, dev_0s_df = prepare_supervised_frame(TRAIN_ZERO_SHOT, DEV, DEV_GOLD, language="EN")

# Train with the notebook-wide default hyper-parameters. The best checkpoint
# is written to OUT_DIR / "ckpt_zeroshot_en_xlmr" / "best.pt".
best_zeroshot_path, zeroshot_tokenizer = train_and_eval(
    train_df=train_0s_df,
    dev_df=dev_0s_df,
    run_name="zeroshot_en_xlmr"
)

# Reload best for inference: rebuild the architecture from the pretrained
# weights, then overwrite every parameter with the saved checkpoint.
model_zeroshot = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
# The tokenizer returned by train_and_eval already had these markers added in
# build_model_and_tokenizer; they are added again before the embedding resize.
special_tokens = {"additional_special_tokens": ["<mwe>", "</mwe>"]}
zeroshot_tokenizer.add_special_tokens(special_tokens)
model_zeroshot.resize_token_embeddings(len(zeroshot_tokenizer))
# NOTE(review): this literal path is the same location as best_zeroshot_path
# returned above; keep the two in sync if run_name ever changes.
state = torch.load(OUT_DIR / "ckpt_zeroshot_en_xlmr" / "best.pt", map_location="cpu")
model_zeroshot.load_state_dict(state["model_state"])
model_zeroshot.to(device)
model_zeroshot.eval()

# Predict on the EN rows of the evaluation file.
eval_en_df = prepare_eval_frame(EVAL, language="EN")
eval_loader_0s = make_infer_loader(eval_en_df, zeroshot_tokenizer, MAX_LEN, BATCH_SIZE)
eval_preds_0s = predict(model_zeroshot, eval_loader_0s, zeroshot_tokenizer)

# Submission table: one row per EN eval example, tagged with the zero-shot setting.
sub_0s = pd.DataFrame({
    "ID": eval_en_df["ID"].astype(str),
    "Language": eval_en_df["Language"],
    "Setting": ["zero_shot"] * len(eval_en_df),
    "Label": eval_preds_0s
})
sub_path_0s = OUT_DIR / "eval_submission_en_zeroshot.csv"
sub_0s.to_csv(sub_path_0s, index=False)
print(f"Wrote {sub_path_0s}")

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyboardInterrupt: 

## Dev-set diagnostics: confusion matrix & report for best runs

In [19]:
# 09 - Dev-set diagnostics: confusion matrix & report for best runs

from collections import Counter
from sklearn.metrics import confusion_matrix

def eval_on_dev(best_ckpt_path: Path, tokenizer, dev_df: pd.DataFrame, tag: str):
    """Reload a saved checkpoint and print dev-set diagnostics.

    Prints the macro-F1, the per-class classification report and the
    confusion matrix; nothing is returned.

    Args:
        best_ckpt_path: File written by ``torch.save`` that contains a
            "model_state" entry.
        tokenizer: Tokenizer used for the run; the ``<mwe>`` markers are
            (re-)registered on it before the embeddings are resized.
        dev_df: Labelled dev rows.
        tag: Label printed in front of the macro-F1 line.
    """
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
    marker_tokens = {"additional_special_tokens": ["<mwe>", "</mwe>"]}
    tokenizer.add_special_tokens(marker_tokens)
    model.resize_token_embeddings(len(tokenizer))

    # Overwrite the freshly loaded weights with the saved ones.
    checkpoint = torch.load(best_ckpt_path, map_location="cpu")
    model.load_state_dict(checkpoint["model_state"])
    model.to(device)
    model.eval()

    def _collate_labeled(batch):
        return collate_fn(batch, tokenizer, MAX_LEN, is_infer=False)

    dev_dataset = IdiomDataset(dev_df, tokenizer, MAX_LEN, is_infer=False)
    dev_loader = DataLoader(
        dev_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=_collate_labeled,
    )

    # run_epoch in evaluation mode needs neither an optimiser nor a scheduler.
    results = run_epoch(model, dev_loader, tokenizer, optimizer=None, scheduler=None, train_mode=False)
    f1, y_true, y_pred = results[1], results[2], results[3]
    print(f"[{tag}] Dev macro-F1: {f1:.4f}")
    print(classification_report(y_true, y_pred, digits=4))
    print(confusion_matrix(y_true, y_pred))

# Diagnostics for the class-weighted one-shot run: best checkpoint, the
# tokenizer and the dev frame all come from the one-shot cell.
eval_on_dev(OUT_DIR / "ckpt_oneshot_en_xlmr_weighted" / "best.pt", tok_1s, dev_1s_df, "One-shot EN XLM-R (weighted)")

# Diagnostics for the zero-shot run trained through train_and_eval
# (unweighted loss).
eval_on_dev(OUT_DIR / "ckpt_zeroshot_en_xlmr" / "best.pt", zeroshot_tokenizer, dev_0s_df, "Zero-shot EN XLM-R")


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[One-shot EN XLM-R (weighted)] Dev macro-F1: 0.6680
              precision    recall  f1-score   support

           0     0.6885    0.4615    0.5526       182
           1     0.7151    0.8662    0.7834       284

    accuracy                         0.7082       466
   macro avg     0.7018    0.6639    0.6680       466
weighted avg     0.7047    0.7082    0.6933       466

[[ 84  98]
 [ 38 246]]


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[Zero-shot EN XLM-R] Dev macro-F1: 0.7451
              precision    recall  f1-score   support

           0     0.6667    0.7363    0.6997       182
           1     0.8189    0.7641    0.7905       284

    accuracy                         0.7532       466
   macro avg     0.7428    0.7502    0.7451       466
weighted avg     0.7594    0.7532    0.7551       466

[[134  48]
 [ 67 217]]


## Save final best checkpoints to disk

In [20]:
# 10 - Save final best checkpoints to disk

final_oneshot_dir = OUT_DIR / "final_oneshot_en_xlmr"
final_zeroshot_dir = OUT_DIR / "final_zeroshot_en_xlmr"
final_oneshot_dir.mkdir(parents=True, exist_ok=True)
final_zeroshot_dir.mkdir(parents=True, exist_ok=True)

# Re-save each run's best checkpoint dict, loaded onto CPU, as model.pt in
# its final directory. The dict is copied as-is: whatever the training cell
# stored (model weights, best F1, and for the zero-shot run the tokenizer
# vocab) is what ends up in the final file.
# One-shot: the run trained and evaluated in this notebook writes to
# "ckpt_oneshot_en_xlmr_weighted". The previous "ckpt_oneshot_en_xlmr" path is
# not produced by any cell here, so it either failed or picked up a stale file.
torch.save(torch.load(OUT_DIR / "ckpt_oneshot_en_xlmr_weighted" / "best.pt", map_location="cpu"), final_oneshot_dir / "model.pt")

# Zero-shot
torch.save(torch.load(OUT_DIR / "ckpt_zeroshot_en_xlmr" / "best.pt", map_location="cpu"), final_zeroshot_dir / "model.pt")

print("Saved final checkpoints.")


Saved final checkpoints.
