In [None]:
!pip install nlpaug
def worker_init_fn(worker_id):
    """Prepare NLTK data inside a DataLoader worker process.

    Checks that each required NLTK package is present and downloads it
    quietly when it is missing, then replaces ``nltk.download`` with a no-op
    for the remainder of the process so later callers cannot trigger further
    downloads.

    Args:
        worker_id: Index supplied by ``DataLoader``; not used here.
    """
    import nltk

    # nltk.download() takes a package id, but nltk.data.find() needs the
    # category-qualified resource path. Looking up the bare id never succeeds,
    # which would force a download attempt in every worker.
    resources = {
        "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
        "wordnet": "corpora/wordnet",
        "omw-1.4": "corpora/omw-1.4",
    }
    for pkg, resource_path in resources.items():
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(pkg, quiet=True)

    # From here on, any nltk.download(...) call in this process is a no-op
    # that reports success.
    nltk.download = lambda *args, **kwargs: True
import os

In [None]:
import torch
import json
import random
import numpy as np
import nlpaug.augmenter.word as naw
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, get_linear_schedule_with_warmup
from torch.optim import AdamW
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
class EnvironmentManager:
    """Groups process-wide environment setup helpers."""

    @staticmethod
    def set_seed(seed=42):
        """Seed every random number generator this notebook relies on.

        Seeds Python's ``random``, NumPy's global RNG, and PyTorch (CPU and
        all CUDA devices), then turns on cuDNN's deterministic mode.

        Args:
            seed: Integer seed applied to all generators.
        """
        seeders = (
            random.seed,
            np.random.seed,
            torch.manual_seed,
            torch.cuda.manual_seed_all,
        )
        for apply_seed in seeders:
            apply_seed(seed)

        torch.backends.cudnn.deterministic = True

In [None]:
class ScoreCalculator:
    """Reduces a collection of ratings to one frequency-weighted scalar."""

    def __init__(self, beta):
        # beta scales how strongly a value's occurrence count affects its weight.
        self.beta = beta

    def get_weighted_score(self, average):
        """Return the weighted mean of the distinct values in ``average``.

        Each distinct value is weighted by ``exp(count * beta)``, where
        ``count`` is how many times that value occurs in the input.

        Args:
            average: Array-like of numeric ratings.

        Returns:
            The weighted mean of the distinct values as a NumPy float.
        """
        distinct_values, occurrences = np.unique(average, return_counts=True)
        importance = np.exp(occurrences * self.beta)
        return (distinct_values * importance).sum() / importance.sum()


In [None]:
import numpy as np
import statistics
class AmbiStoryDataset(Dataset):
    """Dataset of stories paired with a judged meaning and human ratings.

    Each item is built from a JSON object keyed by sample id. The fields read
    from every entry are ``precontext``, ``sentence``, ``ending``,
    ``judged_meaning`` and ``choices`` (the list of ratings).

    When ``bt_path`` is given, a second JSON file with the same ids supplies
    alternative ``precontext``/``ending`` text, EDA-style augmenters are
    created, and the training label becomes the frequency-weighted score from
    ``ScoreCalculator`` instead of the plain mean.

    Config keys read: ``beta``, ``max_length``, and (only with ``bt_path``)
    ``p_use_bt``, ``p_eda``, ``eda_intensity``, ``eda_mode``, ``eda_weights``.
    """

    def __init__(self, orig_path, tokenizer, config, bt_path=None):
        """Load the JSON file(s) and, if ``bt_path`` is set, build augmenters.

        Args:
            orig_path: Path to the JSON file with the original samples.
            tokenizer: Callable tokenizer applied to (story, meaning) pairs.
            config: Dict of settings (see class docstring).
            bt_path: Optional path to a JSON file with alternative texts.
        """
        self.tokenizer = tokenizer
        self.cfg = config
        self.score_engine = ScoreCalculator(config['beta'])

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(orig_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
        self.sids = list(self.data.keys())

        self.bt_data = None
        if bt_path:
            with open(bt_path, 'r', encoding='utf-8') as f:
                self.bt_data = json.load(f)
            self._init_augmenters()

    def _init_augmenters(self):
        """Create the synonym, deletion and swap augmenters."""
        p = self.cfg.get('eda_intensity', 0.1)

        self.syn_aug = naw.SynonymAug(aug_src='wordnet', aug_p=p)
        self.del_aug = naw.RandomWordAug(action="delete", aug_p=p)
        self.swap_aug = naw.RandomWordAug(action="swap", aug_p=p)

    @staticmethod
    def _augment_once(augmenter, text):
        """Run ``augmenter`` on ``text`` and return a single augmented string.

        ``augment`` may hand back either a plain string or a list of strings.
        Indexing a plain string with ``[0]`` would yield only its first
        character, so both shapes are handled explicitly.
        """
        result = augmenter.augment(text)
        if isinstance(result, str):
            return result
        return result[0]

    def _apply_eda(self, text):
        """Return an augmented version of ``text``, or ``text`` on failure.

        Texts that are empty or shorter than five whitespace-separated tokens
        are returned untouched. In ``'random'`` mode one technique is sampled
        according to ``eda_weights``; in ``'chain'`` mode synonym, swap and
        deletion are applied in that order. Any other mode returns ``text``.
        """
        if not text or len(text.split()) < 5:
            return text

        mode = self.cfg.get('eda_mode', 'random')

        if mode == 'random':
            eda_cfg = self.cfg.get('eda_weights', {"synonym": 1, "deletion": 1, "swap": 1})
            techniques = ['synonym', 'deletion', 'swap']
            weights = [eda_cfg['synonym'], eda_cfg['deletion'], eda_cfg['swap']]

            choice = random.choices(techniques, weights=weights, k=1)[0]

            # Best-effort augmentation: any augmenter failure falls back to the
            # original text. Catch Exception rather than using a bare except so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                augmenter = {
                    'synonym': self.syn_aug,
                    'deletion': self.del_aug,
                    'swap': self.swap_aug,
                }[choice]
                return self._augment_once(augmenter, text)
            except Exception:
                return text

        elif mode == 'chain':
            try:
                augmented_text = self._augment_once(self.syn_aug, text)
                augmented_text = self._augment_once(self.swap_aug, augmented_text)
                augmented_text = self._augment_once(self.del_aug, augmented_text)
                return augmented_text
            except Exception:
                return text

        return text

    def __len__(self):
        """Number of samples in the original JSON file."""
        return len(self.sids)

    def __getitem__(self, idx):
        """Build the tokenised inputs and targets for sample ``idx``.

        Returns:
            Dict with ``input_ids``, ``attention_mask`` (flattened tensors),
            ``labels`` (training target), ``gold_mean``, ``gold_stdev`` and
            ``id`` (int when the sample id is all digits, else the raw id).
        """
        sid = self.sids[idx]
        item_orig = self.data[sid]

        # With probability p_use_bt take precontext/ending from the alternative
        # file; the middle sentence always comes from the original.
        if self.bt_data and random.random() < self.cfg['p_use_bt']:
            source = self.bt_data[sid]
        else:
            source = item_orig

        precontext = source['precontext']
        ending = source['ending']

        # EDA is only applied when alternative data (and thus augmenters) exist.
        if self.bt_data and random.random() < self.cfg['p_eda']:
            precontext = self._apply_eda(precontext)
            ending = self._apply_eda(ending)

        full_story = f"{precontext} {item_orig['sentence']} {ending}".strip()
        # The second segment is the judged meaning alone. A previously computed
        # but never used "example_sentence + judged_meaning" string was dropped.
        encoding = self.tokenizer(
                full_story,
                item_orig['judged_meaning'],
                max_length=self.cfg['max_length'],
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )

        ratings = item_orig["choices"]
        gold_mean = float(np.mean(ratings))
        # Sample standard deviation needs at least two ratings.
        gold_stdev = float(statistics.stdev(ratings)) if len(ratings) >= 2 else 0.0

        if self.bt_data:
            label_scalar = float(self.score_engine.get_weighted_score(ratings))
        else:
            label_scalar = gold_mean

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label_scalar, dtype=torch.float),
            'gold_mean': torch.tensor(gold_mean, dtype=torch.float),
            'gold_stdev': torch.tensor(gold_stdev, dtype=torch.float),
            'id': int(sid) if str(sid).isdigit() else sid,
        }

In [None]:
import torch
import numpy as np
from scipy.stats import spearmanr

@torch.no_grad()
def official_scores_from_stats(model, loader, device, clamp_1_5=True, rounded=False):
    """Evaluate ``model`` on ``loader`` and return (Spearman, accuracy).

    A prediction counts as correct when it lies strictly within one gold
    standard deviation of the gold mean, or when it is strictly less than 1.0
    away from the gold mean.

    Args:
        model: Callable returning an object with a ``logits`` tensor.
        loader: Iterable of batches with ``input_ids``, ``attention_mask``,
            ``gold_mean`` and ``gold_stdev`` tensors.
        device: Device the inputs are moved to.
        clamp_1_5: Clip predictions to the range [1, 5] before scoring.
        rounded: Round predictions to the nearest integer before scoring.

    Returns:
        Tuple ``(spearman_correlation, accuracy)`` as Python floats; accuracy
        is 0.0 when no samples were seen.
    """
    model.eval()

    all_predictions, all_gold_means = [], []
    n_hits = 0
    n_seen = 0

    for batch in loader:
        token_ids = batch["input_ids"].to(device)
        attn = batch["attention_mask"].to(device)

        batch_mean = batch["gold_mean"].cpu().numpy().astype(float)
        batch_sd = batch["gold_stdev"].cpu().numpy().astype(float)

        logits = model(token_ids, attention_mask=attn).logits
        batch_pred = logits.squeeze(-1).detach().cpu().numpy().astype(float)

        if clamp_1_5:
            batch_pred = np.clip(batch_pred, 1.0, 5.0)
        if rounded:
            batch_pred = np.rint(batch_pred).astype(int)

        all_predictions.extend(batch_pred.tolist())
        all_gold_means.extend(batch_mean.tolist())

        # Per-sample verdict: inside the (mean ± sd) band, or within 1.0 of the mean.
        verdicts = [
            ((m - sd) < pred < (m + sd)) or (abs(m - pred) < 1.0)
            for pred, m, sd in zip(batch_pred, batch_mean, batch_sd)
        ]
        n_hits += sum(int(v) for v in verdicts)
        n_seen += len(verdicts)

    rho, _pvalue = spearmanr(all_predictions, all_gold_means)
    accuracy = n_hits / n_seen if n_seen else 0.0
    return float(rho), float(accuracy)

In [None]:
class Trainer:
    """Fine-tunes a single-output sequence-classification model as a regressor.

    Config keys read by this class: ``device``, ``model_name``, ``train_orig``,
    ``train_bt``, ``dev_orig``, ``batch_size``, ``learning_rate``,
    ``weight_decay``, ``huber_delta``, ``label_smoothing``,
    ``label_smoothing_center`` (only when smoothing is > 0) and ``epochs``.
    The same config dict is also handed to ``AmbiStoryDataset``.
    """

    def __init__(self, config):
        """Store the config, build all components and reset best-model tracking."""
        self.cfg = config
        self.device = torch.device(config['device'])
        self._prepare_components()
        self.best_dev_acc = -1.0
        self.best_epoch = -1

    def _prepare_components(self):
        """Create tokenizer, model, data loaders, optimizer, loss and scheduler."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg['model_name'])
        # num_labels=1 gives a single logit per sample, used as the predicted score.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.cfg['model_name'], num_labels=1
        ).to(self.device)

        # Only the training set receives the alternative-text file, so only it
        # gets augmentation and the weighted-score label.
        train_ds = AmbiStoryDataset(self.cfg['train_orig'], self.tokenizer, self.cfg, bt_path=self.cfg['train_bt'])
        dev_ds = AmbiStoryDataset(self.cfg['dev_orig'], self.tokenizer, self.cfg)

        self.train_loader = DataLoader(train_ds, batch_size=self.cfg['batch_size'], shuffle=True, num_workers=2,
                                        worker_init_fn=worker_init_fn,persistent_workers=True)
        self.dev_loader = DataLoader(dev_ds, batch_size=self.cfg['batch_size'], num_workers=2,
                                        worker_init_fn=worker_init_fn,persistent_workers=True)

        self.optimizer = AdamW(self.model.parameters(), lr=self.cfg['learning_rate'],weight_decay=self.cfg['weight_decay'])

        self.loss_fn = torch.nn.HuberLoss(delta=self.cfg['huber_delta'])
        self.label_smoothing = self.cfg['label_smoothing']
        # mode='max': the monitored value is an accuracy, so higher is better.
        self.scheduler = ReduceLROnPlateau(
            self.optimizer,
            mode='max',
            factor=0.5,
            patience=3,
            threshold=1e-4,
            min_lr=1e-6
        )


    def fit(self):
        """Train for ``epochs`` epochs, keep the best checkpoint, write predictions.

        After every epoch the model is scored on the train and dev loaders.
        The weights with the highest rounded dev accuracy are saved to
        ``best_model.pt``. When training ends those weights are reloaded (if
        the file exists) and dev predictions are written to
        ``predictions.jsonl``.
        """
        print(f"Starting Training on {self.device}")
        for epoch in range(self.cfg['epochs']):
            self.model.train()
            total_loss = 0
            tk = tqdm(self.train_loader, desc=f"Epoch {epoch+1}")

            for step, batch in enumerate(tk, start=1):
                self.optimizer.zero_grad()

                ids = batch['input_ids'].to(self.device)
                mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                out = self.model(ids, attention_mask=mask)

                # Label smoothing for regression: shrink every target towards a
                # fixed centre value by the smoothing factor.
                if self.label_smoothing > 0.0:
                    center = self.cfg['label_smoothing_center']
                    smoothed_labels = (
                        (1.0 - self.label_smoothing) * labels
                        + self.label_smoothing * center
                    )
                else:
                    smoothed_labels = labels

                loss = self.loss_fn(out.logits.flatten(), smoothed_labels)

                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                # Running mean over the batches processed so far. Dividing by
                # the total number of batches would under-report the loss until
                # the final step of the epoch.
                tk.set_postfix(loss=total_loss / step)

            train_s, train_a = official_scores_from_stats(self.model, self.train_loader, self.device)
            dev_s, dev_a = official_scores_from_stats(self.model, self.dev_loader, self.device)
            dev_s_round, dev_a_round = official_scores_from_stats(self.model, self.dev_loader, self.device, rounded=True)

            # NOTE(review): the plateau scheduler monitors *train* accuracy while
            # model selection below uses rounded *dev* accuracy - confirm this
            # is intended.
            self.scheduler.step(train_a)

            if dev_a_round > self.best_dev_acc:
                self.best_dev_acc = dev_a_round
                self.best_epoch = epoch + 1
                torch.save(self.model.state_dict(), "best_model.pt")
                print(f"New best model saved (epoch {self.best_epoch}) | best_dev_acc={self.best_dev_acc:.4f}")

            print(
                f"Epoch {epoch+1}/{self.cfg['epochs']} | ",
                f"TRAIN Spearman={train_s:.4f} Acc@SD/1={train_a:.4f} | ",
                f"DEV Spearman={dev_s:.4f} Acc@SD/1={dev_a:.4f} | ",
                f"DEV_ROUND Spearman={dev_s_round:.4f} Acc@SD/1={dev_a_round:.4f}",
                flush=True
            )

        if os.path.exists("best_model.pt"):
            self.model.load_state_dict(torch.load("best_model.pt", map_location=self.device))
            print(f"Loaded best model from best_model.pt (epoch {self.best_epoch}, best_dev_acc={self.best_dev_acc:.4f})")
        else:
            print("Best model file not found — using current model weights.")

        save_predictions_jsonl(
            model=self.model,
            loader=self.dev_loader,
            out_path="predictions.jsonl",
            device=self.device,
            round_to_int=True
        )
import json
import torch

def save_predictions_jsonl(model, loader, out_path, device, round_to_int=True):
    """Run ``model`` over ``loader`` and write one JSON prediction per line.

    Each output line has the form ``{"id": "<sample id>", "prediction": <value>}``.
    Rows are ordered with numeric ids first (ascending by integer value),
    followed by non-numeric ids in string order.

    Args:
        model: Callable returning an object with a ``logits`` tensor.
        loader: Iterable of batches with ``input_ids``, ``attention_mask`` and
            ``id`` (a tensor or a sequence of ids).
        out_path: Destination file path (written as UTF-8).
        device: Device the inputs are moved to.
        round_to_int: If true, round each prediction to the nearest integer and
            clamp it to [1, 5]; otherwise write the raw float.
    """
    model.eval()

    preds = []
    with torch.no_grad():
        for batch in loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)

            # ids may arrive as a tensor or as a plain sequence of ids.
            ids = batch["id"]
            if torch.is_tensor(ids):
                ids = ids.cpu().tolist()

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            logits = outputs.logits.squeeze(-1)

            pred_vals = logits.detach().cpu().tolist()
            # A 0-d tensor turns into a bare number; normalise to a list.
            if not isinstance(pred_vals, list):
                pred_vals = [pred_vals]

            for sid, p in zip(ids, pred_vals):
                if round_to_int:
                    p = int(round(float(p)))
                    p = max(1, min(5, p))
                else:
                    p = float(p)

                preds.append({"id": str(sid), "prediction": p})

    def _sort_key(row):
        # Always return a tuple of the same shape so numeric and non-numeric
        # ids can be sorted together. Returning a bare int for some rows and a
        # bare str for others raises TypeError as soon as both kinds are present.
        try:
            return (0, int(row["id"]), "")
        except ValueError:
            return (1, 0, row["id"])

    preds.sort(key=_sort_key)

    with open(out_path, "w", encoding="utf-8") as f:
        for row in preds:
            f.write(json.dumps(row) + "\n")

    print(f"Saved {len(preds)} predictions to {out_path}")

In [None]:
if __name__ == "__main__":
    # Seed every RNG before any model or dataset object is created.
    EnvironmentManager.set_seed(42)

    # Pieces of the configuration that are computed or nested are built first.
    _device_name = "cuda" if torch.cuda.is_available() else "cpu"
    _eda_weights = {
        "synonym": 0.7,
        "deletion": 0.15,
        "swap": 0.15
    }

    CONFIG = {
        # --- model / optimisation ---
        "model_name": "roberta-base",
        "batch_size": 32,
        "epochs": 30,
        "learning_rate": 2e-5,
        "max_length": 256,
        # --- label weighting ---
        "beta": 0.5,
        # --- data locations ---
        "train_orig": "/kaggle/input/ambistory-raw/train.json",
        "train_bt": "/kaggle/input/ambistory-processed/train_bt_only.json",
        "dev_orig": "/kaggle/input/ambistory-raw/dev.json",
        # --- augmentation ---
        "p_use_bt": 0.5,
        "p_eda": 0.2,
        "eda_intensity": 0.1,
        "eda_mode": "random",
        "eda_weights": _eda_weights,
        # --- regularisation / loss ---
        "weight_decay": 1e-4,
        "label_smoothing": 0.1,
        "label_smoothing_center": 3.0,
        # Disabled options, kept for reference (Trainer does not read them):
        #"hidden_dropout_prob": 0.1,
        #"attention_dropout_prob": 0.1,
        #"classifier_dropout": 0.1,
        #"warmup_ratio": 0.1,
        "huber_delta": 1,
        # --- runtime ---
        "device": _device_name
    }

    # Build all components from CONFIG, then run the full training loop.
    orchestrator = Trainer(CONFIG)
    orchestrator.fit()