In [3]:
from biLSTM import BiLSTMClassifier
import pandas as pd
import numpy as np
import sentencepiece as spm
import re
from bs4 import BeautifulSoup
import unicodedata
from nltk.stem import WordNetLemmatizer
from gensim.models import Word2Vec
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch
import os
from torch.utils.data import DataLoader
import random
import torch.nn as nn
from sklearn.metrics import f1_score
import torch.nn.functional as F
import json
from sklearn.metrics import precision_recall_fscore_support,accuracy_score
import time
import itertools
import hashlib
import random



SEED = 42  # or any integer

# Python & NumPy

np.random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)

# PyTorch (CPU & CUDA)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # if multi-GPU

# Tokeniser

#### helpers

In [4]:
# masking special token
def mask_tokens(text):
    # replace URLs (http, https, www)
    text = re.sub(r'(https?://\S+|www\.\S+)', '<URL>', text)

    # replace common file extensions (customize list)
    text = re.sub(r'\b[\w\-]+\.(pdf|docx|xlsx|txt|csv|tar|doc\.gz|doc)\b', '<FILE>', text)

    # emails
    text = re.sub(r'\b[\w\.-]+@[\w\.-]+\.\w+\b', '<EMAIL>', text)

    # money 
    text = re.sub(r'\$\d+(?:\.\d{2})?','<MONEY>',text)

    # numbers 
    text = re.sub(r'\b\d+\b','<NUMBER>',text)
    text = text.replace('<NUMBER>', '')

    return text


# un HTML raw text 
def strip_html(raw_html):
    """
    Strip HTML tags, scripts, styles, and normalize whitespace
    to return clean raw text from HTML emails.
    """
    soup = BeautifulSoup(raw_html, "html.parser")

    
    for a in soup.find_all("a"):
        href = a.get("href",None)
        if not href:   # skip if no href
                continue

        # print(a_attribute)

        a_attribute = mask_tokens(href)

        if a_attribute == '<URL>' : 
            a.replace_with('<URL>')

        elif a_attribute =='<EMAIL>' : 
            a.replace_with('<EMAIL>')
        
        elif a_attribute == '<FILE>' : 
            a.replace_with('<FILE>')

        elif a_attribute == '<MONEY>' : 
            a.replace_with('<MONEY>')
        
        elif a_attribute == '<NUMBER>' : 
            a.replace_with('<NUMBER>')

    # remove script, style, head, and metadata tags
    for tag in soup(["script", "style", "head", "title", "meta", "[document]"]):
        tag.decompose()

    # extract text
    text = soup.get_text(separator=" ")

    # normalize unicode 
    text = unicodedata.normalize("NFKC", text)

    # replace non-breaking spaces specifically (unicode)
    text = text.replace("\xa0", " ")

    # collapse all whitespace tokens (line breaks, tabs, multiple spaces) into one space and remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()

    # rim leading/trailing spaces
    return text

# special case handling
mapper = str.maketrans({
    '0':'o','1':'l','3':'e','4':'a','5':'s','7':'t','$':'s','@':'a'
})

def deobfuscate_words(text):
    """
    capture non-alphanumeric sequence in windows of 1-3 and replaces with ' ' 
    l-o-v-e -> l-o , - is detected and removed -> love
    """
    # replace text to number 
    text = text.translate(mapper)
    # remove weird spaces etc 
    text = re.sub(r'(?i)(?<=\w)[^A-Za-z0-9\s]{1,3}(?=\w)', '', text)
    return text

def word_capper(text):
    text = re.sub(r'(.)\1{' + str(2) + r',}', lambda m: m.group(1)*2, text)
    text = re.sub(r'([!?.,])\1{1,}', r'\1\1', text)
    return text


# whitelist filtering
def char_lvl_whitelist_filter(text): 
    text = re.sub(r'[^a-zA-Z0-9\s\.\,\!\?\'\":;\-\_\(\)\@\#\$\%\^\&\<\>]', '', text)
    return text

# word level processor 
def lemmatizer(text) :
    lemmatizer = WordNetLemmatizer()
    sentence = ''

    lemmatized_words = [lemmatizer.lemmatize(word) for word in text]

    return sentence.join(lemmatized_words)

#final clean
def final_punc_removal(text):
    text = re.sub(r'[^A-Za-z0-9\s<>]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

    
def preprocess_email_text(raw): 
    """
    the whole pipeline of processing
    input : dataframe with text column and ham/spam label
    output : dataframe with cleaned sentences and ham/spam label
    """
    raw = strip_html(raw) # process html first to capture links from <a> tags
    raw = mask_tokens(raw) # mask special tokens 
    raw = deobfuscate_words(raw)
    raw = word_capper(raw)
    raw = lemmatizer(raw)
    raw = char_lvl_whitelist_filter(raw)
    raw = final_punc_removal(raw)
    raw = raw.lower()
    return raw

def preprocess_email_df(df, text_col):
    df[text_col] = df[text_col].apply(preprocess_email_text)
    return df


def vocab_builder(
    input_df
    ,vocab_size
    ,model_type
) : 
    
    input_df["Body"].to_csv("emails_clean.txt", index=False, header=False)

    # train SentencePiece model
    spm.SentencePieceTrainer.Train(
        f"--input=emails_clean.txt "
        f"--model_prefix=email_sp "
        f"--vocab_size={vocab_size} "
        f"--character_coverage=1.0 "
        f"--model_type={model_type} "
        f"--shuffle_input_sentence=false "
        f"--seed_sentencepiece_size=1000000 "
        f"--user_defined_symbols=<url>,<email>,<file>,<money>,<pad>"
    )


#####################################################################################################################
def vocab_to_id_mapper(
        input_df
        ,max_len
        ,sp
) :
    
    pad_id = sp.piece_to_id("<pad>")
    if pad_id == -1:  
        pad_id = 0

    
    def encode_ids(text) :
        if not isinstance(text, str):
            text = "" if pd.isna(text) else str(text)
        return sp.encode_as_ids(text)

    def pad_ids(ids,max_len,pad_id) -> np.ndarray:
        if len(ids) >= max_len:
            return np.array(ids[:max_len], dtype=np.int32)
        return np.array(ids + [pad_id] * (max_len - len(ids)), dtype=np.int32)

    
    df = input_df.copy()
    df["sp_ids"] = df["Body"].apply(encode_ids)

    # overwrite sp_ids_padded with NumPy arrays directly
    df["sp_ids_padded"] = df["sp_ids"].apply(lambda ids: pad_ids(ids, max_len, pad_id))

    return df

def build_embedding_matrix(w2v, sp, pad_id: int, seed: int = 42):
    """
    Build embedding matrix aligned with SentencePiece IDs.
    """
    vocab_size = sp.get_piece_size()
    emb_dim = w2v.vector_size

    E = np.zeros((vocab_size, emb_dim), dtype=np.float32)
    rng = np.random.default_rng(seed)

    for sp_id in range(vocab_size):
        piece = sp.id_to_piece(sp_id)
        if piece in w2v.wv:
            E[sp_id] = w2v.wv[piece]
        else:
            E[sp_id] = rng.normal(0.0, 0.01, size=emb_dim).astype(np.float32)

    # Keep PAD = 0
    if 0 <= pad_id < vocab_size:
        E[pad_id] = 0.0

    metadata = {
        "vocab_size": vocab_size,
        "emb_dim": emb_dim,
        "pad_id": pad_id,
        "trained_vocab": len(w2v.wv),
        "oov_count": vocab_size - len(w2v.wv),
    }
    return E, metadata

class TextDS(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(np.stack(X), dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, i): return self.X[i], self.y[i]

In [10]:
# build sp -> word embedin matrix 
load_path = 'full_220/'


# load saved model
w2v_model = Word2Vec.load(load_path+"word2vec.model")

#sentencePiece model & pad_id
import sentencepiece as spm
sp = spm.SentencePieceProcessor()
sp.load(load_path+"email_sp.model")
pad_id = sp.piece_to_id("<pad>")
if pad_id == -1:
    pad_id = 0

subword_processor = sp 

embedding_matrix, embedding_summary = build_embedding_matrix(w2v_model,subword_processor,pad_id)

INFO:gensim.utils:loading Word2Vec object from full_220/word2vec.model
INFO:gensim.utils:loading wv recursively from full_220/word2vec.model.wv.* with mmap=None
INFO:gensim.utils:loading vectors from full_220/word2vec.model.wv.vectors.npy with mmap=None
INFO:gensim.utils:loading syn1neg from full_220/word2vec.model.syn1neg.npy with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'full_220/word2vec.model', 'datetime': '2025-10-25T04:46:12.611708', 'gensim': '4.3.3', 'python': '3.10.19 | packaged by conda-forge | (main, Oct 13 2025, 14:08:27) [GCC 14.3.0]', 'platform': 'Linux-6.8.0-1040-aws-x86_64-with-glibc2.35', 'event': 'loaded'}


In [4]:
embedding_summary


{'vocab_size': 50000,
 'emb_dim': 300,
 'pad_id': 7,
 'trained_vocab': 47842,
 'oov_count': 2158}

In [5]:
pad_id = subword_processor.piece_to_id('<pad>')
pad_id

7

#### Train-valid-test split

In [None]:
train_df_raw = pd.read_csv('../raw_encoder_data_sets/train_set.csv')
val_df_raw = pd.read_csv('../raw_encoder_data_sets/valid_set.csv')
test_df_raw = pd.read_csv('../raw_encoder_data_sets/test_set.csv')


In [None]:
print(train_df_raw.shape)
print(val_df_raw.shape)
print(test_df_raw.shape)


In [None]:
print(len(set(val_df_raw["text_combined"]) & set(test_df_raw["text_combined"])))
print(len(set(val_df_raw["text_combined"]) & set(train_df_raw["text_combined"])))
print(len(set(test_df_raw["text_combined"]) & set(train_df_raw["text_combined"])))

In [None]:
# preprocess same as word2vec preprocessing
train_df = preprocess_email_df(train_df_raw,'text_combined')
val_df = preprocess_email_df(val_df_raw,'text_combined')
test_df = preprocess_email_df(test_df_raw,'text_combined')


In [None]:
text_col = "text_combined"

# remove overlaps in hierarchical order: Train → Val → Test
train_texts = set(train_df[text_col])

# remove from val if it appears in train
val_df = val_df[~val_df[text_col].isin(train_texts)].reset_index(drop=True)
val_texts = set(val_df[text_col])

# remove from test if it appears in train or val
test_df = test_df[
    ~test_df[text_col].isin(train_texts.union(val_texts))
].reset_index(drop=True)

In [5]:
# train_df.to_csv('clean_data_bilstm/train_clean.csv',index=False)
# val_df.to_csv('clean_data_bilstm/val_clean.csv',index= False)
# test_df.to_csv('clean_data_bilstm/test_clean.csv',index = False)

train_df = pd.read_csv('clean_data_bilstm/train_clean.csv')
val_df = pd.read_csv('clean_data_bilstm/val_clean.csv')
test_df = pd.read_csv('clean_data_bilstm/test_clean.csv')


In [6]:
print(train_df.shape)
print(val_df.shape)
print(test_df.shape)

(65662, 2)
(8062, 2)
(8056, 2)


In [7]:
# check no overlaps across sets
print(len(set(train_df["text_combined"]) & set(test_df["text_combined"])))
print(len(set(val_df["text_combined"]) & set(test_df["text_combined"])))
print(len(set(train_df["text_combined"]) & set(val_df["text_combined"])))

0
0
0


In [8]:
train_df.rename(columns = {'text_combined':'Body'},inplace=True)
val_df.rename(columns = {'text_combined':'Body'},inplace=True)
test_df.rename(columns = {'text_combined':'Body'},inplace=True)


In [11]:
# tokenise and pad
train_df = vocab_to_id_mapper(train_df,256,sp)
val_df = vocab_to_id_mapper(val_df,256,sp)
test_df = vocab_to_id_mapper(test_df,256,sp)


In [12]:
# convert to torch object for model injection

train_ds = TextDS(train_df['sp_ids_padded'].values, train_df['label'].values)
val_ds   = TextDS(val_df['sp_ids_padded'].values, val_df['label'].values)
test_ds  = TextDS(test_df['sp_ids_padded'].values, test_df['label'].values)

In [13]:
assert train_df['sp_ids_padded'].apply(len).eq(256).all()
assert val_df['sp_ids_padded'].apply(len).eq(256).all()
assert test_df['sp_ids_padded'].apply(len).eq(256).all()

# Training Loop

#### helpers

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [15]:
best_ckpt_dir = 'best_ckpts'
CKPT_DIR = "checkpoints"
os.makedirs(CKPT_DIR, exist_ok=True)

def save_ckpt(path, epoch, model, optimizer, scheduler, config, best_f1, seed):
    state = {
        "epoch": epoch,
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "scheduler": scheduler.state_dict() if scheduler is not None else None,
        "config": config,
        "best_val_f1": best_f1,
        "seed": seed,
    }
    tmp = path + ".tmp"
    torch.save(state, tmp)
    os.replace(tmp, path)  # atomic replace

def load_ckpt(path, model, optimizer=None, scheduler=None, map_location="cpu"):
    ckpt = torch.load(path, map_location=map_location)
    model.load_state_dict(ckpt["model"])
    if optimizer is not None and "optimizer" in ckpt and ckpt["optimizer"] is not None:
        optimizer.load_state_dict(ckpt["optimizer"])
    if scheduler is not None and "scheduler" in ckpt and ckpt["scheduler"] is not None:
        scheduler.load_state_dict(ckpt["scheduler"])
    return ckpt

def latest_epoch_path():
    files = [f for f in os.listdir(CKPT_DIR) if f.startswith("epoch_") and f.endswith(".pt")]
    if not files: return None
    files.sort()
    return os.path.join(CKPT_DIR, files[-1])

def evaluate_metrics(dl, model, device):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for xb, yb in dl:
            xb = xb.to(device)
            logits = model(xb)
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            y_true.extend(yb.numpy())
            y_pred.extend(preds)
    p, r, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
    acc = accuracy_score(y_true, y_pred)
    return p, r, f1, acc

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

def load_best_for_inference(manifest_path="checkpoints/manifest.json", map_location=None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if map_location is None else map_location
    with open(manifest_path) as f:
        m = json.load(f)

    # rebuild model
    embedding_matrix = np.load(m["embedding_matrix_file"])
    model = BiLSTMClassifier(
        embedding_matrix=embedding_matrix,
        pad_id=m["pad_id"],
        hidden_dim=m["hidden_dim"],
        num_layers=m["num_layers"],
        dropout=m["dropout"],
        bidirectional=m["bidirectional"],
        num_classes=m["num_classes"],
    ).to(device)

    # load best weights
    ckpt = torch.load(m["best_ckpt"], map_location=device)
    model.load_state_dict(ckpt["model"])
    model.eval()
    return model, m

def get_lr(optim):  
    for pg in optim.param_groups:
        return pg["lr"]

In [16]:
g = torch.Generator()
g.manual_seed(SEED)

train_dl = DataLoader(train_ds, batch_size=64, shuffle=True,
                      num_workers=2, pin_memory=True,
                      worker_init_fn=seed_worker, generator=g)
val_dl   = DataLoader(val_ds, batch_size=128, shuffle=False,
                      num_workers=2, pin_memory=True,
                      worker_init_fn=seed_worker, generator=g)
test_dl  = DataLoader(test_ds, batch_size=128, shuffle=False,
                      num_workers=2, pin_memory=True,
                      worker_init_fn=seed_worker, generator=g)

#### hyper param tuning

In [16]:
# hyper param tuning
BASE_CONFIG = {
    "seed": 42,
    "batch_size_train": 64,
    "batch_size_eval": 128,
    "hidden_dim": 256,
    "num_layers": 2,
    "dropout": 0.5,
    "bidirectional": True,
    "lr": 1e-3,
    "max_epochs": 10,
    "patience": 3,            # early stopping on val F1
    "pad_id": int(pad_id),
    "max_len": 256,
    "num_classes": 2,
    "weight_decay" : 1e-5
}

# --- define the discrete choices (2 each) ---
GRID_CHOICES = {
    "hidden_dim":   [128, 256],        # ← pick your two
    "lr":           [5e-4, 1e-3],      # ← two learning rates
    "weight_decay": [1e-5, 2e-5],      # ← two L2 values
    "dropout":      [0.4, 0.5],        # ← two dropout probs
}

# optional: fix RNG for any shuffling you do later
SEED = 42
random.seed(SEED)

# --- build cartesian product (2*2*2*2 = 16 configs) ---
keys = list(GRID_CHOICES.keys())
product_vals = list(itertools.product(*(GRID_CHOICES[k] for k in keys)))

grid = []
for vals in product_vals:
    cfg = {k: v for k, v in zip(keys, vals)}
    # stable hash ID for tracking & checkpoint folder naming
    cfg_str = json.dumps(cfg, sort_keys=True)
    cfg_id = hashlib.md5(cfg_str.encode()).hexdigest()[:8]
    cfg["cfg_id"] = cfg_id
    grid.append(cfg)

# (optional) shuffle run order to avoid bias if you stop early
random.shuffle(grid)



def train_eval_config(cfg, CONFIG_BASE, embedding_matrix, train_dl, val_dl, device):
    """Train a single BiLSTM config and return best F1 + training history."""
    start_time = time.time()

    # unpack config
    hdim = cfg["hidden_dim"]
    lr = cfg["lr"]
    wd = cfg["weight_decay"]
    drop = cfg["dropout"]

    print(f"\n=== Testing Config {cfg['cfg_id']} ===")
    print(f"hidden_dim={hdim} | lr={lr} | weight_decay={wd} | dropout={drop}")

    # --- Model / optimizer ---
    model = BiLSTMClassifier(
        embedding_matrix=embedding_matrix,
        pad_id=CONFIG_BASE["pad_id"],
        hidden_dim=hdim,
        num_layers=CONFIG_BASE["num_layers"],
        dropout=drop,
        bidirectional=CONFIG_BASE["bidirectional"],
        num_classes=CONFIG_BASE["num_classes"],
    ).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    criterion = torch.nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=1
    )

    best_f1 = -1.0
    best_val_loss = float("inf")
    patience = CONFIG_BASE.get("patience", 1)
    no_improve = 0
    history = []

    # training loop
    for epoch in range(1, CONFIG_BASE["max_epochs"] + 1):
        model.train()
        running_loss = 0.0
        train_pbar = tqdm(train_dl, desc=f"[{cfg['cfg_id']}] Epoch {epoch:02d} [Train]", leave=False)
        for xb, yb in train_pbar:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad(set_to_none=True)
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_pbar.set_postfix(loss=f"{loss.item():.4f}")
        train_loss_avg = running_loss / max(1, len(train_dl))

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            val_pbar = tqdm(val_dl, desc=f"[{cfg['cfg_id']}] Epoch {epoch:02d} [Val]", leave=False)
            for xb, yb in val_pbar:
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                vloss = criterion(logits, yb)
                val_loss += vloss.item()
                val_pbar.set_postfix(vloss=f"{vloss.item():.4f}")
        val_loss_avg = val_loss / max(1, len(val_dl))

        p, r, val_f1, acc = evaluate_metrics(val_dl, model, device)
        scheduler.step(val_loss_avg)

        # record metrics — add cfg_id here
        history.append({
            "cfg_id": cfg["cfg_id"],
            "epoch": epoch,
            "train_loss": float(train_loss_avg),
            "val_loss": float(val_loss_avg),
            "val_precision": float(p),
            "val_recall": float(r),
            "val_F1": float(val_f1),
            "val_accuracy": float(acc),
        })

        print(f"Epoch {epoch:02d} | Train {train_loss_avg:.4f} | Val {val_loss_avg:.4f} | F1 {val_f1:.4f}")

        # early stop on val loss
        if val_loss_avg < best_val_loss - 1e-4:
            best_val_loss = val_loss_avg
            best_f1 = val_f1
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print(f"Early stopping at epoch {epoch:02d} (best F1={best_f1:.4f})")
                break

    total_time = (time.time() - start_time) / 60
    print(f" Finished {cfg['cfg_id']} | Best F1={best_f1:.4f} | Time={total_time:.2f} min")


    return best_f1, history


In [17]:
all_results = []

for cfg in grid:
    f1, hist = train_eval_config(cfg, BASE_CONFIG, embedding_matrix, train_dl, val_dl, device)
    all_results.append({
        **cfg,
        "best_F1": f1,
        "epochs_ran": len(hist),
    })

# save final grid results
results_df = pd.DataFrame(all_results).sort_values("best_F1", ascending=False)
results_df.to_csv("grid_results_short.csv", index=False)
print(" Grid Search Complete:")
print(results_df.head())

INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True



=== Testing Config 883cf59c ===
hidden_dim=128 | lr=0.001 | weight_decay=2e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1635 | Val 0.0802 | F1 0.9743


                                                                                             

Epoch 02 | Train 0.0800 | Val 0.0687 | F1 0.9790


                                                                                             

Epoch 03 | Train 0.0530 | Val 0.0647 | F1 0.9789


                                                                                             

Epoch 04 | Train 0.0345 | Val 0.0453 | F1 0.9872


                                                                                             

Epoch 05 | Train 0.0243 | Val 0.0707 | F1 0.9812


                                                                                             

Epoch 06 | Train 0.0181 | Val 0.0412 | F1 0.9888


                                                                                             

Epoch 07 | Train 0.0159 | Val 0.0396 | F1 0.9890


                                                                                             

Epoch 08 | Train 0.0124 | Val 0.0484 | F1 0.9878


                                                                                             

Epoch 09 | Train 0.0093 | Val 0.0547 | F1 0.9888


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0038 | Val 0.0401 | F1 0.9917
Early stopping at epoch 10 (best F1=0.9890)
 Finished 883cf59c | Best F1=0.9890 | Time=5.69 min

=== Testing Config f6cb5e97 ===
hidden_dim=256 | lr=0.0005 | weight_decay=1e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1731 | Val 0.0804 | F1 0.9714


                                                                                             

Epoch 02 | Train 0.0826 | Val 0.0607 | F1 0.9800


                                                                                             

Epoch 03 | Train 0.0555 | Val 0.0496 | F1 0.9839


                                                                                             

Epoch 04 | Train 0.0418 | Val 0.0653 | F1 0.9766


                                                                                             

Epoch 05 | Train 0.0353 | Val 0.0440 | F1 0.9857


                                                                                             

Epoch 06 | Train 0.0225 | Val 0.0436 | F1 0.9874


                                                                                             

Epoch 07 | Train 0.0225 | Val 0.0431 | F1 0.9883


                                                                                             

Epoch 08 | Train 0.0139 | Val 0.0457 | F1 0.9876


                                                                                             

Epoch 09 | Train 0.0106 | Val 0.0552 | F1 0.9885


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0038 | Val 0.0567 | F1 0.9895
Early stopping at epoch 10 (best F1=0.9883)
 Finished f6cb5e97 | Best F1=0.9883 | Time=11.51 min

=== Testing Config d53d97aa ===
hidden_dim=128 | lr=0.001 | weight_decay=1e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1538 | Val 0.0741 | F1 0.9744


                                                                                             

Epoch 02 | Train 0.0657 | Val 0.0487 | F1 0.9833


                                                                                             

Epoch 03 | Train 0.0436 | Val 0.0376 | F1 0.9878


                                                                                             

Epoch 04 | Train 0.0299 | Val 0.0355 | F1 0.9884


                                                                                             

Epoch 05 | Train 0.0200 | Val 0.0391 | F1 0.9887


                                                                                             

Epoch 06 | Train 0.0161 | Val 0.0422 | F1 0.9892


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 07 | Train 0.0055 | Val 0.0425 | F1 0.9900
Early stopping at epoch 07 (best F1=0.9884)
 Finished d53d97aa | Best F1=0.9884 | Time=3.99 min

=== Testing Config 85686737 ===
hidden_dim=128 | lr=0.001 | weight_decay=2e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1523 | Val 0.0664 | F1 0.9772


                                                                                             

Epoch 02 | Train 0.0639 | Val 0.0473 | F1 0.9843


                                                                                             

Epoch 03 | Train 0.0394 | Val 0.0387 | F1 0.9879


                                                                                             

Epoch 04 | Train 0.0289 | Val 0.0392 | F1 0.9878


                                                                                             

Epoch 05 | Train 0.0192 | Val 0.0417 | F1 0.9887


                                                                                             

Epoch 06 | Train 0.0076 | Val 0.0377 | F1 0.9904


                                                                                             

Epoch 07 | Train 0.0051 | Val 0.0630 | F1 0.9877


                                                                                             

Epoch 08 | Train 0.0049 | Val 0.0530 | F1 0.9892


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 09 | Train 0.0011 | Val 0.0465 | F1 0.9910
Early stopping at epoch 09 (best F1=0.9904)
 Finished 85686737 | Best F1=0.9904 | Time=5.12 min

=== Testing Config d595bd3c ===
hidden_dim=256 | lr=0.001 | weight_decay=2e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.2157 | Val 0.0713 | F1 0.9760


                                                                                             

Epoch 02 | Train 0.0663 | Val 0.0572 | F1 0.9819


                                                                                             

Epoch 03 | Train 0.0414 | Val 0.0376 | F1 0.9867


                                                                                             

Epoch 04 | Train 0.0275 | Val 0.0538 | F1 0.9818


                                                                                             

Epoch 05 | Train 0.0173 | Val 0.0405 | F1 0.9904


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 06 | Train 0.0060 | Val 0.0444 | F1 0.9897
Early stopping at epoch 06 (best F1=0.9867)
 Finished d595bd3c | Best F1=0.9867 | Time=6.92 min

=== Testing Config 7898bc9a ===
hidden_dim=256 | lr=0.0005 | weight_decay=2e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1706 | Val 0.2086 | F1 0.9227


                                                                                             

Epoch 02 | Train 0.0985 | Val 0.2552 | F1 0.9076


                                                                                             

Epoch 03 | Train 0.0745 | Val 0.0496 | F1 0.9848


                                                                                             

Epoch 04 | Train 0.0455 | Val 0.0695 | F1 0.9765


                                                                                             

Epoch 05 | Train 0.0328 | Val 0.0590 | F1 0.9811


                                                                                             

Epoch 06 | Train 0.0194 | Val 0.0443 | F1 0.9875


                                                                                             

Epoch 07 | Train 0.0147 | Val 0.0446 | F1 0.9868


                                                                                             

Epoch 08 | Train 0.0120 | Val 0.0561 | F1 0.9828


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 09 | Train 0.0058 | Val 0.0495 | F1 0.9889
Early stopping at epoch 09 (best F1=0.9875)
 Finished 7898bc9a | Best F1=0.9875 | Time=10.36 min

=== Testing Config a207c8ec ===
hidden_dim=256 | lr=0.001 | weight_decay=1e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1503 | Val 0.0732 | F1 0.9752


                                                                                             

Epoch 02 | Train 0.0656 | Val 0.0577 | F1 0.9815


                                                                                             

Epoch 03 | Train 0.0883 | Val 0.0424 | F1 0.9861


                                                                                             

Epoch 04 | Train 0.0295 | Val 0.0372 | F1 0.9897


                                                                                             

Epoch 05 | Train 0.0276 | Val 0.0405 | F1 0.9875


                                                                                             

Epoch 06 | Train 0.0136 | Val 0.0462 | F1 0.9872


                                                                                             

Epoch 07 | Train 0.0055 | Val 0.0368 | F1 0.9907


                                                                                             

Epoch 08 | Train 0.0031 | Val 0.0488 | F1 0.9912


                                                                                             

Epoch 09 | Train 0.0041 | Val 0.0542 | F1 0.9904


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0008 | Val 0.0563 | F1 0.9911
Early stopping at epoch 10 (best F1=0.9907)
 Finished a207c8ec | Best F1=0.9907 | Time=11.52 min

=== Testing Config 292efb59 ===
hidden_dim=256 | lr=0.0005 | weight_decay=1e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1494 | Val 0.0642 | F1 0.9756


                                                                                             

Epoch 02 | Train 0.0745 | Val 0.0622 | F1 0.9778


                                                                                             

Epoch 03 | Train 0.0692 | Val 0.0561 | F1 0.9817


                                                                                             

Epoch 04 | Train 0.0403 | Val 0.0534 | F1 0.9828


                                                                                             

Epoch 05 | Train 0.0279 | Val 0.0723 | F1 0.9798


                                                                                             

Epoch 06 | Train 0.0225 | Val 0.0475 | F1 0.9872


                                                                                             

Epoch 07 | Train 0.0159 | Val 0.0523 | F1 0.9861


                                                                                             

Epoch 08 | Train 0.0129 | Val 0.0419 | F1 0.9892


                                                                                             

Epoch 09 | Train 0.0103 | Val 0.0485 | F1 0.9910


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0076 | Val 0.0519 | F1 0.9878
 Finished 292efb59 | Best F1=0.9892 | Time=11.50 min

=== Testing Config 16440076 ===
hidden_dim=128 | lr=0.0005 | weight_decay=1e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1540 | Val 0.0741 | F1 0.9756


                                                                                             

Epoch 02 | Train 0.0855 | Val 0.0665 | F1 0.9777


                                                                                             

Epoch 03 | Train 0.0579 | Val 0.0562 | F1 0.9828


                                                                                             

Epoch 04 | Train 0.0391 | Val 0.0507 | F1 0.9825


                                                                                             

Epoch 05 | Train 0.0374 | Val 0.0488 | F1 0.9847


                                                                                             

Epoch 06 | Train 0.0293 | Val 0.0403 | F1 0.9873


                                                                                             

Epoch 07 | Train 0.0187 | Val 0.0491 | F1 0.9855


                                                                                             

Epoch 08 | Train 0.0177 | Val 0.0398 | F1 0.9886


                                                                                             

Epoch 09 | Train 0.0121 | Val 0.0384 | F1 0.9881


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0120 | Val 0.0408 | F1 0.9898
 Finished 16440076 | Best F1=0.9881 | Time=5.67 min

=== Testing Config f1ed75eb ===
hidden_dim=128 | lr=0.0005 | weight_decay=2e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1478 | Val 0.0666 | F1 0.9776


                                                                                             

Epoch 02 | Train 0.0716 | Val 0.0507 | F1 0.9824


                                                                                             

Epoch 03 | Train 0.0539 | Val 0.0455 | F1 0.9846


                                                                                             

Epoch 04 | Train 0.0377 | Val 0.0453 | F1 0.9857


                                                                                             

Epoch 05 | Train 0.0261 | Val 0.0465 | F1 0.9846


                                                                                             

Epoch 06 | Train 0.0190 | Val 0.0639 | F1 0.9823


                                                                                             

Epoch 07 | Train 0.0106 | Val 0.0355 | F1 0.9892


                                                                                             

Epoch 08 | Train 0.0068 | Val 0.0455 | F1 0.9881


                                                                                             

Epoch 09 | Train 0.0065 | Val 0.0474 | F1 0.9900


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0020 | Val 0.0498 | F1 0.9883
Early stopping at epoch 10 (best F1=0.9892)
 Finished f1ed75eb | Best F1=0.9892 | Time=5.69 min

=== Testing Config bda3e873 ===
hidden_dim=256 | lr=0.001 | weight_decay=1e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1692 | Val 0.2416 | F1 0.9034


                                                                                             

Epoch 02 | Train 0.0984 | Val 0.0495 | F1 0.9828


                                                                                             

Epoch 03 | Train 0.0492 | Val 0.0441 | F1 0.9862


                                                                                             

Epoch 04 | Train 0.0327 | Val 0.0405 | F1 0.9881


                                                                                             

Epoch 05 | Train 0.0260 | Val 0.0426 | F1 0.9880


                                                                                             

Epoch 06 | Train 0.0149 | Val 0.0424 | F1 0.9888


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 07 | Train 0.0064 | Val 0.0418 | F1 0.9912
Early stopping at epoch 07 (best F1=0.9881)
 Finished bda3e873 | Best F1=0.9881 | Time=8.06 min

=== Testing Config 6b3b9faa ===
hidden_dim=256 | lr=0.001 | weight_decay=2e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1690 | Val 0.0921 | F1 0.9715


                                                                                             

Epoch 02 | Train 0.0685 | Val 0.0583 | F1 0.9807


                                                                                             

Epoch 03 | Train 0.0437 | Val 0.0458 | F1 0.9855


                                                                                             

Epoch 04 | Train 0.0347 | Val 0.0407 | F1 0.9862


                                                                                             

Epoch 05 | Train 0.0195 | Val 0.0336 | F1 0.9909


                                                                                             

Epoch 06 | Train 0.0137 | Val 0.0363 | F1 0.9901


                                                                                             

Epoch 07 | Train 0.0157 | Val 0.0509 | F1 0.9872


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 08 | Train 0.0061 | Val 0.0422 | F1 0.9904
Early stopping at epoch 08 (best F1=0.9909)
 Finished 6b3b9faa | Best F1=0.9909 | Time=9.21 min

=== Testing Config 85707e3f ===
hidden_dim=128 | lr=0.001 | weight_decay=1e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1443 | Val 0.0664 | F1 0.9768


                                                                                             

Epoch 02 | Train 0.0681 | Val 0.0516 | F1 0.9832


                                                                                             

Epoch 03 | Train 0.0416 | Val 0.1244 | F1 0.9604


                                                                                             

Epoch 04 | Train 0.0326 | Val 0.0407 | F1 0.9879


                                                                                             

Epoch 05 | Train 0.0200 | Val 0.0393 | F1 0.9878


                                                                                             

Epoch 06 | Train 0.0148 | Val 0.0411 | F1 0.9898


                                                                                             

Epoch 07 | Train 0.0097 | Val 0.0407 | F1 0.9912


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 08 | Train 0.0035 | Val 0.0492 | F1 0.9891
Early stopping at epoch 08 (best F1=0.9878)
 Finished 85707e3f | Best F1=0.9878 | Time=4.52 min

=== Testing Config f9e4a5b9 ===
hidden_dim=256 | lr=0.0005 | weight_decay=2e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1569 | Val 0.0835 | F1 0.9712


                                                                                             

Epoch 02 | Train 0.0759 | Val 0.0535 | F1 0.9817


                                                                                             

Epoch 03 | Train 0.0506 | Val 0.0533 | F1 0.9837


                                                                                             

Epoch 04 | Train 0.0408 | Val 0.0510 | F1 0.9835


                                                                                             

Epoch 05 | Train 0.0333 | Val 0.0523 | F1 0.9846


                                                                                             

Epoch 06 | Train 0.0252 | Val 0.0416 | F1 0.9877


                                                                                             

Epoch 07 | Train 0.0158 | Val 0.0478 | F1 0.9875


                                                                                             

Epoch 08 | Train 0.0154 | Val 0.0396 | F1 0.9884


                                                                                             

Epoch 09 | Train 0.0124 | Val 0.0559 | F1 0.9870


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 10 | Train 0.0083 | Val 0.0465 | F1 0.9885
 Finished f9e4a5b9 | Best F1=0.9884 | Time=11.50 min

=== Testing Config 8f080f64 ===
hidden_dim=128 | lr=0.0005 | weight_decay=1e-05 | dropout=0.4


                                                                                             

Epoch 01 | Train 0.1526 | Val 0.0796 | F1 0.9721


                                                                                             

Epoch 02 | Train 0.0710 | Val 0.0549 | F1 0.9801


                                                                                             

Epoch 03 | Train 0.0524 | Val 0.0447 | F1 0.9842


                                                                                             

Epoch 04 | Train 0.0370 | Val 0.0564 | F1 0.9800


                                                                                             

Epoch 05 | Train 0.0277 | Val 0.0349 | F1 0.9885


                                                                                             

Epoch 06 | Train 0.0246 | Val 0.0380 | F1 0.9872


                                                                                             

Epoch 07 | Train 0.0160 | Val 0.0436 | F1 0.9878


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=128, layers=2, bidirectional=True, freeze_embeddings=True


Epoch 08 | Train 0.0065 | Val 0.0452 | F1 0.9895
Early stopping at epoch 08 (best F1=0.9885)
 Finished 8f080f64 | Best F1=0.9885 | Time=4.55 min

=== Testing Config 1f95bff2 ===
hidden_dim=128 | lr=0.0005 | weight_decay=2e-05 | dropout=0.5


                                                                                             

Epoch 01 | Train 0.1623 | Val 0.0715 | F1 0.9747


                                                                                             

Epoch 02 | Train 0.0729 | Val 0.0568 | F1 0.9816


                                                                                             

Epoch 03 | Train 0.0519 | Val 0.0521 | F1 0.9833


                                                                                             

Epoch 04 | Train 0.0355 | Val 0.0483 | F1 0.9846


                                                                                             

Epoch 05 | Train 0.0307 | Val 0.0456 | F1 0.9843


                                                                                             

Epoch 06 | Train 0.0245 | Val 0.0520 | F1 0.9825


                                                                                             

Epoch 07 | Train 0.0187 | Val 0.0414 | F1 0.9876


                                                                                             

Epoch 08 | Train 0.0142 | Val 0.0419 | F1 0.9883


                                                                                             

Epoch 09 | Train 0.0123 | Val 0.0438 | F1 0.9896


                                                                                             

Epoch 10 | Train 0.0034 | Val 0.0426 | F1 0.9905
Early stopping at epoch 10 (best F1=0.9876)
 Finished 1f95bff2 | Best F1=0.9876 | Time=5.64 min
 Grid Search Complete:
    hidden_dim      lr  weight_decay  dropout    cfg_id   best_F1  epochs_ran
11         256  0.0010       0.00002      0.5  6b3b9faa  0.990862           8
6          256  0.0010       0.00001      0.4  a207c8ec  0.990733          10
3          128  0.0010       0.00002      0.4  85686737  0.990421           9
7          256  0.0005       0.00001      0.4  292efb59  0.989240          10
9          128  0.0005       0.00002      0.4  f1ed75eb  0.989230          10


#### training model on best config

In [19]:
CONFIG = {
    "seed": 42,
    "batch_size_train": 64,
    "batch_size_eval": 128,
    "hidden_dim": 256,
    "num_layers": 2,
    "dropout": 0.5,
    "bidirectional": True,
    "lr": 1e-3,
    "max_epochs": 20,
    "patience": 3,            # early stopping on val F1
    "pad_id": int(pad_id),
    "max_len": 256,
    "num_classes": 2,
    "weight_decay" : 2e-5
}


# Model/optim/scheduler from your CONFIG
model = BiLSTMClassifier(
    embedding_matrix=embedding_matrix,
    pad_id=CONFIG["pad_id"],
    hidden_dim=CONFIG["hidden_dim"],
    num_layers=CONFIG["num_layers"],
    dropout=CONFIG["dropout"],
    bidirectional=CONFIG["bidirectional"],
    num_classes=CONFIG["num_classes"],
).to(device)

criterion = torch.nn.CrossEntropyLoss()  # or weighted, as earlier
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=CONFIG["lr"], weight_decay=CONFIG.get("weight_decay", 0.0)
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=1
)


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True


In [None]:
best_f1 = -1.0
best_val_loss = float("inf")
start_epoch = 1

# resume if a checkpoint exists
latest = latest_epoch_path()
if latest:
    ckpt = load_ckpt(latest, model, optimizer, scheduler, map_location=device)
    start_epoch = ckpt["epoch"] + 1
    best_f1 = ckpt.get("best_val_f1", -1.0)
    print(f"Resumed from {latest} at epoch {start_epoch-1}, best_val_f1={best_f1:.4f}")

no_improve = 0
history = []  

for epoch in range(start_epoch, CONFIG["max_epochs"] + 1):
    model.train()
    # Optional: re-seed per epoch to keep determinism stable across resumes
    # torch.manual_seed(CONFIG["seed"] + epoch)
    # torch.cuda.manual_seed_all(CONFIG["seed"] + epoch)

    running_loss = 0.0

    # add tqdm progress bar for training loop
    train_pbar = tqdm(train_dl, desc=f"Epoch {epoch:02d} [Train]", leave=False)
    for xb, yb in train_pbar:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        if "clip_grad_norm" in CONFIG and CONFIG["clip_grad_norm"]:
            torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG["clip_grad_norm"])
        optimizer.step()

        running_loss += loss.item()
        train_pbar.set_postfix(loss=f"{loss.item():.4f}")  # show current batch loss

    # compute average train loss for the epoch
    train_loss_avg = running_loss / max(1, len(train_dl))

    # compute validation loss (in addition to F1)
    model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        val_pbar = tqdm(val_dl, desc=f"Epoch {epoch:02d} [Val]", leave=False)
        for xb, yb in val_pbar:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            vloss = criterion(logits, yb)
            val_running_loss += vloss.item()
            val_pbar.set_postfix(vloss=f"{vloss.item():.4f}")

    val_loss_avg = val_running_loss / max(1, len(val_dl))

    p, r, val_f1, acc = evaluate_metrics(val_dl, model, device)
    val_loss_avg = val_running_loss / max(1, len(val_dl))
    scheduler.step(val_loss_avg)
    print(f"epoch {epoch:02d} | train_loss={train_loss_avg:.4f} | val_loss={val_loss_avg:.4f} | "
          f"P={p:.4f} | R={r:.4f} | F1={val_f1:.4f}")

    # record metrics
    history.append({
        "epoch": epoch,
        "train_loss": float(train_loss_avg),
        "val_loss": float(val_loss_avg),
        "val_precision": float(p),
        "val_recall": float(r),
        "val_F1": float(val_f1),
        "val_accuracy": float(acc),
    })

    # save per-epoch checkpoint
    save_ckpt(os.path.join(CKPT_DIR, f"epoch_{epoch:02d}.pt"),
              epoch, model, optimizer, scheduler, CONFIG, best_f1, CONFIG["seed"])

    # update best model based on F1 (for saving)
    if val_f1 > best_f1:
        best_f1 = val_f1
        save_ckpt(os.path.join(best_ckpt_dir, "best.pt"),
                  epoch, model, optimizer, scheduler, CONFIG, best_f1, CONFIG["seed"])

    # early stopping based on val loss
    if val_loss_avg < best_val_loss - 1e-5:  # small tolerance
        best_val_loss = val_loss_avg
        no_improve = 0
    else:
        no_improve += 1
        if no_improve >= CONFIG["patience"]:
            print("Early stopping triggered on validation loss.")
            break

# save training results
history_df = pd.DataFrame(history)  
history_path = os.path.join(CKPT_DIR, "training_history.csv")  
history_df.to_csv(history_path, index=False) 
print("Saved training history to:", history_path) 


                                                                                  

epoch 01 | train_loss=0.1933 | val_loss=0.1206 | P=0.9258 | R=0.9906 | F1=0.9571


                                                                                  

epoch 02 | train_loss=0.0907 | val_loss=0.0682 | P=0.9904 | R=0.9686 | F1=0.9794


                                                                                  

epoch 03 | train_loss=0.0587 | val_loss=0.0454 | P=0.9782 | R=0.9895 | F1=0.9838


                                                                                  

epoch 04 | train_loss=0.0398 | val_loss=0.0486 | P=0.9820 | R=0.9855 | F1=0.9837


                                                                                  

epoch 05 | train_loss=0.0303 | val_loss=0.0419 | P=0.9889 | R=0.9850 | F1=0.9870


                                                                                  

epoch 06 | train_loss=0.0259 | val_loss=0.0415 | P=0.9903 | R=0.9831 | F1=0.9867


                                                                                  

epoch 07 | train_loss=0.0204 | val_loss=0.0468 | P=0.9819 | R=0.9941 | F1=0.9880


                                                                                  

epoch 08 | train_loss=0.0162 | val_loss=0.0413 | P=0.9822 | R=0.9941 | F1=0.9881


                                                                                  

epoch 09 | train_loss=0.0139 | val_loss=0.0505 | P=0.9887 | R=0.9864 | F1=0.9876


                                                                                  

epoch 10 | train_loss=0.0124 | val_loss=0.0515 | P=0.9838 | R=0.9960 | F1=0.9899


                                                                                  

epoch 11 | train_loss=0.0044 | val_loss=0.0508 | P=0.9922 | R=0.9873 | F1=0.9898
Early stopping triggered on validation loss.
Saved training history to: checkpoints/training_history.csv
Saved manifest and embedding matrix.


In [21]:
# reloader 

training_results = pd.read_csv('checkpoints/training_history.csv')
training_results

Unnamed: 0,epoch,train_loss,val_loss,val_precision,val_recall,val_F1,val_accuracy
0,1,0.193334,0.120629,0.925756,0.990626,0.957093,0.952989
1,2,0.090746,0.068228,0.990415,0.968596,0.979384,0.978417
2,3,0.058735,0.04545,0.978221,0.989454,0.983805,0.982759
3,4,0.039831,0.04858,0.982018,0.98547,0.983741,0.982759
4,5,0.030287,0.041949,0.988941,0.985001,0.986967,0.986232
5,6,0.025853,0.041528,0.990321,0.983126,0.986711,0.985984
6,7,0.020409,0.046762,0.981944,0.994141,0.988005,0.987224
7,8,0.016213,0.041266,0.982172,0.994141,0.98812,0.987348
8,9,0.01392,0.050546,0.988724,0.986407,0.987565,0.986852
9,10,0.012399,0.051512,0.983796,0.996016,0.989868,0.989209


In [None]:
# import os
# import json
# import torch
# import numpy as np

# # ------------------------------------
# # 1. Define paths
# # ------------------------------------
# best_ckpt_dir = "best_ckpts"   # <-- change this to your actual folder path
# manifest_path = os.path.join(best_ckpt_dir, "manifest.json")

# # ------------------------------------
# # 2. Load manifest and embedding matrix
# # ------------------------------------
# with open(manifest_path, "r") as f:
#     manifest = json.load(f)

# embedding_matrix = np.load(manifest["embedding_matrix_file"])
# print(f"Loaded embedding matrix: {embedding_matrix.shape}")

# # ------------------------------------
# # 3. Rebuild model architecture
# # ------------------------------------
# from biLSTM import BiLSTMClassifier  # <-- replace with your actual module name

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = BiLSTMClassifier(
#     embedding_matrix=embedding_matrix,
#     pad_id=manifest["pad_id"],
#     hidden_dim=manifest["hidden_dim"],
#     num_layers=manifest["num_layers"],
#     dropout=manifest["dropout"],
#     bidirectional=manifest["bidirectional"],
#     num_classes=manifest["num_classes"],
# ).to(device)

# # ------------------------------------
# # 4. Load weights and set to eval mode
# # ------------------------------------
# ckpt = torch.load(manifest["best_ckpt"], map_location=device)
# model.load_state_dict(ckpt["model"], strict=True)
# model.eval()

# print("✅ BiLSTM model successfully reloaded and ready for inference.")

Loaded embedding matrix: (50000, 300)


INFO:biLSTM:BiLSTM Encoder initialized | emb_dim=300, hidden_dim=256, layers=2, bidirectional=True, freeze_embeddings=True
  ckpt = torch.load(manifest["best_ckpt"], map_location=device)


✅ BiLSTM model successfully reloaded and ready for inference.


In [None]:
import numpy as np
import json
from sklearn.metrics import precision_recall_fscore_support

# collect validation probabilities and labels
probs, labels = [], []
model.eval()
with torch.no_grad():
    for xb, yb in val_dl:
        xb = xb.to(device)
        logits = model(xb)                          # [b, 2]
        p = torch.softmax(logits, dim=1)[:, 1]      # prob of class 1
        probs.append(p.cpu().numpy())
        labels.append(yb.numpy())

probs = np.concatenate(probs)
labels = np.concatenate(labels)

# baseline at threshold = 0.5
y_hat_05 = (probs >= 0.5).astype(int)
p05, r05, f105, _ = precision_recall_fscore_support(labels, y_hat_05, average="binary", zero_division=0)

# sweep thresholds to find best f1
thr_grid = np.linspace(0.0, 1.0, 1001)  # 0.001 step
best = {"f1": -1.0, "thr": 0.5, "p": 0.0, "r": 0.0}
for t in thr_grid:
    y_hat = (probs >= t).astype(int)
    p, r, f1, _ = precision_recall_fscore_support(labels, y_hat, average="binary", zero_division=0)
    if f1 > best["f1"]:
        best.update({"f1": float(f1), "thr": float(t), "p": float(p), "r": float(r)})

print("\n=== threshold tuning (validation) ===")
print(f"baseline @ 0.5 ->  f1={f105:.4f}  p={p05:.4f}  r={r05:.4f}")
print(f"tuned    @ {best['thr']:.3f} ->  f1={best['f1']:.4f}  p={best['p']:.4f}  r={best['r']:.4f}")

# # save tuned threshold back to manifest.json (idempotent update)
# with open(manifest_path, "r") as f:
#     manifest = json.load(f)
# manifest["val_threshold"] = best["thr"]
# with open(manifest_path, "w") as f:
#     json.dump(manifest, f, indent=2)

# print(f"✔ saved tuned threshold ({best['thr']:.3f}) to: {manifest_path}")


=== threshold tuning (validation) ===
baseline @ 0.5 ->  f1=0.9899  p=0.9838  r=0.9960
tuned    @ 0.734 ->  f1=0.9901  p=0.9865  r=0.9937
✔ saved tuned threshold (0.734) to: best_ckpts/manifest.json


In [None]:
# === final manifest saving after threshold tuning ===

# save embedding matrix (if not already saved)
embedding_path = os.path.join(best_ckpt_dir, "embedding_matrix.npy")
np.save(embedding_path, embedding_matrix)

# create the manifest dict including tuned threshold
manifest = {
    "seed": CONFIG["seed"],
    "pad_id": CONFIG["pad_id"],
    "max_len": CONFIG["max_len"],
    "hidden_dim": CONFIG["hidden_dim"],
    "num_layers": CONFIG["num_layers"],
    "dropout": CONFIG["dropout"],
    "weight_decay": CONFIG["weight_decay"],
    "bidirectional": CONFIG["bidirectional"],
    "num_classes": CONFIG["num_classes"],

    # model + resources
    "embedding_matrix_file": embedding_path,
    "sp_model_path": "full_220/email_sp.model",
    "best_ckpt": os.path.join(best_ckpt_dir, "best.pt"),

    # add tuned threshold
    "val_threshold": float(best["thr"]),


}

# write manifest to JSON file
manifest_path = os.path.join(best_ckpt_dir, "manifest.json")
with open(manifest_path, "w") as f:
    json.dump(manifest, f, indent=2)

print(f"✅ Saved final manifest with tuned threshold to: {manifest_path}")