In [None]:
%load_ext autoreload
%autoreload 2

import os

# Set project root: move the working directory to the repository folder so the
# relative paths used in this notebook resolve no matter where the kernel started.
repo_name = "\\thesis"
# Compare directory *names* rather than splitting the raw path string: this
# works with either path separator and does not match siblings such as
# "thesis_backup".
repo_dir_name = repo_name.strip("\\/")

start_dir = os.path.abspath(os.getcwd())
project_root = None
candidate = start_dir
while True:
    if os.path.basename(candidate) == repo_dir_name:
        # Keep walking so the outermost matching directory is selected.
        project_root = candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:  # reached the filesystem root
        break
    candidate = parent

if project_root is None:
    raise FileNotFoundError(
        f"Could not find a '{repo_dir_name}' directory at or above {start_dir}; "
        "start the notebook from inside the repository."
    )

os.chdir(project_root)
print(f'Changed working directory to: {os.getcwd()}')

#### 1. Corpus Example

In [1]:
import pickle

# Load data
# NOTE(security): unpickling can execute arbitrary code, so only load pickle
# files that were produced by this project or come from a trusted source.
# The `with` blocks close each file handle as soon as its object is loaded.
with open("data/examples.pkl", "rb") as examples_file:
    examples = pickle.load(examples_file)
with open("data/vocab.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)

# Displayed as the cell output: number of loaded examples.
len(examples)

588775

#### 2. Run Experiment

In [2]:
from src.model_training import train_one_bucket, TrainConfig
from src.dataloaders import bucket_examples_by_distance


# Distance buckets as inclusive (lower, upper) ranges.
distance_buckets = [(32, 63), (64, 127), (128, 255), (256, 511), (512, 1023)]

# Group every example into its bucket; the result is indexed by the range tuple.
bucketed = bucket_examples_by_distance(examples, distance_buckets)

# Report how many examples ended up in each bucket.
for bucket in distance_buckets:
    bucket_size = len(bucketed[bucket])
    print(f"Bucket {bucket}: {bucket_size} examples")


Bucket (32, 63): 274333 examples
Bucket (64, 127): 232721 examples
Bucket (128, 255): 77093 examples
Bucket (256, 511): 4593 examples
Bucket (512, 1023): 35 examples


In [7]:
import random
import time
from src.model_training import set_seed
from src.model_training import train_one_bucket, train_one_bucket_lstm

print("Starting training on distance buckets...\n")

# Value returned by each training run, keyed by (model_key, max_len).
vals_acc = {}

# (display label, key used in vals_acc, training function).
# FOFENet is trained before the BiLSTM for every bucket.
trainers = (
    ("FOFENet", "fofe", train_one_bucket),
    ("BiLSTM", "lstm", train_one_bucket_lstm),
)

for i, bucket_range in enumerate(distance_buckets):
    # The upper bound of the bucket range doubles as the maximum length
    # passed to the config and to the training functions.
    range_start, max_len = bucket_range

    print(f"Training on bucket {i}")
    print(f"Bucket range: {range_start}–{max_len} tokens")

    cfg = TrainConfig(
        emb_dim=256,
        hidden_dim=256,
        bidirectional=True,
        max_len=max_len,
        epochs=15,
        alpha=0.95,
        batch_size=512,
        lr=0.0007,
        seed=42,
    )
    # Seed before shuffling so the shuffle order is covered by the seed
    # (assumes set_seed also seeds Python's `random` -- TODO confirm in
    # src.model_training).
    set_seed(cfg.seed)

    # Shuffle a copy: shuffling the list stored in `bucketed` in place would
    # change its order on every re-run of this cell. The global `examples`
    # list is deliberately left untouched as well.
    bucket_examples = list(bucketed[bucket_range])
    random.shuffle(bucket_examples)

    for label, model_key, train_fn in trainers:
        print("="*100)
        print(f"Training on bucket {i} {label}")
        print("="*100)
        # perf_counter is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        val_acc = train_fn(bucket_examples, max_len, vocab, cfg)
        elapsed = time.perf_counter() - start_time
        print(f"Training time: {elapsed:.2f} seconds")
        # Save accuracy
        vals_acc[(model_key, max_len)] = val_acc

Starting training on distance buckets...

Training on bucket 0
Bucket range: 32–63 tokens
Training on bucket 0 FOFENet
Number of parameters: 42297346
Class balance: neg=64.87%, pos=35.13%
Training bucket of size 129991 (train 103992, val 25999)
Epoch 1/15 | Train Loss: 0.6821 Train Acc: 56.37% | Val Loss: 0.6733 Val Acc: 59.26% | LR: 0.000700
Epoch 2/15 | Train Loss: 0.6650 Train Acc: 60.05% | Val Loss: 0.6707 Val Acc: 62.48% | LR: 0.000700
Epoch 3/15 | Train Loss: 0.6541 Train Acc: 62.17% | Val Loss: 0.6668 Val Acc: 62.06% | LR: 0.000700
Epoch 4/15 | Train Loss: 0.6423 Train Acc: 63.96% | Val Loss: 0.6679 Val Acc: 57.63% | LR: 0.000700
Epoch 5/15 | Train Loss: 0.6275 Train Acc: 66.01% | Val Loss: 0.6727 Val Acc: 64.07% | LR: 0.000700
Epoch 6/15 | Train Loss: 0.6094 Train Acc: 68.38% | Val Loss: 0.6687 Val Acc: 61.76% | LR: 0.000350
Epoch 7/15 | Train Loss: 0.5715 Train Acc: 72.73% | Val Loss: 0.6906 Val Acc: 63.50% | LR: 0.000350
Epoch 8/15 | Train Loss: 0.5484 Train Acc: 75.27% | Val

KeyboardInterrupt: 