In [1]:
import torch

# Import your custom modules
from config.Experiment_Config import (
    ExperimentConfig,
    ModelSettings,
    TrainingSettings,
    DataSettings,
    ModelSelection,
    TokenizerSettings
)
from models.LSTM.BiLSTM import BiLSTM
from models.LSTM.CNNBiLSTM import CNNBiLSTM
from utils.load_split import loader
from exp.trainer import Trainer
import matplotlib.pyplot as plt
import joblib


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Model hyperparameters; they reach BiLSTM / CNNBiLSTM through the experiment config.
model_settings = ModelSettings(
    # Number of classes. This is only a placeholder: the data-loading cell
    # overwrites it with len(label_encoder.classes_).
    output_dim=38,
    init_hidden_dim=256,
    num_layers=2,
    # Embedding source and its width.
    embedding_dim=768,
    embedding_type='BERT_base_uncased',
    dropout_rate=0.1,
    # Weight-initialisation options (semantics are defined by ModelSettings).
    weight_init='orthogonal',
    init_gain=1.0,
    init_std=0.02,
)

In [3]:
# Training options; handed to the Trainer as part of the experiment config.
training_settings = TrainingSettings(
    num_epochs=100,
    # NOTE(review): DataSettings also defines batch_size (32) — confirm which
    # of the two values the data loaders actually use.
    batch_size=256,
    learning_rate=0.001,
    weight_decay=0.01,
    gradient_clip=1.0,
    early_stopping_patience=5,
    # Output locations, relative to the notebook's working directory.
    save_model_dir='saved_models',
    save_results_dir='results',
    optimizer_type='adamw',
    scheduler_patience=5,
)


In [4]:
# Data options carried in the experiment config that loader(config) receives.
data_settings = DataSettings(
    which="question",
    # Split fractions; the three values sum to 1.0.
    train_size=0.8,
    val_size=0.1,
    test_size=0.1,
    # NOTE(review): TrainingSettings uses batch_size=256 — confirm which value
    # takes effect when the loaders are built.
    batch_size=32,
    shuffle=True,
    drop_last=True,
)

In [5]:
# Architecture choice. The model-construction cell dispatches on model_type only.
# NOTE(review): use_cnn=True is combined with model_type='BiLSTM' — confirm
# whether 'CNNBiLSTM' was intended or whether BiLSTM itself reads use_cnn.
model_selection = ModelSelection(
    model_type='BiLSTM',
    use_cnn=True,
    cnn_layers=2,
)

In [6]:
# Tokenizer options carried in the experiment config.
# NOTE(review): the data-loading cell's recorded output warns about a
# 1024-token sequence against a 512-token limit — confirm truncation is
# applied before the ids reach the embedding model.
tokenizer_settings = TokenizerSettings(
    name="BERT_base_uncased",
    embedding_type="bert",
    truncation="ratio",  # or "equal"
    max_length=512,
    padding=True,
    add_special_tokens=True,
)

In [7]:
# Bundle the five settings objects into the single config that the loader,
# the model constructors and the Trainer all receive.
config = ExperimentConfig(
    model_settings=model_settings,
    training_settings=training_settings,
    data_settings=data_settings,
    model_selection=model_selection,
    tokenizer_settings=tokenizer_settings,
)

In [8]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Record the choice on the config so downstream code reads the same device.
config.training_settings.device = device

print(f"Using device: {device}")
print(f"Model type: {config.model_selection.model_type}")

Using device: cpu
Model type: BiLSTM


In [9]:
# loader(config) yields the train / validation / test loaders and the label encoder.
train_loader, val_loader, test_loader, label_encoder = loader(config)

# Save the encoder next to the notebook so label ids can be decoded later.
joblib.dump(label_encoder, 'label_encoder.pkl')

# The class count comes from the encoder and replaces the placeholder
# output_dim set when ModelSettings was created.
num_classes = config.model_settings.output_dim = len(label_encoder.classes_)

Token indices sequence length is longer than the specified maximum sequence length for this model (1024 > 512). Running this sequence through the model will result in indexing errors


In [10]:
# Instantiate the architecture named by config.model_selection.model_type.
if config.model_selection.model_type == 'BiLSTM':
    model = BiLSTM(config)
elif config.model_selection.model_type == 'CNNBiLSTM':
    model = CNNBiLSTM(config)
else:
    # Without this branch an unrecognised model_type would leave `model`
    # undefined (NameError below) or, worse, silently reuse a stale `model`
    # left in the kernel by an earlier run of this cell.
    raise ValueError(
        f"Unsupported model_type {config.model_selection.model_type!r}; "
        "expected 'BiLSTM' or 'CNNBiLSTM'"
    )

# Move the parameters to the device selected earlier in the notebook.
model = model.to(device)

In [11]:
# Wire the model, the three data loaders and the experiment config into the Trainer.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    config=config,
)

# Run training and keep whatever trainer.train() returns.
# NOTE(review): a previous run logged the huggingface/tokenizers "process just
# got forked" warning at this point; that message refers to the
# TOKENIZERS_PARALLELISM environment variable — consider setting it explicitly.
print("Starting training...")
metrics_history = trainer.train()

Starting training...
Training on device: cpu
Experiment name: BiLSTM_l2_h256_lr0.001_20241218_222922

Epoch 1/100 - train


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
Training:   0%|          | 0/127 [00:00<?, ?it/s]TOKENIZERS_PARALLELISM=(true | false)
We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
Training:   2%|▏         | 2/127 [00:20<21:10, 10.16s/it]


KeyboardInterrupt: 

In [None]:
%matplotlib inline

In [None]:
# Training vs. validation loss on a single set of axes, using the loss
# histories stored on the trainer.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(trainer.train_losses, label='Training Loss')
ax.plot(trainer.val_losses, label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Evaluate on the held-out test loader and print every metric to four decimals.
# Only the first element of the returned triple is used here.
print("\nTest Results:")
test_metrics, _, _ = trainer.evaluate(test_loader, 'test')
for metric, value in test_metrics.items():
    print(f"{metric}: {value:.4f}")