# Experiment 3: Word-level embeddings (Glove, FastText, Word2Vec) with LSTM and GRU classifiers on IMDB

In [None]:
from data_classes.TextLightningDataModule import TextLightningDataModule
from models.ClassifierSystem import LightningClassifier
from data_classes.pretrained_embeddings import get_pretrained_embeddings
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from pytorch_lightning.callbacks.gpu_stats_monitor import GPUStatsMonitor
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration (read by the data-loading and training cells below)
# ---------------------------------------------------------------------------

# Dataset
dataset = "IMDB"
num_class = 2

# Pretrained embeddings to compare, one run per entry
embeddings = ["Glove", "FastText", "Word2Vec"]
max_vectors = 20_000
dim = 300

# Sequence truncation length passed to the data module (evaluates to 580).
# NOTE(review): presumably mean + 2 * std of the review length -- confirm.
trunc = 234 + 2 * 173

# Model architecture
embedding_level = "word"
output_layer_type = "linear"
hidden_size = 100
# NOTE(review): num_layers is not forwarded to LightningClassifier in the
# visible training loop -- confirm whether it is meant to be used.
num_layers = 1

# Optimisation / training loop
lr = 1e-3
batch_size = 32
num_workers = 0
max_epochs = 20
patience = 6
monitor = "Val Loss"
advanced_metrics = False

# Logger output directories (TensorBoard and CSV respectively)
log_file = "exp3"
log_file_csv = "exp3_csv"


In [None]:
# Per-embedding lookup tables, keyed by the embedding name from `embeddings`:
#   vocab   -> first value returned by get_pretrained_embeddings
#   vectors -> second value returned by get_pretrained_embeddings
#   data    -> TextLightningDataModule built on that vocabulary
data, vocab, vectors = {}, {}, {}

for embedding in embeddings:
    # Load the pretrained embedding, limited to `max_vectors` entries of size `dim`.
    loaded_vocab, loaded_vectors = get_pretrained_embeddings(
        embedding=embedding,
        max_vectors=max_vectors,
        dim=dim,
    )
    vocab[embedding] = loaded_vocab
    vectors[embedding] = loaded_vectors

    # Each embedding gets its own data module because the vocabulary differs.
    data[embedding] = TextLightningDataModule(
        vocab[embedding],
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        trunc=trunc,
    )


In [None]:
# Train and evaluate one classifier for every (recurrent model type, pretrained
# embedding) combination. Each run logs to both TensorBoard and CSV under a
# run name of the form "<log_file>-<model_type>-<embedding>".
for model_type in ["LSTM", "GRU"]:
    for embedding in embeddings:
        name = f"{log_file}-{model_type}-{embedding}"
        logger_tensor = TensorBoardLogger(log_file, name=name)
        logger_csv = CSVLogger(log_file_csv, name=name)

        # The checkpoint callback must be registered with the Trainer.
        # Previously it was created but never passed in, so
        # `ckpt_path="best"` below did not refer to the checkpoint with the
        # best `monitor` value tracked by this callback.
        checkpoint_callback = ModelCheckpoint(monitor=monitor)
        early_stopping = EarlyStopping(monitor=monitor, patience=patience)

        trainer = Trainer(
            max_epochs=max_epochs,
            gpus=1,
            auto_select_gpus=True,
            callbacks=[early_stopping, checkpoint_callback],
            logger=[logger_tensor, logger_csv],
        )

        classifier = LightningClassifier(
            embedding_level=embedding_level,
            num_class=num_class,
            vocab=vocab[embedding],
            vectors=vectors[embedding],
            embedding_size=dim,
            learning_rate=lr,
            model_type=model_type,
            output_layer_type=output_layer_type,
            advanced_metrics=advanced_metrics,
            hidden_size=hidden_size,
        )

        trainer.fit(classifier, data[embedding])
        # Evaluate on the test set using the best checkpoint recorded by
        # `checkpoint_callback` rather than the final-epoch weights.
        trainer.test(ckpt_path="best")
