In [6]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger

from data_classes.TextLightningDataModule import TextLightningDataModule
from data_classes.pretrained_embeddings import get_pretrained_embeddings
from models.ClassifierSystem import LightningClassifier


In [7]:
# ---- Data and embedding configuration ----
dataset = "IMDB"          # dataset name handed to TextLightningDataModule
num_class = 2             # number of output classes for the classifier
embedding = "Glove"       # pretrained embedding family requested from get_pretrained_embeddings
max_vectors = 20000       # cap passed to get_pretrained_embeddings
dim = 300                 # embedding dimension; also used as the classifier's embedding_size
# Truncation value passed to the data module; evaluates to 580.
# NOTE(review): looks like "mean + 2 * std" of some length statistic — confirm the source of 234 and 173.
trunc = 234 + 2 * 173


# ---- Optimisation / training-loop configuration ----
max_epochs = 20           # upper bound on epochs given to the Trainer
patience = 6              # EarlyStopping patience
monitor = "Val Loss"      # metric name EarlyStopping watches
lr = 1e-3                 # learning rate given to the classifier
batch_size = 32           # batch size for the data module
num_workers = 0           # num_workers for the data module
advanced_metrics = False  # forwarded to LightningClassifier

# ---- Architecture settings shared by every run ----
num_layers = 1
output_layer_type = "linear"

# ---- Logging destinations ----
# log_file is both the TensorBoard save directory and the prefix of every run name.
log_file = "exp2"
log_file_csv = "exp2_csv"


In [8]:
# Fetch the (vocab, vectors) pair for the configured pretrained embedding.
# Both are reused below: vocab by the data module and the classifier, vectors by the classifier.
vocab, vectors = get_pretrained_embeddings(
    embedding=embedding,
    max_vectors=max_vectors,
    dim=dim,
)

In [9]:
# Build the data module once; the same instance is handed to every trainer.fit call below.
imdb_data = TextLightningDataModule(
    vocab,
    dataset=dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    trunc=trunc,
)

In [10]:
# Sweep every (recurrent cell, attention variant, hidden size) combination.
# Each combination gets its own loggers, callbacks, Trainer and newly constructed classifier.
for model_type in ["LSTM", "GRU", "RNN"]:
    for attention_type in [None, "last_hidden_layer", "self"]:
        for hidden_size in [100]:
            # Run name, e.g. "exp2-LSTM-none-100"; a missing attention type is spelled "none".
            attention_label = "none" if attention_type is None else attention_type
            name = "-".join([log_file, model_type, attention_label, str(hidden_size)])

            logger_tensor = TensorBoardLogger(log_file, name=name)
            logger_csv = CSVLogger(log_file_csv, name=name)

            early_stopping = EarlyStopping(monitor=monitor, patience=patience)
            # Track the same metric that early stopping watches. Without a monitored
            # checkpoint callback, Lightning's default ModelCheckpoint has monitor=None and
            # only keeps the most recent checkpoint, so `ckpt_path="best"` below would load
            # the final epoch (up to `patience` epochs past the best one) instead of the
            # checkpoint with the lowest monitored value.
            best_checkpoint = ModelCheckpoint(monitor=monitor, mode="min")

            trainer = Trainer(
                max_epochs=max_epochs,
                gpus=1,
                auto_select_gpus=True,
                callbacks=[early_stopping, best_checkpoint],
                logger=[logger_tensor, logger_csv],
            )
            classifier = LightningClassifier(
                num_class=num_class,
                vocab=vocab,
                vectors=vectors,
                embedding_size=dim,
                learning_rate=lr,
                num_layers=num_layers,
                hidden_size=hidden_size,
                model_type=model_type,
                attention_type=attention_type,
                output_layer_type=output_layer_type,
                advanced_metrics=advanced_metrics,
            )
            trainer.fit(classifier, imdb_data)
            # Evaluate on the test split using the checkpoint selected by best_checkpoint.
            trainer.test(ckpt_path="best")


In [11]:
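# Reference only (not executed): snippet for counting trainable parameters per run. It relies on
# `classifier` and `name` from the training loop, so the last two lines belong inside that loop.
# The dict further down appears to be the recorded result of running it.
# param_count = {}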
# pytorch_total_params = sum(p.numel() for p in classifier.parameters() if p.requires_grad)
# param_count[name] = pytorch_total_params

# {'exp2-LSTM-none-100': 161002,
#  'exp2-LSTM-last_hidden_layer-100': 161002,
#  'exp2-LSTM-self-100': 161002,
#  'exp2-GRU-none-100': 120802,
#  'exp2-GRU-last_hidden_layer-100': 120802,
#  'exp2-GRU-self-100': 120802,
#  'exp2-RNN-none-100': 40402,
#  'exp2-RNN-last_hidden_layer-100': 40402,
#  'exp2-RNN-self-100': 40402}