In [1]:
# ## KAGGLE ONLY
# from shutil import copyfile
# copyfile(src="../input/scriptandpickle/generate_dataloaders.py", dst="../working/generate_dataloaders.py")
# copyfile(src="../input/scriptssss/model.py", dst="../working/model.py")
# copyfile(src="../input/newevaluation/evaluation.py", dst="../working/evaluation.py")

# copyfile(src="../input/newfiles/train_dataloader_lstm.p", dst="../working/train_dataloader_lstm.p")
# copyfile(src="../input/newfiles/val_dataloader_lstm.p", dst="../working/val_dataloader_lstm.p")
# copyfile(src="../input/newfiles/dictionary_lstm.p", dst="../working/dictionary.p")
# copyfile(src="../input/newfiles/train_unlabeled_dataloader_lstm.p", dst="../working/train_unlabelled_dataloader_lstm.p")
# copyfile(src="../input/newfiles/train_labeled_dataloader_lstm.p", dst="../working/train_labelled_dataloader_lstm.p")

In [2]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
import torch.nn.functional as F

#from datasets import get_mnist_dataset, get_data_loader
#from utils import *
#from models import *

import pickle as pkl
import os
import datetime as dt
import pandas as pd
import random

from generate_dataloaders import *

from tqdm import tqdm_notebook as tqdm

import evaluation
import importlib
importlib.reload(evaluation)

<module 'evaluation' from '/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/evaluation.py'>

## Get Dataloaders

In [3]:
seed = 1029
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
np.random.seed(seed)  # Numpy module.
random.seed(seed)  # Python random module.
torch.manual_seed(seed)
torch.backends.cudnn.enabled = False 
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

def _init_fn(worker_id):
    np.random.seed(int(seed))

In [5]:
path = os.getcwd()
data_dir = path + '/'
data_dir = path +'/data/' #Uncomment for local system

#### *Verify filenames are consistent*

In [11]:
train_loader_labelled = pkl.load(open(data_dir + 'train_labeled_dataloader_lstm.p','rb'))
train_loader_unlabelled = pkl.load(open(data_dir + 'train_unlabeled_dataloader_lstm.p','rb'))
val_loader = pkl.load(open(data_dir + 'val_dataloader_lstm.p','rb'))

In [12]:
review_dict = pkl.load(open(data_dir + 'dictionary.p','rb'))

In [13]:
#%conda install pytorch torchvision -c pytorch
## if torch.__version__ is not 1.3.1, run this cell then restart kernel

In [14]:
print(torch.__version__)

1.3.1


## PRE TRAINED WORD EMBEDDINGS 

In [15]:
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float16')

In [16]:
def load_embeddings(path):
    with open(path) as f:
        return dict(get_coefs(*line.strip().split(' ')) for line in tqdm(f))

In [17]:
def build_matrix(review_dict, embedding_index ,dim = 200):
#     embedding_index = load_embeddings(path)
    embedding_matrix = np.zeros((len(review_dict.tokens), dim))
    unknown_words = []
    
    for word, i in review_dict.ids.items():
        try:
            embedding_matrix[i] = embedding_index[word]
        except KeyError:
            unknown_words.append(word)
    return embedding_matrix, unknown_words

In [20]:
## LOCAL - 2nd line // KAGGLE -- 1st line
#glove_twitter = '../input/glove-global-vectors-for-word-representation/glove.twitter.27B.200d.txt' #Change loc for local system
glove_twitter = data_dir + 'glove.twitter.27B.200d.txt'

In [21]:
embedding_index = load_embeddings(glove_twitter)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [22]:
glove_embedding_index,unknown_words = build_matrix(review_dict, embedding_index)
del embedding_index

In [23]:
len(review_dict.tokens)

16256

In [24]:
len(unknown_words)

4428

In [25]:
# for word in unknown_words:
#     print(word)

In [26]:
review_dict.get_id('great')

34

## Neural Network LSTM Class

NOTE: Data loader is defined as:
- tuple: (tokens, flagged_index, problematic)

In [27]:
def freeze_model(model):
    for param in model.parameters():
        param.requires_grad = False
        
def unfreeze_model(model):
    for param in model.parameters():
        param.requires_grad = True

In [128]:
class LSTM_model(nn.Module):
    """
    LSTM classification model using pretrained glove embeddings
    """
    # NOTE: we can't use linear layer until we take weighted average, otherwise it will
    # remember certain positions incorrectly (ie, 4th word has bigger weights vs 7th word)
    def __init__(self, opts):
        super(LSTM_model, self).__init__()
        self.embedding_matrix = opts['embedding_matrix']
        self.vocab_size = self.embedding_matrix.shape[0]
        self.embed_size = self.embedding_matrix.shape[1]

        self.num_hidden_layers = opts['num_hidden_layers']
        self.hidden_size = opts['hidden_size']
        self.dropout = opts['dropout']
        self.num_classes = 2
        self.lambda_loss = opts['lambda_loss']
        
        self.embed = nn.Embedding(self.vocab_size, self.embed_size, padding_idx=0)    
        self.embed.weight = nn.Parameter(torch.tensor(self.embedding_matrix, dtype=torch.float32))
        self.embed.weight.requires_grad = False

        self.lstm = nn.LSTM(self.embed_size, self.hidden_size, self.num_hidden_layers, batch_first=True, dropout=self.dropout, bidirectional=True, bias=True)
        
        self.projection = nn.Linear(2*self.hidden_size, self.num_classes, bias=True)

    
    def forward(self, tokens, flagged_index):
        batch_size, num_tokens = tokens.shape
        embedding = self.embed(tokens)
#         print(embedding.shape) # below assumes "batch_size x num_tokens x Emb_dim" (VERIFY)
        
        lstm_output = self.lstm(embedding)
        # lstm_output is a tuple containing lstm output and (hidden_state, lstm_cell). 
        # lstm_output[0] would be of shape "batch_size x num_tokens x hidden_size" (VERIFY)
        
        logits = self.projection(lstm_output[0])
        # logits would be of shape "batch_size x num_tokens x num_classes (2)" (VERIFY)
        
        batch_size, _, __ = logits.shape
        
        #selecting the logit at the flagged index
        relevant_logits = logits[list(range(batch_size)),flagged_index]
        # relevant_logits would be of shape "batch_size x num_classes (2)" (VERIFY)
        
        return relevant_logits

## First performing fully supervised learning using the labelled set to train new vector representations

In [43]:
num_gpus = torch.cuda.device_count()
if num_gpus > 0:
    current_device = 'cuda'
else:
    current_device = 'cpu'

opts = {
    'embedding_matrix': glove_embedding_index,
    'num_hidden_layers': 3,
    'hidden_size': 100,
    'num_unfrozen_epochs': 0,
    'dropout': .1
}
model = LSTM_model(opts).to(current_device)

In [44]:
criterion = nn.CrossEntropyLoss(reduction='sum')
#optimizer = torch.optim.Adam(model.parameters(), 0.01, amsgrad=True)

## Supervised model training

In [150]:
def train_supervised_model(model, criterion, train_loader_labelled, valid_loader, num_frozen_epochs=10, num_unfrozen_epochs=0, path_to_save=None, print_every=1000, debug_mode=False):

    train_losses=[]
    val_losses=[]
    num_gpus = torch.cuda.device_count()
    if num_gpus > 0:
        current_device = 'cuda'
    else:
        current_device = 'cpu'
    
    empty_centroids = torch.tensor([])
    # freeze part    
    optimizer = torch.optim.Adam(model.parameters(), 0.01, amsgrad=True)
    
    for epoch in range(num_frozen_epochs):
        print('{} | Epoch {}'.format(dt.datetime.now(), epoch))
        model.train()
        total_epoch_loss = 0
        
        for i,(tokens_labelled, labels, flagged_indices_labelled) in tqdm(enumerate(train_loader_labelled)):
            
            tokens_labelled = tokens_labelled.to(current_device)
            flagged_indices_labelled = flagged_indices_labelled.to(current_device)
            labels = labels.to(current_device)

            # forward pass and compute loss
            logits = model(tokens_labelled,flagged_indices_labelled)
            
            loss = criterion(logits, labels)
        
            # run update step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            #Add loss to the epoch loss
            total_epoch_loss += loss.detach()

            if i % print_every == 0:
                losses = loss/len(tokens_labelled)
                print('Average training loss at batch ',i,': %.3f' % losses)
            
        total_epoch_loss /= len(train_loader_labelled.dataset)
        total_epoch_loss = total_epoch_loss.detach()
        train_losses.append(total_epoch_loss)
        print('Average training loss after epoch ',epoch,': %.3f' % total_epoch_loss)
        
        # calculate validation loss after every epoch
        total_validation_loss = 0
        for i, (tokens, labels, flagged_indices) in enumerate(valid_loader):
            model.eval()
            tokens = tokens.to(current_device)
            labels = labels.to(current_device)
            flagged_indices = flagged_indices.to(current_device)
            
            # forward pass and compute loss
            logits = model(tokens,flagged_indices)
            
            loss = criterion(logits, labels)
            
            #Add loss to the validation loss
            total_validation_loss += loss

        total_validation_loss /= len(valid_loader.dataset)
        val_losses.append(total_validation_loss)
        print('Average validation loss after epoch ',epoch,': %.3f' % total_validation_loss)
        if debug_mode:
            print('Train result:')
            TP_cluster, FP_cluster, _ =evaluation.main(model, empty_centroids, train_loader_labelled, criterion, data_dir, current_device)
            print()
            print('Validation result:')
            TP_cluster, FP_cluster, _ =evaluation.main(model, empty_centroids, valid_loader, criterion, data_dir, current_device)
        
        if path_to_save == None:
            pass
        else:
            opts = {"embedding_matrix":model.embedding_matrix,\
                    "num_hidden_layers":model.num_hidden_layers,\
                    "hidden_size":model.hidden_size,\
                    "num_classes":model.num_classes}
            torch.save(model.state_dict(), path_to_save + 'model_dict_labelled.pt')
            torch.save(train_losses, path_to_save + 'train_losses_labelled')
            torch.save(val_losses, path_to_save + 'val_losses_labelled')
            torch.save(opts, path_to_save + 'opts_labelled')

    # unfreeze part
    unfreeze_model(model)
    print("*** UNFREEZING ***")    

    optimizer = torch.optim.Adam(model.parameters(), 0.01, amsgrad=True)
    
    for epoch in range(num_unfrozen_epochs):
        print('{} | Epoch {}'.format(dt.datetime.now(), epoch))
        model.train()
        total_epoch_loss = 0

        for i,(tokens_labelled, labels, flagged_indices_labelled) in tqdm(enumerate(train_loader_labelled)):
            
            tokens_labelled = tokens_labelled.to(current_device)
            flagged_indices_labelled = flagged_indices_labelled.to(current_device)
            labels = labels.to(current_device)

            # forward pass and compute loss
            logits = model(tokens_labelled,flagged_indices_labelled)
            
            loss = criterion(logits, labels)
        
            # run update step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            #Add loss to the epoch loss
            total_epoch_loss += loss.detach()

            if i % print_every == 0:
                losses = loss/len(tokens_labelled)
                print('Average training loss at batch ',i,': %.3f' % losses)
            
        total_epoch_loss /= len(train_loader_labelled.dataset)
        total_epoch_loss = total_epoch_loss.detach()
        train_losses.append(total_epoch_loss)
        print('Average training loss after epoch ',epoch,': %.3f' % total_epoch_loss)
        
        # calculate validation loss after every epoch
        total_validation_loss = 0
        for i, (tokens, labels, flagged_indices) in enumerate(valid_loader):
            model.eval()
            tokens = tokens.to(current_device)
            labels = labels.to(current_device)
            flagged_indices = flagged_indices.to(current_device)
            
            # forward pass and compute loss
            logits = model(tokens,flagged_indices)
            
            loss = criterion(logits, labels)
            
            #Add loss to the validation loss
            total_validation_loss += loss

        total_validation_loss /= len(valid_loader.dataset)
        val_losses.append(total_validation_loss)
        print('Average validation loss after epoch ',epoch,': %.3f' % total_validation_loss)
        if debug_mode:
            print('Train result:')
            TP_cluster, FP_cluster, _ =evaluation.main(model, empty_centroids, train_loader_labelled, criterion, data_dir, current_device)
            print()
            print('Validation result:')
            TP_cluster, FP_cluster, _ =evaluation.main(model, empty_centroids, valid_loader, criterion, data_dir, current_device)
        
        
        if path_to_save == None:
            pass
        else:
            opts = {"embedding_matrix":model.embedding_matrix,\
                    "num_hidden_layers":model.num_hidden_layers,\
                    "hidden_size":model.hidden_size,\
                    "num_classes":model.num_classes}
            torch.save(model.state_dict(), path_to_save + 'model_dict_labelled.pt')
            torch.save(train_losses, path_to_save + 'train_losses_labelled')
            torch.save(val_losses, path_to_save + 'val_losses_labelled')
            torch.save(opts, path_to_save + 'opts_labelled')

    return model, train_losses, val_losses

### Clustering Stuff

In [100]:
class KMeansCriterion(nn.Module):
    
    def __init__(self):
        super().__init__()
    
    def forward(self, embeddings, centroids, labelled = False,  cluster_assignments = None):
        if labelled:
            num_reviews = len(cluster_assignments)
            distances = torch.sum((embeddings[:, None, :] - centroids)**2, 2)
            cluster_distances = distances[list(range(num_reviews)),cluster_assignments]
            loss = cluster_distances.sum()
        else:
            distances = torch.sum((embeddings[:, None, :] - centroids)**2, 2)
            cluster_distances, cluster_assignments = distances.min(1)
            loss = cluster_distances.sum()
        return loss, cluster_assignments

In [101]:
def centroid_init(k, d, dataloader, model, current_device):
    ## Here we ideally don't want to do randomized/zero initialization
    centroid_sums = torch.zeros(k, d).to(current_device)
    centroid_counts = torch.zeros(k).to(current_device)
    for (tokens, labels, flagged_indices) in dataloader:
        # cluster_assignments = torch.LongTensor(tokens.size(0)).random_(k)
        cluster_assignments = labels.to(current_device)
        
        model.eval()
        sentence_embed = model(tokens.to(current_device),flagged_indices.to(current_device))
    
        update_clusters(centroid_sums.detach(), centroid_counts.detach(),
                        cluster_assignments.detach(), sentence_embed.to(current_device).detach())
    
    centroid_means = centroid_sums / centroid_counts[:, None].to(current_device)
    return centroid_means.clone()

def update_clusters(centroid_sums, centroid_counts,
                    cluster_assignments, embeddings):
    k = centroid_sums.size(0)

    centroid_sums.index_add_(0, cluster_assignments, embeddings)
    bin_counts = torch.bincount(cluster_assignments,minlength=k).type(torch.FloatTensor).to(current_device)
    centroid_counts.add_(bin_counts)

## Dataloader stuff

In [40]:
def loadLabelledBatch(train_loader_labelled_iter, train_loader_labelled):
    try:
        tokens, labels, flagged_indices = next(train_loader_labelled_iter)
    except StopIteration:
        train_loader_labelled_iter = iter(train_loader_labelled)
        tokens, labels, flagged_indices = next(train_loader_labelled_iter)

    return tokens, labels, flagged_indices, train_loader_labelled_iter


def loadUnlabelledBatch(train_loader_unlabelled_iter, train_loader_unlabelled):
    try:
        tokens, labels, flagged_indices = next(train_loader_unlabelled_iter)
    except StopIteration:
        train_loader_unlabelled_iter = iter(train_loader_unlabelled)
        tokens, labels, flagged_indices = next(train_loader_unlabelled_iter)

    return tokens, labels, flagged_indices, train_loader_unlabelled_iter

## Unsupervised Training

In [102]:
def train_clusters(model, centroids, criterion, train_loader_labelled, train_loader_unlabelled, valid_loader, num_epochs=15, num_batches = 1000, path_to_save=None, print_every = 1000):

    train_loader_labelled_iter = iter(train_loader_labelled)
    train_loader_unlabelled_iter = iter(train_loader_unlabelled)
    lambda_loss = model.lambda_loss

    train_losses=[]
    val_losses=[]
    num_gpus = torch.cuda.device_count()
    if num_gpus > 0:
        current_device = 'cuda'
    else:
        current_device = 'cpu'
    
    optimizer = torch.optim.Adam(model.parameters(), 0.01, amsgrad=True)
    
    for epoch in range(num_epochs):
        print('{} | Epoch {}'.format(dt.datetime.now(), epoch))
        model.eval() # we're only clustering, not training model
        k, d = centroids.size()
        centroid_sums = torch.zeros_like(centroids).to(current_device)
        centroid_counts = torch.zeros(k).to(current_device)
        total_epoch_loss = 0
        
        for i in tqdm(range(int(num_batches))):
            tokens_labelled, labels, flagged_indices_labelled, train_loader_labelled_iter = loadLabelledBatch(train_loader_labelled_iter, train_loader_labelled)
            tokens_unlabelled, _, flagged_indices_unlabelled, train_loader_unlabelled_iter = loadUnlabelledBatch(train_loader_unlabelled_iter, train_loader_unlabelled)

            tokens_labelled = tokens_labelled.to(current_device)
            labels = labels.to(current_device)
            flagged_indices_labelled = flagged_indices_labelled.to(current_device)
            
            tokens_unlabelled = tokens_unlabelled.to(current_device)
            flagged_indices_unlabelled = flagged_indices_unlabelled.to(current_device)

            # forward pass and compute loss
            sentence_embed_labelled = model(tokens_labelled,flagged_indices_labelled)
            sentence_embed_unlabelled = model(tokens_unlabelled,flagged_indices_unlabelled)
            
            cluster_loss_unlabelled, cluster_assignments_unlabelled = criterion(sentence_embed_unlabelled, centroids.detach())
            cluster_loss_labelled, cluster_assignments_labelled = criterion(sentence_embed_labelled, centroids.detach(), labelled = True, cluster_assignments = labels)
    
            total_batch_loss = cluster_loss_unlabelled.data + lambda_loss * cluster_loss_labelled.data
            
#             #Add loss to the epoch loss
            total_epoch_loss += total_batch_loss.data

#             # store centroid sums and counts in memory for later centering
            update_clusters(centroid_sums.detach(), centroid_counts.detach(),
                            cluster_assignments_labelled.detach(), sentence_embed_labelled.detach())
    
            update_clusters(centroid_sums.detach(), centroid_counts.detach(),
                            cluster_assignments_unlabelled.detach(), sentence_embed_unlabelled.detach())

            if i % print_every == 0:
                losses = total_batch_loss/(len(tokens_labelled)+ len(tokens_unlabelled))
                print('Average training loss at batch ',i,': %.3f' % losses)
            
        total_epoch_loss /= (len(train_loader_labelled.dataset)+len(train_loader_unlabelled.dataset))
        train_losses.append(total_epoch_loss)
        print('Average training loss after epoch ',epoch,': %.3f' % total_epoch_loss)
        
        # update centroids based on assignments from autoencoders
        centroids = centroid_sums / (centroid_counts[:, None] + 1).to(current_device)
        
        # calculate validation loss after every epoch
        total_validation_loss = 0
        for i, (tokens, labels, flagged_indices) in enumerate(valid_loader):
            model.eval()
            tokens = tokens.to(current_device)
            labels = labels.to(current_device)
            flagged_indices = flagged_indices.to(current_device)
            
            # forward pass and compute loss
            sentence_embed = model(tokens,flagged_indices)
            cluster_loss, cluster_assignments = criterion(sentence_embed, centroids)
            
            #Add loss to the validation loss
            total_validation_loss += cluster_loss.data

        total_validation_loss /= len(valid_loader.dataset)
        val_losses.append(total_validation_loss)
        print('Average validation loss after epoch ',epoch,': %.3f' % total_validation_loss)
        
        if path_to_save == None:
            pass
        else:
            opts = {"embedding_matrix":model.embedding_matrix,\
                    "num_hidden_layers":model.num_hidden_layers,\
                    "hidden_size":model.hidden_size,\
                    "num_classes":model.num_classes}
            torch.save(model.state_dict(), path_to_save+'model_dict_unlabelled.pt')
            torch.save(centroids, path_to_save+'centroids_unlabelled')
            torch.save(train_losses, path_to_save+'train_losses_unlabelled')
            torch.save(val_losses, path_to_save+'val_losses_unlabelled')
            torch.save(opts, path_to_save+'opts_unlabelled')
        
    return model, centroids, train_losses, val_losses

# Hyperparameter Tuning

In [103]:
def get_save_directory(opts):
    path = os.getcwd()
    model_folder = 'lstm_unfrozen_model/'
    model_dir = path + '/models/' + model_folder
    
    # subfolder for each hyperparam config
    num_unfrozen_epochs = opts['num_unfrozen_epochs']
    num_hidden_layers = opts['num_hidden_layers']
    hidden_size = opts['hidden_size']
    dropout = opts['dropout']
    lambda_loss = opts['lambda_loss']
    subfolder = "num_unfrozen_epochs="+str(num_unfrozen_epochs) \
                + ",num_hidden_layers="+str(num_hidden_layers) \
                + ",hidden_size="+str(hidden_size) \
                + ",dropout="+str(dropout) \
                + ",lambda="+str(lambda_loss) + '/'
    
    # need to actually create these subfolders lol
    try:
        os.makedirs(model_dir + subfolder) # will throw error if subfolder already exists
    except:
        pass
    
    return model_dir + subfolder

## Phase 1: Supervised Model

In [104]:
def train_config_supervised(opts):
    path_to_save = get_save_directory(opts)
    print(path_to_save)
    
    # supervised part -- embeddings
    model = LSTM_model(opts).to(current_device)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    num_unfrozen_epochs = opts['num_unfrozen_epochs']
    train_supervised_model(model, criterion, train_loader_labelled, val_loader, num_unfrozen_epochs=num_unfrozen_epochs, path_to_save=path_to_save)

In [149]:
#num_hidden_layers_list = [0, 1, 2]
num_hidden_layers_list = [1]
#hidden_sizes = [128, 256, 512]
hidden_sizes = [128]
#dropouts = [0, .2, .4, .6, .8]
dropouts = [0]
num_unfrozen_epochs_list = [1, 2]
lambda_loss = None  # NOT TRAINING THIS YET

for num_hidden_layers in num_hidden_layers_list:
    for hidden_size in hidden_sizes:
        for dropout in dropouts:
            for num_unfrozen_epochs in num_unfrozen_epochs_list:
                opts = {
                    'embedding_matrix': glove_embedding_index,
                    'num_hidden_layers': num_hidden_layers,
                    'hidden_size': hidden_size,
                    'dropout': dropout,
                    'num_unfrozen_epochs': num_unfrozen_epochs,
                    'lambda_loss': lambda_loss
                }
                train_config_supervised(opts)

/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
2019-12-09 00:28:58.871606 | Epoch 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.681

Average training loss after epoch  0 : 0.305
Average validation loss after epoch  0 : 0.285
2019-12-09 00:29:08.868317 | Epoch 1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.290

Average training loss after epoch  1 : 0.153
Average validation loss after epoch  1 : 0.345
2019-12-09 00:29:18.431704 | Epoch 2


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.156

Average training loss after epoch  2 : 0.071
Average validation loss after epoch  2 : 0.428
2019-12-09 00:29:27.717276 | Epoch 3


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.024

Average training loss after epoch  3 : 0.043
Average validation loss after epoch  3 : 0.550
2019-12-09 00:29:37.047992 | Epoch 4


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.013

Average training loss after epoch  4 : 0.028
Average validation loss after epoch  4 : 0.588
2019-12-09 00:29:46.375049 | Epoch 5


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.007

Average training loss after epoch  5 : 0.019
Average validation loss after epoch  5 : 0.677
2019-12-09 00:29:55.664964 | Epoch 6


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.020

Average training loss after epoch  6 : 0.018
Average validation loss after epoch  6 : 0.639
2019-12-09 00:30:04.997294 | Epoch 7


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.002

Average training loss after epoch  7 : 0.029
Average validation loss after epoch  7 : 0.569
2019-12-09 00:30:14.361884 | Epoch 8


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.090

Average training loss after epoch  8 : 0.098
Average validation loss after epoch  8 : 0.468
2019-12-09 00:30:23.736264 | Epoch 9


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.014

Average training loss after epoch  9 : 0.040
Average validation loss after epoch  9 : 0.551
2019-12-09 00:30:33.063394 | Epoch 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.011

Average training loss after epoch  0 : 0.050
Average validation loss after epoch  0 : 0.630
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
2019-12-09 00:30:48.780527 | Epoch 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.693

Average training loss after epoch  0 : 0.310
Average validation loss after epoch  0 : 0.336
2019-12-09 00:30:58.060573 | Epoch 1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.225

Average training loss after epoch  1 : 0.162
Average validation loss after epoch  1 : 0.309
2019-12-09 00:31:07.411366 | Epoch 2


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.049

Average training loss after epoch  2 : 0.081
Average validation loss after epoch  2 : 0.414
2019-12-09 00:31:16.680821 | Epoch 3


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.093

Average training loss after epoch  3 : 0.040
Average validation loss after epoch  3 : 0.459
2019-12-09 00:31:26.924357 | Epoch 4


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.043

Average training loss after epoch  4 : 0.033
Average validation loss after epoch  4 : 0.633
2019-12-09 00:31:36.995673 | Epoch 5


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.003

Average training loss after epoch  5 : 0.020
Average validation loss after epoch  5 : 0.522
2019-12-09 00:31:46.349656 | Epoch 6


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.004

Average training loss after epoch  6 : 0.017
Average validation loss after epoch  6 : 0.581
2019-12-09 00:31:57.015397 | Epoch 7


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.001

Average training loss after epoch  7 : 0.013
Average validation loss after epoch  7 : 0.638
2019-12-09 00:32:06.781533 | Epoch 8


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.001

Average training loss after epoch  8 : 0.010
Average validation loss after epoch  8 : 0.621
2019-12-09 00:32:16.092525 | Epoch 9


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.001

Average training loss after epoch  9 : 0.007
Average validation loss after epoch  9 : 0.659
2019-12-09 00:32:25.654975 | Epoch 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.003

Average training loss after epoch  0 : 0.062
Average validation loss after epoch  0 : 0.702
2019-12-09 00:32:41.158434 | Epoch 1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Average training loss at batch  0 : 0.048

Average training loss after epoch  1 : 0.025
Average validation loss after epoch  1 : 0.721


## Phase 2: Unsupervised / Clustering Model

In [151]:
def train_config_unsupervised(opts):    
    # get load directory
    opts_load = opts.copy()
    opts_load['lambda_loss'] = None
    path_to_load = get_save_directory(opts_load)
    print(path_to_load)
    
    # load 
    model = LSTM_model(opts)
    model.load_state_dict(torch.load(path_to_load+'model_dict_labelled.pt',map_location=lambda storage, loc: storage))
    model = model.to(current_device)
    
    # get save directory
    path_to_save = get_save_directory(opts)
    print(path_to_save)
    
    # unsupervised part -- assign clusters to unlabelled data
    model.projection = nn.Identity()
    centroids = centroid_init(2, 2*model.hidden_size, train_loader_labelled, model, current_device)
    criterion = KMeansCriterion().to(current_device)    
    num_batches = int(len(train_loader_unlabelled.dataset)/train_loader_unlabelled.batch_size)+1
    train_clusters(model, centroids, criterion, train_loader_labelled, train_loader_unlabelled, val_loader, num_epochs=3, num_batches=num_batches, path_to_save=path_to_save)

In [152]:
num_hidden_layers = 1  # BEST PARAM
hidden_size = 128  # BEST PARAM
dropout = 0  # BEST PARAM
num_unfrozen_epochs = [0, 1, 2]
lambda_losses = [.1, .5, 1, 5, 10, 25]

for num_unfrozen_epochs in num_unfrozen_epochs_list:
    for lambda_loss in lambda_losses:
        opts = {
            'embedding_matrix': glove_embedding_index,
            'num_hidden_layers': num_hidden_layers,
            'hidden_size': hidden_size,
            'dropout': dropout,
            'num_unfrozen_epochs': num_unfrozen_epochs,
            'lambda_loss': lambda_loss
        }
        train_config_unsupervised(opts)

/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=0.1/
2019-12-09 00:36:08.576457 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 10.458
Average training loss at batch  1000 : 10.298
Average training loss at batch  2000 : 10.177
Average training loss at batch  3000 : 10.791

Average training loss after epoch  0 : 20.362
Average validation loss after epoch  0 : 20.678
2019-12-09 00:37:49.385312 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 8.557
Average training loss at batch  1000 : 11.090
Average training loss at batch  2000 : 10.576
Average training loss at batch  3000 : 10.173

Average training loss after epoch  1 : 20.218
Average validation loss after epoch  1 : 20.662
2019-12-09 00:39:21.361801 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 10.775
Average training loss at batch  1000 : 10.020
Average training loss at batch  2000 : 11.811
Average training loss at batch  3000 : 10.806

Average training loss after epoch  2 : 20.197
Average validation loss after epoch  2 : 20.657
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=0.5/
2019-12-09 00:40:56.720367 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 13.642
Average training loss at batch  1000 : 14.347
Average training loss at batch  2000 : 15.805
Average training loss at batch  3000 : 15.718

Average training loss after epoch  0 : 28.105
Average validation loss after epoch  0 : 20.678
2019-12-09 00:42:37.763666 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 15.609
Average training loss at batch  1000 : 14.709
Average training loss at batch  2000 : 15.290
Average training loss at batch  3000 : 14.056

Average training loss after epoch  1 : 27.979
Average validation loss after epoch  1 : 20.662
2019-12-09 00:44:09.456702 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 14.643
Average training loss at batch  1000 : 16.235
Average training loss at batch  2000 : 13.916
Average training loss at batch  3000 : 14.994

Average training loss after epoch  2 : 27.968
Average validation loss after epoch  2 : 20.658
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=1/
2019-12-09 00:45:48.256897 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 19.272
Average training loss at batch  1000 : 19.966
Average training loss at batch  2000 : 20.129
Average training loss at batch  3000 : 19.259

Average training loss after epoch  0 : 37.785
Average validation loss after epoch  0 : 20.678
2019-12-09 00:47:26.238151 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 22.167
Average training loss at batch  1000 : 20.611
Average training loss at batch  2000 : 19.633
Average training loss at batch  3000 : 19.722

Average training loss after epoch  1 : 37.685
Average validation loss after epoch  1 : 20.662
2019-12-09 00:48:54.648849 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 20.790
Average training loss at batch  1000 : 20.635
Average training loss at batch  2000 : 19.039
Average training loss at batch  3000 : 21.841

Average training loss after epoch  2 : 37.673
Average validation loss after epoch  2 : 20.658
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=5/
2019-12-09 00:50:25.168589 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 64.098
Average training loss at batch  1000 : 62.903
Average training loss at batch  2000 : 68.055
Average training loss at batch  3000 : 61.556

Average training loss after epoch  0 : 115.213
Average validation loss after epoch  0 : 20.678
2019-12-09 00:51:57.799536 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 65.160
Average training loss at batch  1000 : 60.811
Average training loss at batch  2000 : 59.951
Average training loss at batch  3000 : 61.458

Average training loss after epoch  1 : 115.280
Average validation loss after epoch  1 : 20.662
2019-12-09 00:53:25.894222 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 60.373
Average training loss at batch  1000 : 55.099
Average training loss at batch  2000 : 57.614
Average training loss at batch  3000 : 63.965

Average training loss after epoch  2 : 115.397
Average validation loss after epoch  2 : 20.657
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=10/
2019-12-09 00:54:59.096287 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 109.407
Average training loss at batch  1000 : 112.742
Average training loss at batch  2000 : 99.766
Average training loss at batch  3000 : 118.571

Average training loss after epoch  0 : 211.999
Average validation loss after epoch  0 : 20.678
2019-12-09 00:56:33.597416 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 114.171
Average training loss at batch  1000 : 117.940
Average training loss at batch  2000 : 113.929
Average training loss at batch  3000 : 108.838

Average training loss after epoch  1 : 212.346
Average validation loss after epoch  1 : 20.662
2019-12-09 00:58:03.836690 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 113.033
Average training loss at batch  1000 : 109.292
Average training loss at batch  2000 : 117.583
Average training loss at batch  3000 : 112.066

Average training loss after epoch  2 : 212.503
Average validation loss after epoch  2 : 20.657
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=1,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=25/
2019-12-09 00:59:34.988257 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 261.934
Average training loss at batch  1000 : 260.648
Average training loss at batch  2000 : 277.752
Average training loss at batch  3000 : 262.470

Average training loss after epoch  0 : 502.339
Average validation loss after epoch  0 : 20.678
2019-12-09 01:01:07.076695 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 300.714
Average training loss at batch  1000 : 266.296
Average training loss at batch  2000 : 251.443
Average training loss at batch  3000 : 287.236

Average training loss after epoch  1 : 503.437
Average validation loss after epoch  1 : 20.662
2019-12-09 01:02:34.930097 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 264.626
Average training loss at batch  1000 : 257.472
Average training loss at batch  2000 : 262.942
Average training loss at batch  3000 : 261.561

Average training loss after epoch  2 : 503.842
Average validation loss after epoch  2 : 20.658
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=0.1/
2019-12-09 01:04:05.423199 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 10.092
Average training loss at batch  1000 : 9.752
Average training loss at batch  2000 : 12.706
Average training loss at batch  3000 : 11.478

Average training loss after epoch  0 : 20.333
Average validation loss after epoch  0 : 20.933
2019-12-09 01:05:37.321527 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 10.196
Average training loss at batch  1000 : 11.106
Average training loss at batch  2000 : 11.360
Average training loss at batch  3000 : 10.336

Average training loss after epoch  1 : 20.205
Average validation loss after epoch  1 : 20.926
2019-12-09 01:07:05.007493 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 11.797
Average training loss at batch  1000 : 10.429
Average training loss at batch  2000 : 11.278
Average training loss at batch  3000 : 11.364

Average training loss after epoch  2 : 20.196
Average validation loss after epoch  2 : 20.924
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=0.5/
2019-12-09 01:08:35.856179 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 14.143
Average training loss at batch  1000 : 16.244
Average training loss at batch  2000 : 14.469
Average training loss at batch  3000 : 15.278

Average training loss after epoch  0 : 28.296
Average validation loss after epoch  0 : 20.933
2019-12-09 01:10:08.364955 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 14.145
Average training loss at batch  1000 : 15.592
Average training loss at batch  2000 : 15.177
Average training loss at batch  3000 : 14.386

Average training loss after epoch  1 : 28.186
Average validation loss after epoch  1 : 20.926
2019-12-09 01:11:36.722506 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 15.770
Average training loss at batch  1000 : 16.713
Average training loss at batch  2000 : 15.918
Average training loss at batch  3000 : 13.550

Average training loss after epoch  2 : 28.180
Average validation loss after epoch  2 : 20.924
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=1/
2019-12-09 01:13:08.083324 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 21.400
Average training loss at batch  1000 : 21.675
Average training loss at batch  2000 : 21.894
Average training loss at batch  3000 : 20.666

Average training loss after epoch  0 : 38.251
Average validation loss after epoch  0 : 20.933
2019-12-09 01:14:40.582917 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 17.973
Average training loss at batch  1000 : 19.087
Average training loss at batch  2000 : 20.538
Average training loss at batch  3000 : 20.766

Average training loss after epoch  1 : 38.159
Average validation loss after epoch  1 : 20.925
2019-12-09 01:16:10.307604 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 18.848
Average training loss at batch  1000 : 21.696
Average training loss at batch  2000 : 21.045
Average training loss at batch  3000 : 21.062

Average training loss after epoch  2 : 38.158
Average validation loss after epoch  2 : 20.924
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=5/
2019-12-09 01:17:41.319883 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 63.229
Average training loss at batch  1000 : 59.722
Average training loss at batch  2000 : 65.646
Average training loss at batch  3000 : 63.266

Average training loss after epoch  0 : 117.873
Average validation loss after epoch  0 : 20.933
2019-12-09 01:19:09.967835 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 65.676
Average training loss at batch  1000 : 62.451
Average training loss at batch  2000 : 64.659
Average training loss at batch  3000 : 62.502

Average training loss after epoch  1 : 117.984
Average validation loss after epoch  1 : 20.925
2019-12-09 01:20:34.144908 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 56.239
Average training loss at batch  1000 : 66.258
Average training loss at batch  2000 : 62.828
Average training loss at batch  3000 : 69.486

Average training loss after epoch  2 : 117.985
Average validation loss after epoch  2 : 20.924
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=10/
2019-12-09 01:22:04.947643 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 113.278
Average training loss at batch  1000 : 114.374
Average training loss at batch  2000 : 110.533
Average training loss at batch  3000 : 117.456

Average training loss after epoch  0 : 217.400
Average validation loss after epoch  0 : 20.933
2019-12-09 01:23:38.276124 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 124.965
Average training loss at batch  1000 : 121.659
Average training loss at batch  2000 : 110.655
Average training loss at batch  3000 : 105.254

Average training loss after epoch  1 : 217.742
Average validation loss after epoch  1 : 20.926
2019-12-09 01:25:08.835685 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 122.250
Average training loss at batch  1000 : 108.827
Average training loss at batch  2000 : 109.424
Average training loss at batch  3000 : 116.583

Average training loss after epoch  2 : 217.841
Average validation loss after epoch  2 : 20.924
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=None/
/Users/elliotsilva/Desktop/DS-GA-1006/FairFrame/models/lstm_unfrozen_model/num_unfrozen_epochs=2,num_hidden_layers=1,hidden_size=128,dropout=0,lambda=25/
2019-12-09 01:26:44.184437 | Epoch 0


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 258.041
Average training loss at batch  1000 : 238.021
Average training loss at batch  2000 : 280.207
Average training loss at batch  3000 : 268.174

Average training loss after epoch  0 : 515.960
Average validation loss after epoch  0 : 20.933
2019-12-09 01:28:22.534709 | Epoch 1


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 282.869
Average training loss at batch  1000 : 269.541
Average training loss at batch  2000 : 284.544
Average training loss at batch  3000 : 291.659

Average training loss after epoch  1 : 516.999
Average validation loss after epoch  1 : 20.925
2019-12-09 01:29:55.711269 | Epoch 2


HBox(children=(IntProgress(value=0, max=3191), HTML(value='')))

Average training loss at batch  0 : 285.559
Average training loss at batch  1000 : 300.334
Average training loss at batch  2000 : 272.638
Average training loss at batch  3000 : 258.166

Average training loss after epoch  2 : 517.236
Average validation loss after epoch  2 : 20.924


# Evaluate Model

In [142]:
num_gpus = torch.cuda.device_count()
if num_gpus > 0:
    current_device = 'cuda'
else:
    current_device = 'cpu'

## Phase 1: Supervised Model

In [153]:
def evaluate_config_supervised(opts,verbose=True):
    path_to_save = get_save_directory(opts)
    #print(path_to_save)
    
    model = LSTM_model(opts) #change here depending on model
    model.load_state_dict(torch.load(path_to_save+'model_dict_labelled.pt',map_location=lambda storage, loc: storage))
    model = model.to(current_device)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    criterion = criterion.to(current_device)
    
    empty_centroids = torch.tensor([])
    TP_cluster, FP_cluster, results_dict = evaluation.main(model, empty_centroids, val_loader, criterion, data_dir, current_device, verbose)
    results_dict.update(opts)
    return TP_cluster, FP_cluster, results_dict


In [154]:
#num_hidden_layers_list = [0, 1, 2]
num_hidden_layers_list = [1]
#hidden_sizes = [128, 256, 512]
hidden_sizes = [128]
#dropouts = [0, .2, .4, .6, .8]
dropouts = [0]
num_unfrozen_epochs_list = [0,1,2]
lambda_loss = None  # NOT TRAINING THIS YET

results_df = pd.DataFrame()
for num_hidden_layers in num_hidden_layers_list:
    for hidden_size in hidden_sizes:
        for dropout in dropouts:
            for num_unfrozen_epochs in num_unfrozen_epochs_list:
                opts = {
                    'embedding_matrix': glove_embedding_index,
                    'num_hidden_layers': num_hidden_layers,
                    'hidden_size': hidden_size,
                    'dropout': dropout,
                    'num_unfrozen_epochs': num_unfrozen_epochs,
                    'lambda_loss': lambda_loss
                }
                _, _, results_dict = evaluate_config_supervised(opts,False)
                results_df = results_df.append(results_dict,ignore_index=True)
                
results_df = results_df[['num_hidden_layers','hidden_size','dropout','num_unfrozen_epochs','Accuracy','F1 score','Precision','Recall',
                        'TP_rate','FP_rate','FN_rate','TN_rate']]

In [155]:
results_df

Unnamed: 0,num_hidden_layers,hidden_size,dropout,num_unfrozen_epochs,Accuracy,F1 score,Precision,Recall,TP_rate,FP_rate,FN_rate,TN_rate
0,1.0,128.0,0.0,0.0,0.791677,0.817824,0.935205,0.726623,0.935205,0.064795,0.351852,0.648148
1,1.0,128.0,0.0,1.0,0.784668,0.811883,0.929336,0.720787,0.929336,0.070664,0.36,0.64
2,1.0,128.0,0.0,2.0,0.781915,0.809302,0.925532,0.719008,0.925532,0.074468,0.361702,0.638298


## Phase 2: Clustering / Unsupervised

In [156]:
def evaluate_config_supervised(opts,verbose=True):
    path_to_save = get_save_directory(opts)
    #print(path_to_save)
    
    model = LSTM_model(opts) #change here depending on model
    model.projection = nn.Identity()
    model.load_state_dict(torch.load(path_to_save+'model_dict_unlabelled.pt',map_location=lambda storage, loc: storage))
    model = model.to(current_device)
    criterion = KMeansCriterion()
    criterion = criterion.to(current_device)
    centroids = torch.load(path_to_save+'centroids_unlabelled',map_location=lambda storage, loc: storage)
    
    TP_cluster, FP_cluster, results_dict = evaluation.main(model, centroids, val_loader, criterion, data_dir, current_device, verbose)
    results_dict.update(opts)
    return TP_cluster, FP_cluster, results_dict


In [157]:
num_hidden_layers = 1  # BEST PARAM
hidden_size = 128  # BEST PARAM
dropout = 0  # BEST PARAM
num_unfrozen_epoch = 0  # BEST PARAM
lambda_losses = [.1, .5, 1, 5, 10, 25]

results_df2 = results_df.copy()
for lambda_loss in lambda_losses:
    opts = {
        'embedding_matrix': glove_embedding_index,
        'num_hidden_layers': num_hidden_layers,
        'hidden_size': hidden_size,
        'dropout': dropout,
        'num_unfrozen_epochs': num_unfrozen_epochs,
        'lambda_loss': lambda_loss
    }
    _, _, results_dict = evaluate_config_supervised(opts,False)
    results_df = results_df.append(results_dict,ignore_index=True)
                
results_df2 = results_df[['lambda_loss','num_hidden_layers','hidden_size','dropout','num_unfrozen_epochs','Accuracy','F1 score','Precision','Recall',
                        'TP_rate','FP_rate','FN_rate','TN_rate']]

In [158]:
results_df2

Unnamed: 0,lambda_loss,num_hidden_layers,hidden_size,dropout,num_unfrozen_epochs,Accuracy,F1 score,Precision,Recall,TP_rate,FP_rate,FN_rate,TN_rate
0,,1.0,128.0,0.0,0.0,0.791677,0.817824,0.935205,0.726623,0.935205,0.064795,0.351852,0.648148
1,,1.0,128.0,0.0,1.0,0.784668,0.811883,0.929336,0.720787,0.929336,0.070664,0.36,0.64
2,,1.0,128.0,0.0,2.0,0.781915,0.809302,0.925532,0.719008,0.925532,0.074468,0.361702,0.638298
3,0.1,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
4,0.5,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
5,1.0,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
6,5.0,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
7,10.0,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
8,25.0,1.0,128.0,0.0,2.0,0.747522,0.790355,0.951835,0.675719,0.951835,0.048165,0.45679,0.54321
