## Scroll down and check your arguments before running

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper, so this
# cell is Colab-specific).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch the working directory to the project folder on the mounted Drive and
# print it, so the relative imports and data paths used below resolve there.
%cd '/content/drive/My Drive/Text-Based Anomaly Detection/github/'
!pwd

In [None]:
# Install the extra packages for this runtime. tensorboardX provides the
# SummaryWriter imported below; transformers is not imported in the visible
# cells -- presumably it is needed by the project modules (ptb / model).
!pip install tensorboardX
!pip install transformers

In [None]:
import os
import json
import time
import torch
import argparse
import numpy as np
from multiprocessing import cpu_count
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader
from collections import OrderedDict, defaultdict

In [None]:
from ptb import PTB
from utils import to_var, expierment_name
from model import SentenceVAE

In [None]:
def _weighted_loss_stats(losses, weights):
    """Return the weighted mean of ``losses`` and the uncertainty on that mean.

    ``weights`` are the batch sizes, so batches of different size (typically
    the last one of an epoch) contribute proportionally. The second value is
    the uncertainty on the mean, not the standard deviation; multiply it by
    ``sqrt(sum(weights))`` to get the standard deviation.

    Returns ``(nan, nan)`` when the weights sum to zero (no batches).
    """
    losses = np.asarray(losses, dtype=float)
    weights = np.asarray(weights, dtype=float)
    total = weights.sum()
    if total == 0:
        return float('nan'), float('nan')
    mean = float((losses * weights).sum() / total)
    sigma = float(np.sqrt((weights * (losses - mean) ** 2).sum()) / total)
    return mean, sigma


def main(args):
    """Train a SentenceVAE and report/plot train, validation and test loss.

    Args:
        args: parsed command-line namespace. The attributes read here are
            data_dir, create_data, max_sequence_length, min_occ, test, epochs,
            batch_size, learning_rate, embedding_size, rnn_type, hidden_size,
            num_layers, bidirectional, latent_size, word_dropout,
            embedding_dropout, anneal_function, k, x0, print_every,
            tensorboard_logging, logdir and save_model_path.

    Side effects:
        * writes ``model_params.json`` and one ``E<epoch>.pytorch`` checkpoint
          per epoch into ``<save_model_path>/Saved_models``;
        * writes ``dumps/dump/valid_E<epoch>.json`` after every validation pass;
        * optionally logs scalars to TensorBoard under ``args.logdir``;
        * prints progress and shows loss plots.

    NOTE(review): plotting uses ``plt`` (presumably ``matplotlib.pyplot``),
    which is not imported in the visible cells -- confirm it is in scope
    before running.
    """
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    t1 = time.time()  # wall-clock start, used for the "time passed" column

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    # Vocabulary-related values come from the training dataset, not from args.
    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)
    model_save_folder = "Saved_models"
    dump_folder = "dump"

    writer = None
    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = args.save_model_path + "/" + model_save_folder
    # The directory must exist before model_params.json / checkpoints are
    # written; exist_ok keeps re-runs into the same folder working.
    os.makedirs(save_model_path, exist_ok=True)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        """Return the KL weight for ``step`` under the chosen schedule."""
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        if anneal_function == 'linear':
            return min(1, step/x0)
        # Fail loudly instead of returning None and breaking the loss later.
        raise ValueError("unknown anneal_function: %r" % (anneal_function,))

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
        """Return (summed NLL, summed KL divergence, KL weight) for one batch."""
        # cut-off unnecessary padding from target, and flatten
        max_len = torch.max(length).item()
        target = target[:, :max_len].contiguous().view(-1)
        logp = logp[:, :max_len, :].contiguous().view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target.type(torch.long))

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0

    # Plain lists avoid re-allocating a growing numpy array on every batch.
    train_history = []  # rows: (train batch index, loss, batch size)
    valid_summary = []  # rows: (last train batch index, mean loss, uncertainty)
    test_summary = []   # rows: (last train batch index, mean loss, uncertainty)

    for epoch in range(args.epochs):

        for split in splits:
            is_train = split == 'train'

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=is_train,
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )
            n_batches = len(data_loader)

            tracker = defaultdict(tensor)
            if split == 'valid':
                # Must be a list (not the defaultdict's tensor) so the dump
                # below stays JSON-serialisable even for an empty loader.
                tracker['target_sents'] = list()

            # Per-batch loss and batch size for this split in this epoch only.
            epoch_losses = []
            epoch_sizes = []

            # Enable/Disable Dropout
            if is_train:
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for key, value in batch.items():
                    if torch.is_tensor(value):
                        batch[key] = to_var(value)

                # Gradients are only needed while training; skipping them for
                # valid/test saves memory and time without changing the loss.
                with torch.set_grad_enabled(is_train):
                    # Forward pass
                    logp, mean, logv, z = model(batch['input'], batch['length'])

                    # loss calculation
                    NLL_loss, KL_loss, KL_weight = loss_fn(
                        logp, batch['target'], batch['length'], mean, logv,
                        args.anneal_function, step, args.k, args.x0)

                    loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1
                    train_history.append((len(train_history), loss.item(), batch_size))

                # bookkeeping
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)
                epoch_losses.append(loss.item())
                epoch_sizes.append(batch_size)

                if writer is not None:
                    global_step = epoch*n_batches + iteration
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), global_step)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      global_step)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      global_step)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      global_step)

                if iteration % args.print_every == 0 or iteration+1 == n_batches:
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, time passed %6.1f"
                          % (split.upper(), iteration, n_batches-1, loss.item(), NLL_loss.item()/batch_size,
                          KL_loss.item()/batch_size, KL_weight, time.time()-t1))

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            if writer is not None:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

            # x-position for the valid/test points: index of the last train batch.
            last_train_index = train_history[-1][0] if train_history else 0

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                # NOTE(review): nothing visible here appends to
                # tracker['target_sents'] or stores into tracker['z'], so both
                # dumped lists are empty -- confirm whether that is intended.
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                dump_dir = os.path.join('dumps', dump_folder)
                os.makedirs(dump_dir, exist_ok=True)
                with open(os.path.join(dump_dir, 'valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

                # Statistics over this epoch's validation batches only.
                loss_mu, loss_sigma = _weighted_loss_stats(epoch_losses, epoch_sizes)
                print("Mean loss and uncertainty of epoch: %4.2f +- %2.1f \n" % (loss_mu, loss_sigma))
                valid_summary.append((last_train_index, loss_mu, loss_sigma))

                points = np.asarray(valid_summary, dtype=float)
                plt.errorbar(points[:, 0], points[:, 1], yerr=points[:, 2], label="validation")
                if not args.test:
                    # With a test split the figure is finished in the test branch.
                    plt.title("Training and validation loss")
                    plt.xlabel("batchnumber")
                    plt.ylabel("Loss")
                    plt.legend()
                    plt.show()

            if split == 'test':
                # Statistics over this epoch's test batches only.
                loss_mu, loss_sigma = _weighted_loss_stats(epoch_losses, epoch_sizes)
                print("Mean loss and uncertainty of epoch: %4.2f +- %2.1f \n" % (loss_mu, loss_sigma))
                test_summary.append((last_train_index, loss_mu, loss_sigma))

                points = np.asarray(test_summary, dtype=float)
                plt.errorbar(points[:, 0], points[:, 1], yerr=points[:, 2], label="test")
                plt.title("Training, validation and test loss")
                plt.xlabel("batchnumber")
                plt.ylabel("Loss")
                plt.legend()
                plt.show()

            # save checkpoint
            if is_train:
                # Summarise the most recent train batches: walk back from the
                # end until more than 200 samples are covered (or the history
                # is exhausted). Only the tail is used because the model
                # changes after every batch.
                window = 0
                nevents = 0
                while nevents <= 200 and window < len(train_history):
                    window += 1
                    nevents += train_history[-window][2]
                recent = np.asarray(train_history[-window:], dtype=float).reshape(-1, 3)
                loss_mu, loss_sigma = _weighted_loss_stats(recent[:, 1], recent[:, 2])
                print("Mean loss and uncertainty of 200 last: %4.2f +- %2.1f" % (loss_mu, loss_sigma))

                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

                # Start the figure that the valid/test branches add to and show.
                curve = np.asarray(train_history, dtype=float).reshape(-1, 3)
                plt.figure(figsize=(6,4))
                plt.plot(curve[:, 0], curve[:, 1], label="training")

    if writer is not None:
        # Flush pending events to disk.
        writer.close()


In [None]:
def _str2bool(value):
    """Convert a command-line string to a bool.

    ``type=bool`` would treat every non-empty string -- including "False" --
    as True, so the accepted spellings are listed explicitly.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


parser = argparse.ArgumentParser()

# Data options
parser.add_argument('--data_dir', type=str, default='data')
parser.add_argument('--create_data', action='store_true')
parser.add_argument('--max_sequence_length', type=int, default=30)
parser.add_argument('--min_occ', type=int, default=1)  # Forwarded to PTB; original note: it is not used
parser.add_argument('--test', action='store_true')  # Also evaluate on the test split

# Optimisation options
parser.add_argument('-ep', '--epochs', type=int, default=1)
parser.add_argument('-bs', '--batch_size', type=int, default=64)
parser.add_argument('-lr', '--learning_rate', type=float, default=0.001)

# Model options
# For BERT pre-trained model hyperparameters check: https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json
# main() takes vocab_size from the training dataset, so this flag is not read there.
parser.add_argument('-vs', '--vocab_size', type=int, default=30522)  # Cannot be changed for the moment
parser.add_argument('-eb', '--embedding_size', type=int, default=768)  # Cannot be changed for the moment
parser.add_argument('-rnn', '--rnn_type', type=str, default='gru')
parser.add_argument('-hs', '--hidden_size', type=int, default=256)
parser.add_argument('-nl', '--num_layers', type=int, default=1)
# Takes an explicit value (e.g. "-bi False"); defaults to True when omitted.
parser.add_argument('-bi', '--bidirectional', type=_str2bool, default=True)
parser.add_argument('-ls', '--latent_size', type=int, default=16)
parser.add_argument('-wd', '--word_dropout', type=float, default=0)
parser.add_argument('-ed', '--embedding_dropout', type=float, default=0.5)

# KL-annealing schedule
parser.add_argument('-af', '--anneal_function', type=str, default='logistic')
parser.add_argument('-k', '--k', type=float, default=0.0025)
parser.add_argument('-x0', '--x0', type=int, default=4000)

# Logging and output locations
parser.add_argument('-v', '--print_every', type=int, default=50)
parser.add_argument('-tb', '--tensorboard_logging', action='store_true')
parser.add_argument('-log', '--logdir', type=str, default='logs')
parser.add_argument('-bin', '--save_model_path', type=str, default='bin')

## Modify your arguments here:

In [None]:
# Parse the overrides for this run from a string (a notebook has no real
# command line); every option not listed keeps its parser default.
args = parser.parse_args("--data_dir data/shortData --max_sequence_length 60".split())

# Normalise case so "GRU" / "Logistic" are accepted.
args.rnn_type = args.rnn_type.lower()
args.anneal_function = args.anneal_function.lower()

# Validate with explicit errors: assert statements are removed under
# `python -O`, which would let bad values through.
if args.rnn_type not in ('rnn', 'lstm', 'gru'):
    raise ValueError("rnn_type must be one of 'rnn', 'lstm', 'gru', got %r" % (args.rnn_type,))
if args.anneal_function not in ('logistic', 'linear'):
    raise ValueError("anneal_function must be 'logistic' or 'linear', got %r" % (args.anneal_function,))
if not 0 <= args.word_dropout <= 1:
    raise ValueError("word_dropout must be in [0, 1], got %r" % (args.word_dropout,))

main(args)