In [None]:
# Install PyTorch Lightning into the notebook runtime.
# NOTE(review): the version is not pinned. The Trainer / ModelCheckpoint keyword arguments used
# later in this notebook (e.g. `filepath`, `log_save_interval`, `weights_summary`) presumably
# require the release the notebook was written against - pin it once confirmed.
!pip install pytorch-lightning

In [None]:
# Install py-rouge into the notebook runtime.
# NOTE(review): presumably needed by the ROUGE helper in the project's test_functions module,
# which is not shown here - confirm, and pin the version.
!pip install py-rouge

In [None]:
# Mount Google Drive at /content/drive (Colab only). The checkpoint callback and the trainer
# created later in this notebook write to paths starting with 'drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
import Data_processing as dpros
import transformer_model as TM
import test_functions as test_func
import statistics
from pytorch_lightning.callbacks import ModelCheckpoint
import pytorch_lightning as pl
import nltk
import numpy as np
import nltk
from tqdm import tqdm
import pandas as pd
import torch.utils.data as utils
from torch.utils.data import Dataset, DataLoader
import nltk.translate.bleu_score as bleu

# Transformer

In [None]:

class Transformer_light(pl.LightningModule):
    """Sequence-to-sequence Transformer packaged as a PyTorch Lightning module.

    Source and target token ids go through one shared embedding table, then
    through the project's ``TM.Trans_encoder`` / ``TM.Trans_decoder`` stacks,
    and a final linear layer maps the decoder output to ``input_dim`` logits.

    Besides its constructor arguments the module reads these notebook-level
    names, which must exist before training starts: ``word2id`` (passed to
    ``dpros.create_masks``), ``criterion`` (the loss function) and
    ``training_set`` / ``valid_set`` / ``test_set`` (wrapped by the dataloaders).
    """

    def __init__(self, input_dim, dim_model, h_dim_v, dim_k, dim_v, dim_ff, seq_len, num_heads, num_layers):
        """Build the embedding, the encoder/decoder stacks and the output projection.

        Args:
            input_dim: number of rows of the embedding table and number of output logits.
            dim_model: width of the embedding vectors (also the input width of the output layer).
            h_dim_v, dim_k, dim_v, dim_ff, seq_len, num_heads, num_layers: forwarded unchanged
                to ``TM.Trans_encoder`` and ``TM.Trans_decoder``.
        """

        super(Transformer_light, self).__init__()

        ## the learning-rate schedule must use the width this instance was built with,
        ## not whatever the notebook-level `dim_model` happens to be
        self.dim_model = dim_model

        self.n_warmup_steps = 16000
        self.init_lr = 0.5
        ## constant learning rate applied once the warm-up is over
        self.final_lr = 0.0001

        self.encoder = TM.Trans_encoder(input_dim, dim_model, h_dim_v, dim_k, dim_v, dim_ff, seq_len, num_heads, num_layers)
        self.decoder = TM.Trans_decoder(input_dim, dim_model, h_dim_v, dim_k, dim_v, dim_ff, seq_len, num_heads, num_layers)

        self.embedding = nn.Embedding(input_dim, dim_model)

        self.linear_out = nn.Linear(dim_model, input_dim)

    ## the forward method that makes the operations of the transformer and outputs the predictions

    def forward(self, input, target, input_mask, target_mask):
        """Embed both sequences, encode the source, decode the target and return the logits.

        Args:
            input: source token ids (indices into the shared embedding table).
            target: target token ids fed to the decoder.
            input_mask, target_mask: masks handed to the encoder/decoder as-is.

        Returns:
            The output of ``linear_out`` applied to the decoder output (last dimension: ``input_dim``).
        """

        input = self.embedding(input)
        target = self.embedding(target)

        enc_sentence = self.encoder(input, input_mask)

        dec_sentence = self.decoder(target, enc_sentence, input_mask, target_mask)

        return self.linear_out(dec_sentence)

    def _shared_step(self, batch):
        """Compute the loss of one ``(src, trg)`` batch; shared by train/validation/test steps."""

        src, trg = batch

        ## the decoder sees the target without its last position and is scored
        ## against the target without its first position
        decoder_input = trg[:, :-1]
        expected = trg[:, 1:]

        ## creates the masks and makes the forward pass

        input_mask, target_mask = dpros.create_masks(src, decoder_input, word2id)
        decoder_logit = self.forward(src, decoder_input, input_mask, target_mask)

        ## flatten to (positions, classes); the class count is read from the logits
        ## themselves so it always matches the output layer of this instance

        num_classes = decoder_logit.size(-1)

        return criterion(decoder_logit.contiguous().view(-1, num_classes), expected.contiguous().view(-1))

    ## definition of the training step:

    def training_step(self, batch, batch_idx):
        """Return ``{'loss': ...}`` for one training batch."""

        return {'loss': self._shared_step(batch)}

    ## definition of the validation step:

    def validation_step(self, batch, batch_idx):
        """Return ``{'val_loss': ...}`` for one validation batch."""

        return {'val_loss': self._shared_step(batch)}

    ## at the end of the validation step, print the average loss

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses, print the mean and return it as ``val_loss``."""

        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        print("Val loss: ", avg_loss)
        return {'val_loss': avg_loss}

    ## definition of the test step:

    def test_step(self, batch, batch_idx):
        """Return ``{'test_loss': ...}`` for one test batch."""

        return {'test_loss': self._shared_step(batch)}

    ## at the end of the test step, return the average loss

    def test_epoch_end(self, outputs):
        """Average the per-batch test losses and return the mean as ``test_loss``."""

        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        return {'test_loss': avg_loss}

    def configure_optimizers(self):
        """Create the Adam optimizer; its learning rate is overwritten on every ``optimizer_step``."""
        return optim.Adam(self.parameters(), betas=(0.9, 0.98), eps=1e-09)

    def _scheduled_lr(self, step):
        """Return the learning rate for the given global step.

        While ``step < n_warmup_steps`` the rate is
        ``init_lr * dim_model**-0.5 * min(step**-0.5, step * n_warmup_steps**-1.5)``;
        afterwards it is the constant ``final_lr``.
        """

        if step >= self.n_warmup_steps:
            return self.final_lr

        ## step 0 would divide by zero in `step ** -0.5`; treat it as step 1 so the very
        ## first update uses the warm-up rate instead of the post-warm-up constant
        step = max(step, 1)

        lr_scale = (self.dim_model ** -0.5) * min(step ** (-0.5), step * (self.n_warmup_steps ** (-1.5)))

        return lr_scale * self.init_lr

    ## defines the optimizer step with the warmup learning rate. After the warmup steps, the learning rate is 0.0001

    def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure, using_native_amp, on_tpu=False, using_lbfgs=False):
        """Set the scheduled learning rate on every parameter group, then step and clear gradients.

        Only ``optimizer`` is used; the other arguments are accepted because the
        Lightning hook passes them.
        """

        lr = self._scheduled_lr(self.trainer.global_step)

        for pg in optimizer.param_groups:
            pg['lr'] = lr

        optimizer.step()
        optimizer.zero_grad()

    def train_dataloader(self):
        """Shuffled loader over the notebook-level ``training_set``."""

        return DataLoader(training_set, batch_size=dpros.batch_size, shuffle=True, num_workers=3)

    def val_dataloader(self):
        """Loader over the notebook-level ``valid_set`` (no shuffling)."""

        return DataLoader(valid_set, batch_size=dpros.batch_size, num_workers=3)

    def test_dataloader(self):
        """Loader over the notebook-level ``test_set`` (no shuffling)."""

        return DataLoader(test_set, batch_size=dpros.batch_size, num_workers=3)
        

## Data processing

In [None]:

## Corpus file read through the project's Data_processing helpers
FILENAME = 'chat_emotion.txt'

## read the file, make every character lower case and filter each line against the whitelist
lines = [
    dpros.filter_line(raw_line.lower(), dpros.EN_WHITELIST)
    for raw_line in dpros.read_lines(filename=FILENAME)
]

## split the cleaned lines into the question side and the answer side
qlines, asentence = dpros.filter_data(lines)

## separate the emotion from the sentence: the last character of every answer is kept as its
## tag, and the answer text is everything except the last two characters
alines = [answer[:-2] for answer in asentence]
tag = [answer[-1] for answer in asentence]

## tokenize on single spaces
qtokenized = [question.split(' ') for question in qlines]
atokenized = [answer.split(' ') for answer in alines]

## build the vocabulary over both sides, then convert the token lists with it
idx2w, w2idx = dpros.index_(qtokenized + atokenized, vocab_size=dpros.VOCAB_SIZE)
idx_q, idx_a = dpros.zero_pad_trans(qtokenized, atokenized, w2idx)

metadata = {
    'w2idx': w2idx,
    'idx2w': idx2w,
    'limit': dpros.limit,
}


In [None]:

## Shorter aliases for the two vocabulary mappings stored in `metadata`
word2id, id2word = metadata['w2idx'], metadata['idx2w']


## Train model

In [None]:

## divide the sentences in train, validation and test (contiguous ranges, in corpus order).
## Then, add the emotion tokens to the source sentences and finally create the dataset objects for the three sets:

n_pairs = len(idx_q)

test_index = int(0.05 * n_pairs)
train_index = int(n_pairs - test_index)
valid_index = int(train_index - int(0.95 * train_index))
train_index = int(0.95 * train_index)

## half-open boundaries [start, stop) of the validation and test ranges. `range` already
## excludes its stop value, so each split starts exactly where the previous one ended;
## starting at `stop + 1` would silently drop one sentence pair per boundary.
valid_start = train_index
test_start = train_index + valid_index


def _take(rows, start, stop):
    """Return the items rows[start], ..., rows[stop - 1] as a list, indexing one at a time."""
    return [rows[i] for i in range(start, stop)]


X_train = _take(idx_q, 0, train_index)
y_train = _take(idx_a, 0, train_index)
tag_train = _take(tag, 0, train_index)

X_valid = _take(idx_q, valid_start, test_start)
y_valid = _take(idx_a, valid_start, test_start)
tag_valid = _take(tag, valid_start, test_start)

X_test = _take(idx_q, test_start, n_pairs)
y_test = _take(idx_a, test_start, n_pairs)
tag_test = _take(tag, test_start, n_pairs)

## every sentence pair must land in exactly one split
assert len(X_train) + len(X_valid) + len(X_test) == n_pairs, "splits do not cover the whole corpus"

X_train_emotion = dpros.emotion_pad_trans(X_train, tag_train, word2id)
X_valid_emotion = dpros.emotion_pad_trans(X_valid, tag_valid, word2id)
X_test_emotion = dpros.emotion_pad_trans(X_test, tag_test, word2id)

training_set = utils.TensorDataset(torch.LongTensor(X_train_emotion), torch.LongTensor(y_train))
valid_set = utils.TensorDataset(torch.LongTensor(X_valid_emotion), torch.LongTensor(y_valid))
test_set = utils.TensorDataset(torch.LongTensor(X_test_emotion), torch.LongTensor(y_test))



In [None]:

## Model hyper-parameters handed to Transformer_light

## vocabulary size: number of embedding rows and of output logits
input_dim = len(word2id)
seq_len = 30
dim_model = 512
num_heads = 8

## per-head sizes must be whole numbers: `/` would produce floats (64.0, 512.0), so use
## integer division and make sure nothing is truncated
assert dim_model % num_heads == 0, "dim_model must be divisible by num_heads"
dim_k = dim_model // num_heads
dim_v = dim_model // num_heads
dim_ff = 2048
h_dim_v = num_heads * dim_v
num_layers = 6

## index of the padding token in the vocabulary
target_pad = word2id['<pad>']


In [None]:
## Cross-entropy loss shared by the training, validation and test steps of the model.
## NOTE(review): no ignore_index is set, so every target position contributes to the loss -
## confirm this is intended for padded positions.
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()

In [None]:

## Build the Lightning module from the hyper-parameters defined earlier in the notebook
light_model = Transformer_light(
    input_dim, dim_model, h_dim_v, dim_k, dim_v, dim_ff, seq_len, num_heads, num_layers
)

## we initialize the parameters of the model: Xavier-uniform for every parameter
## that has more than one dimension, everything else keeps its default initialisation

for weight in light_model.parameters():
    if weight.dim() <= 1:
        continue
    nn.init.xavier_uniform_(weight)

## keep only the checkpoint with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    filepath='drive/My Drive/Colab Notebooks/Trans_checkpoint/',
    monitor='val_loss',
    mode='min',
    save_top_k=1,
    verbose=True,
)

trainer = pl.Trainer(
    gpus=1,
    max_epochs=10,
    default_root_dir='drive/My Drive/Colab Notebooks/Trans_checkpoint/',
    checkpoint_callback=checkpoint_callback,
    log_save_interval=100000,
    log_gpu_memory=None,
    progress_bar_refresh_rate=0,
    weights_summary=None,
)

## alternative trainer that resumes from a saved checkpoint:
## trainer = pl.Trainer(resume_from_checkpoint='drive/My Drive/Colab Notebooks/Trans_checkpoint/epoch=12.ckpt', gpus=1, max_epochs=13, progress_bar_refresh_rate=0, log_save_interval=100000, weights_summary=None, log_gpu_memory=None, default_root_dir='drive/My Drive/Colab Notebooks/Trans_checkpoint/', checkpoint_callback=checkpoint_callback)


In [None]:

## Train the model (the dataloaders come from the module's own *_dataloader methods),
## then run the test loop on the same model object.
trainer.fit(light_model)
trainer.test(light_model)


## Evaluation metrics

In [None]:

## Loader over the test set for the evaluation metrics; an incomplete last batch is dropped
test_loader = DataLoader(
    test_set,
    batch_size=dpros.batch_size,
    drop_last=True,
    num_workers=3,
)

## reference answers and model predictions, produced by the project helper
answers, predict = test_func.create_answers_preds_trans(
    light_model, test_loader, word2id, id2word
)

print("Created successfully!")


Created successfully!


In [None]:

## Keep only the text that precedes the first '<pad>' marker, in place,
## for every reference answer and the prediction at the same position

for idx in range(len(answers)):

    answers[idx] = answers[idx].partition('<pad>')[0]
    predict[idx] = predict[idx].partition('<pad>')[0]


### BLEU

In [None]:

## Score the predictions against the reference answers with the project's BLEU helper
## (implemented in test_functions, not shown in this notebook)
test_func.bleu_scores(answers, predict)


### ROUGE

In [None]:

## Score the predictions against the reference answers with the project's ROUGE helper
## (implemented in test_functions, not shown in this notebook)
test_func.rouge_scores(answers, predict)


### Embedding average metric

In [None]:

## Embedding-based score of the predictions against the reference answers; the helper also
## receives the trained model and the vocabulary (presumably to look up word vectors - confirm
## in test_functions)
test_func.word_embedding_scores_trans(answers, predict, light_model, word2id)


In [None]:

## this is for creating the predictions for each emotion and store them in separate files.
## The loop uses its own variable names so it does not overwrite `tag`, `X_test_emotion`,
## `test_set`, `test_loader` or `predict`, which earlier cells define and rely on.

NUM_EMOTIONS = 9

for emotion_id in range(NUM_EMOTIONS):
    ## NOTE(review): a single emotion id is passed here, whereas the train/valid/test split
    ## passes one tag per sentence (`tag_test`) - confirm the helper accepts both forms
    emotion_inputs = dpros.emotion_pad_trans(X_test, emotion_id, word2id)
    emotion_set = utils.TensorDataset(torch.LongTensor(emotion_inputs), torch.LongTensor(y_test))
    emotion_loader = DataLoader(emotion_set, batch_size=dpros.batch_size, num_workers=3, drop_last=True)
    emotion_sources, emotion_predictions = test_func.create_sources_preds_trans(light_model, emotion_loader, word2id, id2word)

    print("Emotion " + str(emotion_id))

    ## one row per test sentence: the source text and the prediction generated for this emotion
    emotion_df = pd.DataFrame({
        'Sources': [''.join(source_test) for source_test in emotion_sources],
        'Predictions': [''.join(predicted_test) for predicted_test in emotion_predictions],
    })
    emotion_df.to_csv('Transformer' + str(emotion_id) + '.csv', encoding='utf-8', index=False)
