In [None]:
import pickle
import random
from tqdm import tqdm
import numpy as np
import torch
from torch import optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a module-level global by later cells.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
class Encoder(torch.nn.Module):
    """Two-layer GRU encoder on top of pretrained, fine-tunable word embeddings."""

    def __init__(self, input_size, hidden_size, embedding_matrix):
        """Build the embedding table and the recurrent stack.

        Args:
            input_size: feature size fed to the GRU; must match the embedding
                dimension of ``embedding_matrix``.
            hidden_size: number of hidden units in each GRU layer.
            embedding_matrix: 2-D float tensor of pretrained embeddings
                (one row per vocabulary entry); it is NOT frozen.
        """
        super().__init__()
        self.embedding = torch.nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.enc_rnn = torch.nn.GRU(
            input_size,
            hidden_size,
            num_layers=2,
            dropout=0.75,
            batch_first=False,
        )

    def forward(self, input):
        """Encode a batch of token-id sequences.

        Args:
            input: 2-D integer tensor laid out as (seq_len, batch), since the
                GRU is built with ``batch_first=False``.

        Returns:
            The ``(output, hidden)`` pair produced by the GRU.

        Raises:
            ValueError: if ``input`` is not 2-D (the unpacking below fails).
        """
        # The unpacking doubles as a rank check: anything but a 2-D tensor raises.
        _seq_len, _batch = input.shape
        return self.enc_rnn(self.embedding(input))

In [None]:
class AttnDecoderRNN(torch.nn.Module):
    """Single-step GRU decoder with additive attention over the encoder outputs.

    Every call to ``forward`` consumes one target token per batch element,
    attends over all encoder time steps and emits log-probabilities over the
    vocabulary for the next token.
    """

    def __init__(self, input_size, hidden_size, attention_dim, input_seq_len, output_seq_len, embedding_matrix, vocab_size):
        """Create the decoder layers.

        Args:
            input_size: GRU input size. ``forward`` concatenates the token
                embedding with the attention context, so this must equal
                ``embedding_dim + hidden_size``.
            hidden_size: hidden units per GRU layer (also the encoder output size).
            attention_dim: size of the two attention projections.
            input_seq_len: nominal source length; stored as ``self.input_seq``
                for backward compatibility. ``forward`` now reads the real
                length from ``encoder_outputs``.
            output_seq_len: nominal target length; stored as ``self.output_seq``.
            embedding_matrix: pretrained embedding weights (fine-tuned, not frozen).
            vocab_size: size of the output projection.
        """
        super(AttnDecoderRNN, self).__init__()
        self.input_seq = input_seq_len
        self.output_seq = output_seq_len

        self.embedding = torch.nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        # Projects the two stacked decoder hidden states (hence 2 * hidden_size).
        self.time_step_attn_layer = torch.nn.Linear(2 * hidden_size, attention_dim)
        # Projects each encoder output vector.
        self.input_seq_attn_layer = torch.nn.Linear(hidden_size, attention_dim)
        # Scores the concatenation of both projections with a single scalar.
        self.alpha_attn = torch.nn.Linear(2 * attention_dim, 1)
        self.dec_rnn = torch.nn.GRU(input_size, hidden_size, batch_first=False, num_layers=2, dropout=0.75)
        self.out = torch.nn.Linear(hidden_size, vocab_size)
        self.nl = torch.nn.ELU()

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: token ids of shape (1, batch).
            hidden: decoder state of shape (2, batch, hidden_size).
            encoder_outputs: encoder outputs of shape (src_len, batch, hidden_size).

        Returns:
            Tuple ``(output, hidden, alphas)`` where ``output`` holds
            log-probabilities of shape (batch, vocab_size), ``hidden`` is the
            updated decoder state and ``alphas`` are the attention weights of
            shape (batch, src_len, 1); they are non-negative and sum to 1 over
            ``src_len``.
        """
        embedded = self.embedding(input)                      # (1, batch, embed)
        bat = embedded.size(1)
        src_len = encoder_outputs.size(0)

        # Flatten both GRU layers' states into one query vector per batch element.
        attn_hidden = hidden.permute(1, 0, 2).reshape(bat, -1)            # (batch, 2*hid)
        time_step_attn = self.nl(self.time_step_attn_layer(attn_hidden))  # (batch, attn)

        # Score every source position at once instead of looping in Python.
        # This also removes the dependence on a fixed `input_seq` length and on
        # a module-level `device` global.
        input_attn = self.nl(self.input_seq_attn_layer(encoder_outputs))  # (src, batch, attn)
        query = time_step_attn.unsqueeze(0).expand(src_len, -1, -1)       # (src, batch, attn)
        scores = self.alpha_attn(torch.cat((input_attn, query), dim=2)).squeeze(2)  # (src, batch)

        # Attention weights must form a probability distribution over source
        # positions. The previous log_softmax produced non-positive "weights",
        # so the context vector was not a convex combination of encoder states.
        # NOTE: checkpoints trained with the old behaviour should be retrained.
        alphas = F.softmax(scores.permute(1, 0), dim=1).unsqueeze(2)      # (batch, src, 1)

        # Weighted sum of encoder outputs -> context of shape (1, batch, hid).
        attn_applied = torch.bmm(encoder_outputs.permute(1, 2, 0), alphas).permute(2, 0, 1)

        dec_input = torch.cat((embedded, attn_applied), 2)
        output, hidden = self.dec_rnn(dec_input, hidden)

        output = F.log_softmax(self.out(output.view(bat, -1)), dim=1)
        return output, hidden, alphas

In [None]:
teacher_forcing_ratio = 0.25
SOS_token = 0
EOS_token = 1


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,
          use_teacher_forcing=True, max_steps=9):
    """Run one optimisation step on a single batch and return the scalar loss.

    Args:
        input_tensor: source token ids, shape (src_len, batch).
        target_tensor: target token ids, shape (tgt_len, batch).
        encoder: module returning ``(encoder_outputs, encoder_hidden)``.
        decoder: module called as ``decoder(input, hidden, encoder_outputs)``
            and returning ``(output, hidden, attention)``.
        encoder_optimizer: optimizer over the encoder parameters.
        decoder_optimizer: optimizer over the decoder parameters.
        criterion: loss applied to ``(decoder_output, target_tensor[step])``.
        use_teacher_forcing: when true (the default, matching the previous
            hard-coded behaviour) the ground-truth token is fed back at every
            step; otherwise the decoder's own top-1 prediction is fed back.
            NOTE(review): ``teacher_forcing_ratio`` is not consulted here;
            callers wanting scheduled sampling can pass
            ``random.random() < teacher_forcing_ratio``.
        max_steps: maximum number of decoding steps (default 9, as before).
            It is clamped to the target length so short targets do not raise.

    Returns:
        The scaled loss of this batch as a Python float.

    Raises:
        ValueError: if there is no decoding step to run.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    num_steps = min(max_steps, target_tensor.size(0))
    if num_steps <= 0:
        raise ValueError("target_tensor must contain at least one decoding step")

    encoder_outputs, encoder_hidden = encoder(input_tensor)

    # Every sequence starts from the SOS token; allocate on the batch's device
    # rather than relying on a module-level `device` global.
    batch_size = input_tensor.size(1)
    decoder_input = torch.full((1, batch_size), SOS_token, dtype=torch.long, device=input_tensor.device)
    decoder_hidden = encoder_hidden

    loss = 0
    for di in range(num_steps):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        loss = loss + criterion(decoder_output, target_tensor[di])
        if use_teacher_forcing:
            decoder_input = target_tensor[di].view(1, -1)
        else:
            _, topi = decoder_output.topk(1)
            decoder_input = topi.detach().view(1, -1)

    # Repetition scale: the highest number of times any token id occurs among
    # the top-2 predictions of the whole batch at the LAST decoding step.
    # The earlier code recomputed this at every step with one `.item()` call
    # per element but reset the counter each time, so only the final step ever
    # mattered. The same value is now computed once, without per-element
    # host/device synchronisation. It is a constant multiplier (no gradient).
    _, top2 = torch.topk(decoder_output, 2)
    _, counts = torch.unique(top2.reshape(-1), return_counts=True)
    repeat_factor = int(counts.max().item())

    loss = loss * (0.1 * (repeat_factor ** 2))

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()

In [None]:
def trainIters(encoder, decoder, data_loader, epochs, learning_rate=0.001):
    """Train the encoder/decoder pair for ``epochs`` passes over ``data_loader``.

    After every epoch both state dicts are written to the module-level paths
    ``E_PATH`` / ``D_PATH`` and immediately read back.

    Args:
        encoder: encoder module to optimise.
        decoder: decoder module to optimise.
        data_loader: iterable yielding ``(input, target)`` batches laid out as
            (batch, seq); they are transposed to (seq, batch) before training.
        epochs: number of passes over ``data_loader``.
        learning_rate: Adam learning rate used for both optimizers.
    """
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    for epo in range(epochs):

        for batch_idx, (input_tensor, target_tensor) in enumerate(data_loader):
            # The models are sequence-first, the loader is batch-first.
            input_tensor = input_tensor.permute(1, 0)
            target_tensor = target_tensor.permute(1, 0)

            loss = train(input_tensor.to(device), target_tensor.to(device), encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)

            if (batch_idx+1) % 50 == 0:
                print(f'Epoch [{epo+1}/{epochs}], Batch [{batch_idx+1}/{len(data_loader)}], Loss: {loss:.4f}')

        print(f'\n=================== EPOCH [{epo+1}/{epochs}] FINISHED ===================\n')
        # Checkpoint the modules that were passed in. The previous code saved
        # and reloaded the global `attn_decoder`, which silently ignored the
        # `decoder` argument (and failed when no such global existed).
        torch.save(encoder.state_dict(), E_PATH)
        torch.save(decoder.state_dict(), D_PATH)
        print("=================== MODELS SAVED =====================")
        # Reading the files straight back confirms the checkpoints are loadable.
        encoder.load_state_dict(torch.load(E_PATH))
        decoder.load_state_dict(torch.load(D_PATH))
        print("=================== MODELS LOADED ====================\n")

In [None]:
# Load the vocabulary lookup tables (pickled dictionaries).
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with trusted dataset files.
with open('/kaggle/input/preprocessed-v2/word2index.pickle', 'rb') as fp:
    word2index = pickle.load(fp)
    
with open('/kaggle/input/preprocessed-v2/index2word.pickle', 'rb') as fp:
    index2word = pickle.load(fp)
    
# Load the preprocessed dataset tensors: X holds the articles, y the headlines,
# and embedding_weights_matrix the pretrained embedding table handed to both models.
X = torch.load('/kaggle/input/preprocessed-v2/articles.pt')
y = torch.load('/kaggle/input/preprocessed-v2/headlines.pt')
embedding_weights_matrix = torch.load('/kaggle/input/preprocessed-v2/embeddings.pt')

In [None]:
# Hold out 20% of the pairs for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

BATCH_SIZE = 64

# Training batches are reshuffled on every pass over the loader.
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size = BATCH_SIZE, shuffle=True)

# Test order is kept fixed so test_data[index] always refers to the same example.
test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_data, batch_size = BATCH_SIZE, shuffle=False)

In [None]:
# Model hyper-parameters and checkpoint locations.
EMBEDDING_SIZE = 200   # GRU input size of the encoder; presumably the width of embedding_weights_matrix - TODO confirm
ATTENTION_DIM = 128    # size of the decoder's attention projections
HIDDEN_SIZE = 256      # hidden units per GRU layer in encoder and decoder
VOCAB_SIZE = len(word2index)  # size of the decoder's output layer
# Checkpoint files written by trainIters after every epoch (relative to the working directory).
E_PATH="encoder.pt"
D_PATH="attn_decoder.pt"

In [None]:
# Build both models on the selected device and train them.
encoder = Encoder(EMBEDDING_SIZE, HIDDEN_SIZE, embedding_weights_matrix).to(device)
# The decoder GRU consumes the token embedding concatenated with the attention
# context, hence EMBEDDING_SIZE + HIDDEN_SIZE. 60 and 10 are passed as
# input_seq_len and output_seq_len.
attn_decoder = AttnDecoderRNN(EMBEDDING_SIZE + HIDDEN_SIZE, HIDDEN_SIZE, ATTENTION_DIM, 60, 10, embedding_weights_matrix, VOCAB_SIZE).to(device)
# 20 epochs with trainIters' default learning rate.
trainIters(encoder, attn_decoder, train_loader, 20)

In [None]:
# Rebuild both models and restore their trained weights from disk. The
# hyper-parameters are repeated here and must stay in sync with the values
# used for training, otherwise load_state_dict raises on a shape mismatch.
EMBEDDING_SIZE = 200
ATTENTION_DIM = 128
HIDDEN_SIZE = 256
VOCAB_SIZE = len(word2index)
E_PATH="encoder.pt"
D_PATH="attn_decoder.pt"
encoder = Encoder(EMBEDDING_SIZE, HIDDEN_SIZE, embedding_weights_matrix).to(device)
attn_decoder = AttnDecoderRNN(EMBEDDING_SIZE + HIDDEN_SIZE, HIDDEN_SIZE, ATTENTION_DIM, 60, 10, embedding_weights_matrix, VOCAB_SIZE).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be restored on a CPU-only kernel.
encoder.load_state_dict(torch.load('/kaggle/working/encoder.pt', map_location=device))
attn_decoder.load_state_dict(torch.load('/kaggle/working/attn_decoder.pt', map_location=device))

In [None]:
# Show article, reference headline and predicted headline for test example 1030.
# NOTE(review): `evaluate` is defined in the cell that follows this one, so on a
# fresh kernel this cell raises NameError unless that cell has been run first.
evaluate(1030)

In [None]:
def evaluate(index, max_len=16):
    """Print the article, reference headline and greedy prediction for one test example.

    Reads the module-level ``test_data``, ``index2word``, ``encoder``,
    ``attn_decoder`` and ``device``.

    Args:
        index: position of the example inside ``test_data``.
        max_len: maximum number of tokens to generate (default 16, as before);
            generation stops early once ``<EOS>`` is produced.
    """
    art, head = test_data[index]

    # Echo the source article and the reference headline up to and including <EOS>.
    for title, tokens in (("\n ARTICLE IS :: \n", art), ("\n\n HEADLINE IS :: \n", head)):
        print(title)
        for word in tokens:
            token = index2word[word.item()]
            print(token, end = ' ')
            if token == '<EOS>': break

    print("\n\n PREDICTED HEADLINE IS :: \n")
    input_tensor = art.reshape(-1,1).to(device)

    # Inference must run with dropout disabled and without building autograd
    # graphs; the previous training/eval mode is restored afterwards so a later
    # training run is not silently left in eval mode.
    was_training = (encoder.training, attn_decoder.training)
    encoder.eval()
    attn_decoder.eval()
    try:
        with torch.no_grad():
            encoder_outputs, encoder_hidden = encoder(input_tensor)
            decoder_input = torch.zeros((1, input_tensor.size()[1]), device=input_tensor.device, dtype=torch.long)
            decoder_hidden = encoder_hidden
            for _ in range(max_len):
                # The decoder returns (output, hidden, attention); unpacking only
                # two values raised "too many values to unpack".
                decoder_output, decoder_hidden, _attn = attn_decoder(decoder_input, decoder_hidden, encoder_outputs)
                _, topi = decoder_output.topk(1)
                decoder_input = topi.detach().view(1,-1)
                token = index2word[topi[0].item()]
                print(token, end = ' ')
                if token == '<EOS>': break
    finally:
        encoder.train(was_training[0])
        attn_decoder.train(was_training[1])
    
        

In [None]:
def eval_test(num_samples=1000, max_len=10):
    """Greedy-decode the first test examples and collect predictions and references.

    Reads the module-level ``test_data``, ``index2word``, ``encoder``,
    ``attn_decoder`` and ``device``.

    Args:
        num_samples: how many leading examples of ``test_data`` to decode
            (default 1000, as before); clamped to the dataset size so a small
            test set no longer raises IndexError.
        max_len: number of tokens generated per headline and number of
            reference tokens kept (default 10, as before).

    Returns:
        ``(all_preds, all_targets)``: two equally long lists of
        space-joined token strings.
    """
    all_preds = []
    all_targets = []
    num_samples = min(num_samples, len(test_data))

    # Disable dropout and autograd during inference, then restore the
    # previous training/eval mode of both models.
    was_training = (encoder.training, attn_decoder.training)
    encoder.eval()
    attn_decoder.eval()
    try:
        with torch.no_grad():
            for index in tqdm(range(num_samples)):
                art, head = test_data[index]
                input_tensor = art.reshape(-1,1).to(device)

                encoder_outputs, encoder_hidden = encoder(input_tensor)
                decoder_input = torch.zeros((1, input_tensor.size()[1]), device=input_tensor.device, dtype=torch.long)
                decoder_hidden = encoder_hidden

                target = [index2word[el.item()] for el in head.reshape(-1)[:max_len]]
                predicted = []
                for _ in range(max_len):
                    decoder_output, decoder_hidden, _ = attn_decoder(decoder_input, decoder_hidden, encoder_outputs)
                    _, topi = decoder_output.topk(1)
                    decoder_input = topi.detach().view(1,-1)
                    predicted.append(index2word[topi[0].item()])
                all_preds.append(' '.join(predicted))
                all_targets.append(' '.join(target))
    finally:
        encoder.train(was_training[0])
        attn_decoder.train(was_training[1])
    return all_preds, all_targets

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
!pip install rouge
from rouge import Rouge
def cosine_sim(generated_headlines, reference_headlines):
    """Return the highest TF-IDF cosine similarity among the headline pairs.

    Each generated headline is paired with the reference at the same position;
    a fresh TF-IDF vectorizer is fitted on just that pair.

    NOTE(review): this reports the best pair (``max``), not an average over
    the corpus - confirm that this is the intended summary statistic.

    Args:
        generated_headlines: iterable of predicted headline strings.
        reference_headlines: iterable of reference headline strings.

    Returns:
        The maximum pairwise cosine similarity.
    """
    def _pair_similarity(candidate, target):
        # Row 0 is the candidate, row 1 the reference.
        vectors = TfidfVectorizer().fit_transform([candidate, target])
        return cosine_similarity(vectors[0], vectors[1])[0][0]

    return max(
        _pair_similarity(candidate, target)
        for candidate, target in zip(generated_headlines, reference_headlines)
    )
def bleu_score(generated_headlines, reference_headlines):
    """Corpus-level BLEU of the generated headlines against one reference each.

    Args:
        generated_headlines: list of predicted headline strings.
        reference_headlines: list of reference headline strings, aligned
            position by position with ``generated_headlines``.

    Returns:
        The corpus BLEU score computed by NLTK.
    """
    hypotheses = [nltk.word_tokenize(headline) for headline in generated_headlines]
    # corpus_bleu takes (list_of_references, hypotheses), where every entry of
    # list_of_references is itself a LIST of tokenised references for the
    # hypothesis at the same position. The previous call passed the arguments
    # in the opposite order and without that extra nesting, so the score was
    # not a valid BLEU value.
    list_of_references = [[nltk.word_tokenize(headline)] for headline in reference_headlines]
    return nltk.translate.bleu_score.corpus_bleu(list_of_references, hypotheses)
def rogue_score(generated_headlines, reference_headlines):
    """Return the ROUGE scores of the generated headlines, averaged over all pairs.

    Args:
        generated_headlines: predicted headline strings (hypotheses).
        reference_headlines: reference headline strings, aligned with the hypotheses.

    Returns:
        Whatever ``Rouge.get_scores(..., avg=True)`` returns for the two lists.
    """
    return Rouge().get_scores(generated_headlines, reference_headlines, avg=True)

In [None]:
# Decode the test subset first: this is the first cell that needs the headline
# lists, and nothing earlier in the notebook assigns them on a fresh kernel.
generated_headlines, reference_headlines = eval_test()
print('bleu_score', bleu_score(generated_headlines, reference_headlines))

In [None]:
# Reuse the headlines decoded by an earlier cell; decode them here only when
# they do not exist yet, so this cell also runs on a fresh kernel.
try:
    generated_headlines, reference_headlines
except NameError:
    generated_headlines, reference_headlines = eval_test()
print('cosine_sim', cosine_sim(generated_headlines, reference_headlines))
print('bleu_score', bleu_score(generated_headlines, reference_headlines))
print('rogue_score', rogue_score(generated_headlines, reference_headlines))