## Textual Entailment Generation system training

In [None]:
import time, torch
import pandas as pd
import torch.nn as nn
import warnings, random
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.rcParams.update({'figure.figsize': (16, 12), 'font.size': 14})
%matplotlib inline

from gte_seq2seq import Seq2Seq
from utils_model import *
from training_gte import *

# Hyperparameters for this run. HID_DIM, N_LAYERS, ATTN_TYPE, ATTN_FUNC and
# DROPOUT are handed to the Seq2Seq constructor further down; BATCH_SIZE is
# not read by any cell shown in this notebook.
config = dict(
    BATCH_SIZE=32,
    HID_DIM=512,
    N_LAYERS=2,
    ATTN_TYPE="luong",
    ATTN_FUNC="dot",
    DROPOUT=0.2,
)

#### Load the datasets and build the vocabulary
(Specify the folder where the training and dev partitions of the SNLI dataset are saved.)

In [None]:
# Folder that holds the SNLI partitions. It is machine-specific: edit this one
# path to point at your own copy of the dataset.
DATA_DIR = r"D:\Huawei Share\Download"

# Load the training and development splits from the same folder.
train_data = EntailmentDataset.load_dataset(DATA_DIR, split="train")
dev_data = EntailmentDataset.load_dataset(DATA_DIR, split="dev")

# The vocabulary is built from the training split only.
voc = Vocabulary.build_vocabulary(train_data)

#### Load the pretrained Word2Vec model

In [None]:
# Load the pretrained model named "word2vec-google-news-300" through gensim's
# downloader API.
import gensim.downloader as api
wv = api.load('word2vec-google-news-300')
# Combine the vocabulary with the loaded vectors. The first result is later
# passed to Seq2Seq as `pretrained_w2v`.
# NOTE(review): `oov` presumably holds the vocabulary words that have no
# word2vec vector -- confirm in create_w2v_matrix.
w2v_embeddings, oov = create_w2v_matrix(voc, wv)

#### Initialize the Seq2Seq model and choose the loss function and the optimizer

In [None]:
# Imported explicitly: the seeding below needs `np`, which would otherwise be
# in scope only if one of the star imports at the top happens to re-export it.
import numpy as np

# Seed every random number generator used here so that runs are repeatable.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"You are using {device} device")

# Build the model from the pretrained embeddings and the run configuration.
model = Seq2Seq(pretrained_w2v=w2v_embeddings,
                hidden_size=config["HID_DIM"],
                n_layers=config["N_LAYERS"],
                attn_type=config["ATTN_TYPE"],
                attn_func=config["ATTN_FUNC"],
                dropout=config["DROPOUT"])

model = model.to(device)
# NOTE(review): init_weights runs after the pretrained embeddings have been
# loaded into the model -- confirm that it leaves the embedding layer untouched.
model.apply(init_weights)
print(f'The seq2seq model has {count_parameters(model):,} trainable parameters')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Targets equal to 0 are excluded from the loss.
# NOTE(review): index 0 is presumably the padding token id in `voc` -- confirm.
criterion = nn.NLLLoss(ignore_index=0)

# Bare expression: the notebook shows the model's repr as the cell output.
model

#### Train the model

In [None]:
# Release unused cached GPU memory before training starts.
torch.cuda.empty_cache()

# Per-epoch losses. Both lists are also handed to train() on every epoch;
# what train() does with them is not visible in this notebook.
train_history = []
valid_history = []
N_EPOCHS = 1

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, voc, train_data, optimizer, criterion, device, train_history, valid_history)
    valid_loss = evaluate(model, voc, dev_data, criterion, device)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    train_history.append(train_loss)
    valid_history.append(valid_loss)
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} ')
    # Report the validation loss too; it was computed and stored but never shown.
    # Assumes evaluate() returns a scalar, as train() does.
    print(f'\t Val. Loss: {valid_loss:.3f}')

##### Generate an inference for a sample premise

In [None]:
from display_results import display_attention
# Sample premise fed to the trained model.
prem = "Three puppies are in the tub being sprayed with water by a person."
# predict() returns a pair: the generated output and the attention values.
# NOTE(review): max_len=20 presumably caps the generated length in tokens -- confirm.
hypo, attention = predict(prem, model, device, max_len=20)
# Visualise the attention between the premise and the generated output.
display_attention(prem, hypo, attention)