In [1]:
import os, sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import torchtext
from torchtext.legacy.datasets import Multi30k
from torchtext.legacy.data import Field, BucketIterator

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import spacy
import numpy as np
import pandas as pd

import random
import math
import time

from multiHop_QA.configures import Config_path,Config_output_path,Hyparams_transformers
from multiHop_QA.model import Encoder,Decoder,Seq2Seq

config = Config_path()
config_output = Config_output_path()

# USE_CUDA = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class RelVocabs():
    def __init__(self,train_combination,test_combination):
        self.train = train_combination
        self.test = test_combination

    def __load_relation(self):
        relation_train = pd.read_csv(self.train)
        relation_test = pd.read_csv(self.test)
        rel2id = {}
        id2rel = {}
        special_char = np.array(['PAD', 'BOS', 'EOS', 'UNK'])
        relations = pd.concat([pd.Series(special_char), relation_train['relationship'],
                               relation_test['relationship']],
                              axis=0)
        relations = relations.unique()
        idx = 0
        for relation in relations:
            rel2id[relation] = idx
            id2rel[idx] = relation
            idx += 1
        return rel2id, id2rel

    def get_rl_vocabs(self):
        return self.__load_relation()


class QuesVocabs():
    def __init__(self,train_combination,test_combination):
        self.train = train_combination
        self.test = test_combination

    def __load_mask_q(self):
        mask_q_train = pd.read_csv(self.train)
        mask_q_test = pd.read_csv(self.test)
        q2id = {}
        id2q = {}
        special_char = np.array(['PAD', 'BOS', 'EOS', 'UNK'])
        mask_qs = pd.concat([pd.Series(special_char), mask_q_train['q_space'],
                             mask_q_test['q_space']],
                            axis=0)
        mask_qs = mask_qs.str.split(expand=True).stack()
        mask_qs = mask_qs.unique()
        idx = 0
        for mask_q in mask_qs:
            q2id[mask_q] = idx
            id2q[idx] = mask_q
            idx += 1
        return q2id, id2q

    def get_ques_vocabs(self):
        return self.__load_mask_q()


class LoadData:
    def __init__(self,train_combination,test_combination,q2id,rel2id):
        self.train = train_combination
        self.test = test_combination
        self.q2i = q2id
        self.rel2id = rel2id

    def __split_data(self):
        train = pd.read_csv(self.train)
        test = pd.read_csv(self.test)
        split = test.shape[0] // 2
        train_q = self.read_docs_to_seqs(train['q_space'].values, self.q2i)
        train_rel = self.read_docs_to_seqs(train['relationship'].values, self.rel2id,is_rel = True)
        val_q = self.read_docs_to_seqs(test['q_space'][0:split].values, self.q2i)
        val_rel = self.read_docs_to_seqs(test['relationship'][0:split].values, self.rel2id,is_rel = True)
        test_q = self.read_docs_to_seqs(test['q_space'][split:-1].values, self.q2i)
        test_rel = self.read_docs_to_seqs(test['relationship'][split:-1].values, self.rel2id,is_rel = True)
        return train_q, train_rel, val_q, val_rel, test_q, test_rel

    def read_docs_to_seqs(self, docs, w2id,is_rel = False):
        seqs = []
        for doc in docs:
            if doc == "":
                continue
            words = doc.split(" ")
            # if is_rel:
            #     seq = [w2id["BOS"]]
            # else:
            #     seq = []
            seq = [w2id["BOS"]]
            for word in words:
                if word in w2id:
                    seq.append(w2id[word])
            # seq = [w2id[word] for word in words if word in w2id]
            seq.append(w2id["EOS"])
            seqs.append(seq)
        return seqs

    def get_mask_data(self):
        return self.__split_data()


def get_batch(pairs, batch_size):
    if batch_size is not None:
        rand_list = [random.randint(0, len(pairs) - 1) for i in range(batch_size)]
        pairs_batch = [pairs[rand] for rand in rand_list]
    else:
        pairs_batch = pairs
    # pairs_batch = sorted(pairs_batch, key=lambda p:len(p[0]), reverse=True) # sort based on input len, to use pack function of pytorch

    qu_batch = [pair[0] for pair in pairs_batch]
    rl_batch = [pair[1] for pair in pairs_batch]
    qu_lengths = [len(seq) for seq in qu_batch]
    rl_lengths = [len(seq) for seq in rl_batch]
    max_q_length = max(qu_lengths)
    max_r_length = max(rl_lengths)

    seqs_padded = []
    for seq in qu_batch:
        seqs_padded.append(seq + [q2id["PAD"] for pad_num in range(max_q_length - len(seq))])
    qu_batch = seqs_padded
    seqs_padded = []
    for seq in rl_batch:
        seqs_padded.append(seq + [rel2id["PAD"] for pad_num in range(max_r_length - len(seq))])
    rl_batch = seqs_padded
    qu_batch = Variable(torch.LongTensor(qu_batch)) #.transpose(0, 1)
    # (batch_size x max_len) tensors, transpose into (max_len x batch_size)
    rl_batch = Variable(torch.LongTensor(rl_batch)) #.transpose(0, 1)
    qu_batch = qu_batch.to(device)
    rl_batch = rl_batch.to(device)
    return qu_batch, qu_lengths, rl_batch, rl_lengths


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)


def train(model, train_pairs, optimizer, criterion, clip): #####
    model.train()
    epoch_loss = 0
    # for i, batch in enumerate(iterator):
    #     src = batch.src
    #     trg = batch.trg
    src, q_lengths, trg, r_lengths = get_batch(train_pairs, hper_params.BATCH_SIZE)
    optimizer.zero_grad()
    output, _ = model(src, trg[:, :-1])
    # output = [batch size, trg len - 1, output dim]
    # trg = [batch size, trg len]
    output_dim = output.shape[-1]
    output = output.contiguous().view(-1, output_dim)
    trg = trg[:, 1:].contiguous().view(-1)
    # output = [batch size * trg len - 1, output dim]
    # trg = [batch size * trg len - 1]
    loss = criterion(output, trg)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    optimizer.step()
    epoch_loss += loss.item()
    return epoch_loss


def evaluate(model, val_pairs, criterion): #####
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        # for i, batch in enumerate(iterator):
        #     src = batch.src
        #     trg = batch.trg
            src, q_lengths, trg, r_lengths = get_batch(train_pairs, hper_params.BATCH_SIZE)
            output, _ = model(src, trg[:, :-1])
            # output = [batch size, trg len - 1, output dim]
            # trg = [batch size, trg len]
            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:, 1:].contiguous().view(-1)
            # output = [batch size * trg len - 1, output dim]
            # trg = [batch size * trg len - 1]
            loss = criterion(output, trg)
            epoch_loss += loss.item()
    return epoch_loss


def epoch_time(start_time, end_time): #####
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [4]:
train_combination = config.train_combination_path
test_combination = config.test_combination_path

m_q_vocabs = QuesVocabs(train_combination,test_combination)
q2id, id2q = m_q_vocabs.get_ques_vocabs()

rel_vocabs = RelVocabs(train_combination,test_combination)
rel2id, id2rel = rel_vocabs.get_rl_vocabs()

loadData = LoadData(train_combination,test_combination,q2id,rel2id)
train_q, train_r, val_q, val_r, test_q, test_r = loadData.get_mask_data()

train_pairs = [(s_seq, t_seq) for s_seq, t_seq in zip(train_q, train_r)]
val_pairs = [(s_seq, t_seq) for s_seq, t_seq in zip(val_q, val_r)]
test_pairs = [(s_seq, t_seq) for s_seq, t_seq in zip(test_q, test_r)]

#################################################################################
# Initialize hyper-parameters
hper_params = Hyparams_transformers()
model_name = "0727_transformers_best-model.pt"
INPUT_DIM = len(q2id) ##
OUTPUT_DIM = len(rel2id) ##
best_valid_loss = float('inf')

q_batch, q_lengths, r_batch, r_lengths = get_batch(train_pairs, hper_params.BATCH_SIZE)

# Initialize Model
enc = Encoder(INPUT_DIM,
              hper_params.HID_DIM,
              hper_params.ENC_LAYERS,
              hper_params.ENC_HEADS,
              hper_params.ENC_PF_DIM,
              hper_params.ENC_DROPOUT,
              device)

dec = Decoder(OUTPUT_DIM,
              hper_params.HID_DIM,
              hper_params.DEC_LAYERS,
              hper_params.DEC_HEADS,
              hper_params.DEC_PF_DIM,
              hper_params.DEC_DROPOUT,
              device)

SRC_PAD_IDX = q2id['PAD']
TRG_PAD_IDX = rel2id['PAD']

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)  # input/target pad index
# show model's parameters quantity
print(f'The model has {count_parameters(model):,} trainable parameters')
# Initialize model's weights
model.apply(initialize_weights)
# apply learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=hper_params.LEARNING_RATE)
# use loss entropy for backpropagation
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# training loop
for epoch in range(hper_params.N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_pairs, optimizer, criterion, hper_params.CLIP)
    valid_loss = evaluate(model, val_pairs, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        # torch.save(model.state_dict(),
        #            os.path.join(config_output.transformers_path,model_name))
        torch.save(model,
                   os.path.join(config_output.transformers_path, model_name))

    print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

# # print test score
# # model.load_state_dict(
# #     torch.load(os.path.join(config_output.transformers_path,model_name)))
# model_ = torch.load(os.path.join(config_output.transformers_path, '0727_transformers_best-model.pt'))
# test_loss = evaluate(model_, test_pairs, criterion)
# print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

The model has 7,040,245 trainable parameters


FileNotFoundError: [Errno 2] No such file or directory: 'G:\\My Drive\\RR-project\\NL2GraphQuery-workplace\\code\\NLtoGQ\\models\\0727_transformers_best-model.pt'