In [4]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import pandas as pd
import logging
import collections
import numpy as np
from model.model import QAMatching

logging.basicConfig(level=logging.INFO)
USE_CUDA = torch.cuda.is_available()
logging.info('USE_CUDA: {}'.format(USE_CUDA))
device = torch.device("cuda" if USE_CUDA else "cpu")

df_train = pd.read_csv('./data/WikiQACorpus/WikiQA-train.tsv', sep='\t')
df_train.head(12)

INFO:root:USE_CUDA: True


Unnamed: 0,QuestionID,Question,DocumentID,DocumentTitle,SentenceID,Sentence,Label
0,Q1,how are glacier caves formed?,D1,Glacier cave,D1-0,A partly submerged glacier cave on Perito More...,0
1,Q1,how are glacier caves formed?,D1,Glacier cave,D1-1,The ice facade is approximately 60 m high,0
2,Q1,how are glacier caves formed?,D1,Glacier cave,D1-2,Ice formations in the Titlis glacier cave,0
3,Q1,how are glacier caves formed?,D1,Glacier cave,D1-3,A glacier cave is a cave formed within the ice...,1
4,Q1,how are glacier caves formed?,D1,Glacier cave,D1-4,"Glacier caves are often called ice caves , but...",0
5,Q2,How are the directions of the velocity and for...,D2,Circular motion,D2-0,"In physics , circular motion is a movement of ...",0
6,Q2,How are the directions of the velocity and for...,D2,Circular motion,D2-1,"It can be uniform, with constant angular rate ...",0
7,Q2,How are the directions of the velocity and for...,D2,Circular motion,D2-2,The rotation around a fixed axis of a three-di...,0
8,Q2,How are the directions of the velocity and for...,D2,Circular motion,D2-3,The equations of motion describe the movement ...,0
9,Q2,How are the directions of the velocity and for...,D2,Circular motion,D2-4,Examples of circular motion include: an artifi...,0


In [5]:
sub = r"[^A-Za-z]+"
df_train.loc[:,'Question'] = df_train.loc[:, 'Question'].apply(lambda x: re.sub(sub, ' ', x).strip().lower())
df_train.loc[:,'Sentence'] = df_train.loc[:, 'Sentence'].apply(lambda x: re.sub(sub, ' ', x).strip().lower())
df_train.head()

Unnamed: 0,QuestionID,Question,DocumentID,DocumentTitle,SentenceID,Sentence,Label
0,Q1,how are glacier caves formed,D1,Glacier cave,D1-0,a partly submerged glacier cave on perito more...,0
1,Q1,how are glacier caves formed,D1,Glacier cave,D1-1,the ice facade is approximately m high,0
2,Q1,how are glacier caves formed,D1,Glacier cave,D1-2,ice formations in the titlis glacier cave,0
3,Q1,how are glacier caves formed,D1,Glacier cave,D1-3,a glacier cave is a cave formed within the ice...,1
4,Q1,how are glacier caves formed,D1,Glacier cave,D1-4,glacier caves are often called ice caves but t...,0


In [6]:
class Voc:
    def __init__(self):
        self.token2index = {'<PAD>':0, '<SOS>':1, '<EOS>':2, '<UNK>':3}
        self.index2token = {v:k for k, v in self.token2index.items()}
        self.voclen = len(self.token2index)
        self.__lookslike_len__ = 10
    
    def extend_vocab(self, iterable):
        if not isinstance(iterable, collections.Iterable):
            raise ValueError('Value must be an iterable.')
        else:
            iterable = set(iterable)
            iterable = iterable - self.token2index.keys()
            ids = range(self.voclen, len(iterable)+self.voclen)
            self.token2index.update(dict(zip(iterable, ids)))
            self.index2token = {v:k for k, v in self.token2index.items()}
            self.voclen = len(self.token2index)
            
    def __call__(self):
        print('Vocabulary size: ', self.voclen)
        print('token2index looks like: ', list(self.token2index.items())[:self.__lookslike_len__], ', ...')
        print('index2token looks like: ', list(self.index2token.items())[:self.__lookslike_len__], ', ...')

In [7]:
df_train.head()

Unnamed: 0,QuestionID,Question,DocumentID,DocumentTitle,SentenceID,Sentence,Label
0,Q1,how are glacier caves formed,D1,Glacier cave,D1-0,a partly submerged glacier cave on perito more...,0
1,Q1,how are glacier caves formed,D1,Glacier cave,D1-1,the ice facade is approximately m high,0
2,Q1,how are glacier caves formed,D1,Glacier cave,D1-2,ice formations in the titlis glacier cave,0
3,Q1,how are glacier caves formed,D1,Glacier cave,D1-3,a glacier cave is a cave formed within the ice...,1
4,Q1,how are glacier caves formed,D1,Glacier cave,D1-4,glacier caves are often called ice caves but t...,0


In [8]:
voc = Voc()
for colname in ['Question', 'Sentence']:
    print('Name of column: ', colname)
    s = map(lambda x: x.split(), df_train.loc[:, colname])
    s = set(itertools.chain.from_iterable(s))
    print('Size before vocabulary extending: ', voc.voclen)
    voc.extend_vocab(s)
    print('Size after vocabulary extending: ', voc.voclen)

Name of column:  Question
Size before vocabulary extending:  4
Size after vocabulary extending:  3947
Name of column:  Sentence
Size before vocabulary extending:  3947
Size after vocabulary extending:  29340


In [9]:
%%time
df_train.loc[:, 'Question_encoded'] = df_train.loc[:, 'Question'].apply(
    lambda x: [voc.token2index.get(i, voc.token2index['<UNK>']) for i in x.split()])
df_train.loc[:, 'Sentence_encoded'] = df_train.loc[:, 'Sentence'].apply(
    lambda x: [voc.token2index.get(i, voc.token2index['<UNK>']) for i in x.split()])

CPU times: user 548 ms, sys: 12 ms, total: 560 ms
Wall time: 574 ms


In [10]:
df_train.head()

Unnamed: 0,QuestionID,Question,DocumentID,DocumentTitle,SentenceID,Sentence,Label,Question_encoded,Sentence_encoded
0,Q1,how are glacier caves formed,D1,Glacier cave,D1-0,a partly submerged glacier cave on perito more...,0,"[2635, 1998, 1538, 1362, 1585]","[2177, 18880, 21271, 1538, 8823, 3434, 19986, ..."
1,Q1,how are glacier caves formed,D1,Glacier cave,D1-1,the ice facade is approximately m high,0,"[2635, 1998, 1538, 1362, 1585]","[418, 401, 21439, 414, 19346, 3589, 3240]"
2,Q1,how are glacier caves formed,D1,Glacier cave,D1-2,ice formations in the titlis glacier cave,0,"[2635, 1998, 1538, 1362, 1585]","[401, 22027, 3788, 418, 13645, 1538, 8823]"
3,Q1,how are glacier caves formed,D1,Glacier cave,D1-3,a glacier cave is a cave formed within the ice...,1,"[2635, 1998, 1538, 1362, 1585]","[2177, 1538, 8823, 414, 2177, 8823, 1585, 1582..."
4,Q1,how are glacier caves formed,D1,Glacier cave,D1-4,glacier caves are often called ice caves but t...,0,"[2635, 1998, 1538, 1362, 1585]","[1538, 1362, 1998, 1069, 2143, 401, 1362, 2110..."


In [11]:
Q_MAXLEN = df_train.Question_encoded.apply(len).max()
S_MAXLEN = df_train.Sentence_encoded.apply(len).max()

def pad_sequence(seq, max_len, padding=0):
    seq = seq + [padding] * (max_len - len(seq))
    assert len(seq) == max_len
    return np.array(seq)

df_train.loc[:, 'Question_encoded'] = df_train.Question_encoded.apply(lambda x: pad_sequence(x, Q_MAXLEN))
df_train.loc[:, 'Sentence_encoded'] = df_train.Sentence_encoded.apply(lambda x: pad_sequence(x, S_MAXLEN))

In [12]:
pos_ans = df_train.loc[df_train.Label == 1].drop_duplicates(subset=['QuestionID'])
neg_ans = df_train.loc[df_train.Label == 0].drop_duplicates(subset=['QuestionID'])

df_train_cut = pd.merge(pos_ans, neg_ans, how='inner', on='QuestionID', suffixes=('_pos', '_neg'))
print('Shape: ', df_train_cut.shape)
df_train_cut.head()

Shape:  (856, 17)


Unnamed: 0,QuestionID,Question_pos,DocumentID_pos,DocumentTitle_pos,SentenceID_pos,Sentence_pos,Label_pos,Question_encoded_pos,Sentence_encoded_pos,Question_neg,DocumentID_neg,DocumentTitle_neg,SentenceID_neg,Sentence_neg,Label_neg,Question_encoded_neg,Sentence_encoded_neg
0,Q1,how are glacier caves formed,D1,Glacier cave,D1-3,a glacier cave is a cave formed within the ice...,1,"[2635, 1998, 1538, 1362, 1585, 0, 0, 0, 0, 0, ...","[2177, 1538, 8823, 414, 2177, 8823, 1585, 1582...",how are glacier caves formed,D1,Glacier cave,D1-0,a partly submerged glacier cave on perito more...,0,"[2635, 1998, 1538, 1362, 1585, 0, 0, 0, 0, 0, ...","[2177, 18880, 21271, 1538, 8823, 3434, 19986, ..."
1,Q16,how much is tablespoon of water,D16,Tablespoon,D16-0,this tablespoon has a capacity of about ml,1,"[2635, 3161, 414, 3348, 3104, 92, 0, 0, 0, 0, ...","[789, 3348, 1120, 2177, 966, 3104, 1206, 20749...",how much is tablespoon of water,D16,Tablespoon,D16-1,measuring spoons,0,"[2635, 3161, 414, 3348, 3104, 92, 0, 0, 0, 0, ...","[21548, 19784, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
2,Q17,how much are the harry potter movies worth,D17,Harry Potter,D17-13,the series also originated much tie in merchan...,1,"[2635, 3161, 1998, 418, 1482, 1736, 3925, 582,...","[418, 1440, 6776, 1230, 3161, 6409, 3788, 2195...",how much are the harry potter movies worth,D17,Harry Potter,D17-0,harry potter is a series of seven fantasy nove...,0,"[2635, 3161, 1998, 418, 1482, 1736, 3925, 582,...","[1482, 1736, 414, 2177, 1440, 3104, 10983, 132..."
3,Q18,how a rocket engine works,D18,Rocket engine,D18-3,a rocket engine or simply rocket is a jet engi...,1,"[2635, 2177, 1439, 798, 429, 0, 0, 0, 0, 0, 0,...","[2177, 1439, 798, 2610, 5012, 1439, 414, 2177,...",how a rocket engine works,D18,Rocket engine,D18-0,rs being tested at nasa s stennis space center,0,"[2635, 2177, 1439, 798, 429, 0, 0, 0, 0, 0, 0,...","[6671, 1117, 27309, 1005, 18429, 3697, 13336, ..."
4,Q21,how are cholera and typhus transmitted and pre...,D21,Cholera,D21-2,transmission occurs primarily by drinking wate...,1,"[2635, 1998, 1041, 1615, 2222, 3746, 1615, 246...","[3760, 19144, 28736, 1236, 2041, 92, 2610, 785...",how are cholera and typhus transmitted and pre...,D21,Cholera,D21-0,cholera is an infection in the small intestine...,0,"[2635, 1998, 1041, 1615, 2222, 3746, 1615, 246...","[1041, 414, 1491, 19566, 3788, 418, 3765, 3213..."


In [13]:
vocab_size = voc.voclen
embed_size = 256
hidden_size = 32

man = QAMatching(vocab_size, embed_size, hidden_size)

  "num_layers={}".format(dropout, num_layers))
  "num_layers={}".format(dropout, num_layers))


In [14]:
def hinge_loss(sim_true, sim_false, M=0.5):
    loss = max((0, M - sim_true + sim_false))
    return loss

In [15]:
man.zero_grad()
man.cuda(device)
loss = 0
for _df in np.split(df_train_cut, 8):
    print(loss)
    q_pos = torch.tensor(np.array(_df.Question_encoded_pos.tolist()))
    q_neg = torch.tensor(np.array(_df.Question_encoded_neg.tolist()))
    a_pos = torch.tensor(np.array(_df.Sentence_encoded_pos.tolist()))
    a_neg = torch.tensor(np.array(_df.Sentence_encoded_neg.tolist()))
    
    # Set device options
    input_q_pos = q_pos.to(device)
    input_q_neg = q_neg.to(device)
    input_a_pos = a_pos.to(device)
    input_a_neg = a_neg.to(device)
    
    lens_q = [len(i) for i in _df.Question_encoded_pos]
    lens_a = [len(i) for i in _df.Sentence_encoded_pos]
    
    sim_pos, _ = man(input_q_pos, lens_q, input_a_pos, lens_a, training=False)
    sim_neg, _ = man(input_q_neg, lens_q, input_a_neg, lens_a, training=False)
    
    batch_loss = torch.tensor([hinge_loss(st, sf) for st,sf in zip(sim_pos, sim_neg)])
    loss = torch.mean(batch_loss).to(device)
    print(loss)
    loss.backward()

#     # Clip gradients: gradients are modified in place
#     _ = torch.nn.utils.clip_grad_norm_(man.parameters(), clip)

#     # Adjust model weights
#     optimizer.step()

0
tensor(0.4845, device='cuda:0')


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [3]:
learning_rate = 0.001
optimizer = optim.Adam(man.parameters(), lr=learning_rate)

NameError: name 'optim' is not defined

In [None]:
# Perform backpropatation
loss.backward()

# Clip gradients: gradients are modified in place
_ = torch.nn.utils.clip_grad_norm_(man.parameters(), clip)

# Adjust model weights
optimizer.step()

In [None]:
# a = inputs_q[:3]
# b = [len(i) for i in inputs_q[:3]]
# c = inputs_a[:3]
# d = [len(i) for i in inputs_a[:3]]

a = input_variable_q[:3]
b = [len(i) for i in input_variable_q[:3]]
c = input_variable_a[:3]
d = [len(i) for i in input_variable_a[:3]]

man(a,b,c,d, training=False)

In [None]:
def hinge_loss(sim_true, sim_false, M=0.5):
    return max((0, M - sim_true + sim_false))

In [None]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):

    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    

    # Perform backpropatation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals