In [1]:
import pandas as pd
import numpy as np
import operator
import ast
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch
import torch.nn as nn
import string
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 
import spacy
import re

## Import Data

In [2]:
data = pd.read_csv('combined-newsqa-data-v1.csv')
print(data.columns)
print(type(data['is_question_bad'][0]))

# remove Q/A pairs that are invalid or missing
#data = data[(data.is_question_bad=='0.0') & (data.is_answer_absent=='0.0')]
data.head()

Index(['story_id', 'question', 'answer_char_ranges', 'is_answer_absent',
       'is_question_bad', 'validated_answers', 'story_text'],
      dtype='object')
<class 'str'>


Unnamed: 0,story_id,question,answer_char_ranges,is_answer_absent,is_question_bad,validated_answers,story_text
0,./cnn/stories/42d01e187213e86f5fe617fe32e716ff...,What was the amount of children murdered?,294:297|None|None,0.0,0.0,"{""none"": 1, ""294:297"": 2}","NEW DELHI, India (CNN) -- A high court in nort..."
1,./cnn/stories/c48228a52f26aca65c31fad273e66164...,Where was one employee killed?,34:60|1610:1618|34:60,0.0,0.0,,(CNN) -- Fighting in the volatile Sudanese reg...
2,./cnn/stories/c65ed85800e4535f4bbbfa2c34d7d963...,who did say South Africa did not issue a visa ...,103:127|114:127|839:853,0.0,0.0,"{""839:853"": 1, ""103:127"": 2}",Johannesburg (CNN) -- Miffed by a visa delay t...
3,./cnn/stories/0cf66b646e9b32076513c050edf32a79...,How many years old was the businessman?,538:550|538:550,0.0,0.0,,(CNN) -- England international footballer Ste...
4,./cnn/stories/13012604e3203c18df09289dfedd14cd...,What frightened the families?,690:742|688:791|630:646,0.0,0.0,"{""688:791"": 2, ""690:742"": 1}","BAGHDAD, Iraq (CNN) -- At least 6,000 Christi..."


In [3]:
print(data['is_question_bad'][0])

0.0


In [4]:
# 119,633 Q/A's , 12088 articles
print(len(data))
print(len(data['story_text'].unique()))


119633
12088


In [5]:
first_doc = data['story_text'][0]
first_doc

'NEW DELHI, India (CNN) -- A high court in northern India on Friday acquitted a wealthy businessman facing the death sentence for the killing of a teen in a case dubbed "the house of horrors."\n\n\n\nMoninder Singh Pandher was sentenced to death by a lower court in February.\n\n\n\nThe teen was one of 19 victims -- children and young women -- in one of the most gruesome serial killings in India in recent years.\n\n\n\nThe Allahabad high court has acquitted Moninder Singh Pandher, his lawyer Sikandar B. Kochar told CNN.\n\n\n\nPandher and his domestic employee Surinder Koli were sentenced to death in February by a lower court for the rape and murder of the 14-year-old.\n\n\n\nThe high court upheld Koli\'s death sentence, Kochar said.\n\n\n\nThe two were arrested two years ago after body parts packed in plastic bags were found near their home in Noida, a New Delhi suburb. Their home was later dubbed a "house of horrors" by the Indian media.\n\n\n\nPandher was not named a main suspect by 

In [6]:
first_doc[294:297]

'19 '

In [7]:
second_doc = data['story_text'][2]
second_doc

'Johannesburg (CNN) -- Miffed by a visa delay that led the Dalai Lama to cancel a trip to South Africa, Archbishop Desmond Tutu lashed out at his government Tuesday, saying it had acted worse than apartheid regimes and had forgotten all that the nation stood for.\n\n\n\n\n\n"When we used to apply for passports under the apartheid government, we never knew until the last moment what their decision was," Tutu said at a news conference. "Our government is worse than the apartheid government because at least you were expecting it from the apartheid government.\n\n\n\n\n\n"I have to say that I can\'t believe this. I really can\'t believe this," Tutu said. "You have to wake me up and tell me this is actually happening here."\n\n\n\n\n\nThe Dalai Lama scrapped his planned trip to South Africa this week after the nation failed to issue him a visa in time, his spokesman said.\n\n\n\n\n\nVisa applications for him and his entourage were submitted to the South African High Commission in New Delhi,

In [8]:
print(second_doc[103:127])
print(second_doc[114:127])
print(second_doc[839:853])

Archbishop Desmond Tutu 
Desmond Tutu 
his spokesman 


Answers given by different human reviewers are in the answer_char_ranges column and the validated_answers column. These values are string index ranges within the document that represent the answer. For each row, we need to determine which character range is the best answer to use and extract it. There are a couple ways we could approach this problem, the first is to have a model that takes the question(str), the story(str), spits out an answer(str).

or we could have it spit back out the string indexes themselves. 

In [9]:
# Remove uneeded columns
df = data[['question','validated_answers','story_text']]
df = df.dropna()
df = df.iloc[0:50000] # reduce size for development purposes

df.head()

Unnamed: 0,question,validated_answers,story_text
0,What was the amount of children murdered?,"{""none"": 1, ""294:297"": 2}","NEW DELHI, India (CNN) -- A high court in nort..."
2,who did say South Africa did not issue a visa ...,"{""839:853"": 1, ""103:127"": 2}",Johannesburg (CNN) -- Miffed by a visa delay t...
4,What frightened the families?,"{""688:791"": 2, ""690:742"": 1}","BAGHDAD, Iraq (CNN) -- At least 6,000 Christi..."
6,Who is hiring?,"{""301:324"": 2}",CNN affiliates report on where job seekers are...
8,Iran criticizes who?,"{""63:97"": 2}","TEHRAN, Iran (CNN) -- Iran's parliament speake..."


In [10]:
# loops through the rows and prints the question along with the first answer given
start_truth = []
end_truth = []
for i, row in df.iterrows():
    try:
        answers = ast.literal_eval(row['validated_answers'])
        sorted_ans = sorted(answers.items(), key=operator.itemgetter(1), reverse=True)
        #print(sorted_ans)
        start, end = sorted_ans[0][0].split(':')
        start_truth.append(int(start))
        end_truth.append(int(end))
    except ValueError:
        start_truth.append(np.nan)
        end_truth.append(np.nan)
        pass
df['start_truth'] = start_truth
df['end_truth'] = end_truth
df = df.dropna()
print(len(df))
df.head()

42317


Unnamed: 0,question,validated_answers,story_text,start_truth,end_truth
0,What was the amount of children murdered?,"{""none"": 1, ""294:297"": 2}","NEW DELHI, India (CNN) -- A high court in nort...",294.0,297.0
2,who did say South Africa did not issue a visa ...,"{""839:853"": 1, ""103:127"": 2}",Johannesburg (CNN) -- Miffed by a visa delay t...,103.0,127.0
4,What frightened the families?,"{""688:791"": 2, ""690:742"": 1}","BAGHDAD, Iraq (CNN) -- At least 6,000 Christi...",688.0,791.0
6,Who is hiring?,"{""301:324"": 2}",CNN affiliates report on where job seekers are...,301.0,324.0
8,Iran criticizes who?,"{""63:97"": 2}","TEHRAN, Iran (CNN) -- Iran's parliament speake...",63.0,97.0


In [11]:
final = df.drop(columns=['validated_answers'])
final['ss'] = np.zeros(len(final))
final.head()

Unnamed: 0,question,story_text,start_truth,end_truth,ss
0,What was the amount of children murdered?,"NEW DELHI, India (CNN) -- A high court in nort...",294.0,297.0,0.0
2,who did say South Africa did not issue a visa ...,Johannesburg (CNN) -- Miffed by a visa delay t...,103.0,127.0,0.0
4,What frightened the families?,"BAGHDAD, Iraq (CNN) -- At least 6,000 Christi...",688.0,791.0,0.0
6,Who is hiring?,CNN affiliates report on where job seekers are...,301.0,324.0,0.0
8,Iran criticizes who?,"TEHRAN, Iran (CNN) -- Iran's parliament speake...",63.0,97.0,0.0


## Char-level embeddings

In [12]:
full_text = ''
m = 0
len_nums = []

for col in ['question','story_text']:
    for text in final[col]:
        m = max(m, len(text))
        full_text += text.lower()
    
    len_nums.append(m)
    m = 0
    
# get the set of all characters
characters = tuple(set(full_text))
characters[:5]

('h', ']', '?', 'à', 'k')

In [13]:
# use enumeration to give the characters integer values
int2char = dict(enumerate(characters))

# create the look up dictionary from characters to the assigned integers
char2int = {char: index for index, char in int2char.items()}
print(char2int)

{'h': 0, ']': 1, '?': 2, 'à': 3, 'k': 4, '+': 5, 'ú': 6, '\xad': 7, 'ç': 8, '€': 9, '\n': 10, 'ò': 11, '7': 12, '¶': 13, '@': 14, '3': 15, ')': 16, 'ġ': 17, '^': 18, '\xa0': 19, 'ã': 20, 't': 21, 'q': 22, '6': 23, '&': 24, 'e': 25, 'ó': 26, 'ø': 27, '¾': 28, 'l': 29, '#': 30, '«': 31, '‰': 32, ',': 33, 'b': 34, 'c': 35, '¿': 36, '±': 37, 'w': 38, '!': 39, 'x': 40, '>': 41, '»': 42, '½': 43, 'y': 44, 'g': 45, '"': 46, 'ł': 47, 'ñ': 48, '„': 49, 's': 50, 'þ': 51, 'å': 52, '/': 53, '´': 54, 'é': 55, 'ô': 56, ':': 57, '-': 58, '%': 59, 'n': 60, 'ħ': 61, 'f': 62, '°': 63, 'â': 64, '$': 65, 'v': 66, '®': 67, '9': 68, 'á': 69, '.': 70, '¬': 71, 'æ': 72, '[': 73, 'z': 74, 'º': 75, '¹': 76, 'o': 77, '£': 78, 'ï': 79, '4': 80, '¨': 81, '—': 82, 'd': 83, 'ş': 84, 'ö': 85, '”': 86, '×': 87, '′': 88, '(': 89, '\u202a': 90, 'ù': 91, '•': 92, 'ÿ': 93, '·': 94, '*': 95, '¢': 96, '¼': 97, '|': 98, '1': 99, '²': 100, 'r': 101, '`': 102, 'è': 103, '<': 104, 'm': 105, '8': 106, '_': 107, 'û': 108, '\u202c

In [14]:
q_max, story_max = len_nums
print(q_max, story_max)

233 11349


In [15]:
final['question'] = final['question'].apply(lambda x: np.array([char2int[char] for char in x.lower()]))
final['story_text'] = final['story_text'].apply(lambda x: np.array([char2int[char] for char in x.lower()]))

In [16]:
final.head()

Unnamed: 0,question,story_text,start_truth,end_truth,ss
0,"[38, 0, 132, 21, 120, 38, 132, 50, 120, 21, 0,...","[60, 25, 38, 120, 83, 25, 29, 0, 114, 33, 120,...",294.0,297.0,0.0
2,"[38, 0, 77, 120, 83, 114, 83, 120, 50, 132, 44...","[139, 77, 0, 132, 60, 60, 25, 50, 34, 119, 101...",103.0,127.0,0.0
4,"[38, 0, 132, 21, 120, 62, 101, 114, 45, 0, 21,...","[34, 132, 45, 0, 83, 132, 83, 33, 120, 114, 10...",688.0,791.0,0.0
6,"[38, 0, 77, 120, 114, 50, 120, 0, 114, 101, 11...","[35, 60, 60, 120, 132, 62, 62, 114, 29, 114, 1...",301.0,324.0,0.0
8,"[114, 101, 132, 60, 120, 35, 101, 114, 21, 114...","[21, 25, 0, 101, 132, 60, 33, 120, 114, 101, 1...",63.0,97.0,0.0


In [17]:
def find_s(seq):
    for i, char in enumerate(seq):
        if char == 0:
            continue
        else: return i

In [18]:
padding_start = False

for i, row in final.iterrows():
    for col, N in zip(['question','story_text'], [q_max, story_max]):
        enc = np.zeros(N, dtype=np.int32)
        
        # row[col] = np.array([char2int[char] for char in row[col].lower()])
        l = min(N, len(row[col]))
        
        if padding_start:
            enc[:l] = row[col][:l]
        else:
            enc[N-l:] = row[col][:l]
        
        final.at[i, col] = enc
        final.at[i, 'ss'] = N-l

final.head()

Unnamed: 0,question,story_text,start_truth,end_truth,ss
0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",294.0,297.0,10114.0
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",103.0,127.0,7243.0
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",688.0,791.0,9077.0
6,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",301.0,324.0,5733.0
8,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",63.0,97.0,8539.0


In [19]:
print(final.iloc[1,1])
print(find_s(final.iloc[1,1]))
# find_s([0,0,0,0,0,0,1,2,4])

[  0   0   0 ... 101  21  70]
7243


## Train/Test Split

In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(final[['question', 'story_text', 'ss']], 
                                                    final[['start_truth','end_truth']], 
                                                    test_size=0.3, 
                                                    random_state=42)

In [21]:
print(X_train.index)
X_train.head()

Int64Index([ 82545,  14722,  19613,  44594,  57438,   6939,  52626,  19717,
             13048,  56506,
            ...
            112796,  44232,  60295, 102334,  46461,  17310,  31261, 104913,
              2295,  43564],
           dtype='int64', length=29621)


Unnamed: 0,question,story_text,ss
82545,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9793.0
14722,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",7772.0
19613,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6076.0
44594,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",9152.0
57438,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",8835.0


In [22]:
min(y_train.end_truth)

6.0

In [23]:
len(X_train)

29621

In [24]:
y_train.head()

Unnamed: 0,start_truth,end_truth
82545,1015.0,1051.0
14722,1751.0,1765.0
19613,1321.0,1333.0
44594,755.0,801.0
57438,211.0,245.0


## Dataset

In [25]:
class NewsQADataset(Dataset):
    def __init__(self, X, y, N=400, padding_start=False):
        self.start = y['start_truth'].values
        self.end = y['end_truth'].values
        self.question = X['question'].values
        self.story_text = X['story_text'].values
        self.ss = X['ss'].values
        
    def __len__(self):
        return len(self.question)
    
    def __getitem__(self, idx):
        #print(self.question)
        #qs = np.array([find_s(seq) for seq in self.question])
        # ss = np.array([find_s(seq) for seq in self.story_text])
        return self.question[idx], self.story_text[idx], self.start[idx] + self.ss[idx], self.end[idx] + self.ss[idx]

In [26]:
train_ds = NewsQADataset(X_train, y_train)
test_ds = NewsQADataset(X_test, y_test)

In [27]:
train_ds[0]

(array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0

In [28]:
batch_size = 50
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size)

In [29]:
q, s, start, end = next(iter(train_dl))
print(q.shape, s.shape, start.shape, end.shape)
q, s, start, end

torch.Size([50, 233]) torch.Size([50, 11349]) torch.Size([50]) torch.Size([50])


(tensor([[  0,   0,   0,  ..., 105,  25,   2],
         [  0,   0,   0,  ..., 132, 114,  60],
         [  0,   0,   0,  ...,  21,  77,   2],
         ...,
         [  0,   0,   0,  ...,  21,  44,   2],
         [  0,   0,   0,  ..., 132,  44,   2],
         [  0,   0,   0,  ..., 132,  21,   2]], dtype=torch.int32),
 tensor([[  0,   0,   0,  ...,  50,  70,  46],
         [  0,   0,   0,  ...,  29,  77,  38],
         [  0,   0,   0,  ...,  60,  45,  70],
         ...,
         [  0,   0,   0,  ...,  25,  83,  70],
         [  0,   0,   0,  ..., 119,  25,  70],
         [  0,   0,   0,  ...,  29,  44,  70]], dtype=torch.int32),
 tensor([ 9496.,  9084.,  8804.,  8127.,  9927., 10976.,  5826.,  8769.,  8969.,
         10295.,  8669.,  9940.,  9821., 10312., 10371.,  4693.,  9238., 10495.,
          8307.,  9324.,  9183., 11125.,  6887.,  9086.,  5320.,  1748.,  7726.,
          8561.,  9315., 10555.,  9511., 10162.,  8461.,  8207.,  4198.,  9742.,
         10750., 10174., 10635., 10004.,  

## Modeling

In [30]:
class GRUModel(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim, q_max, story_max):
        super(GRUModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.dropout = nn.Dropout(0.5)
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.linear2 = nn.Linear(200, story_max)
        self.linear_start = nn.Linear(story_max, story_max+1)
        self.linear_end = nn.Linear(story_max, story_max+1)
        
    def forward(self, x1, x2):
        
        x1 = self.embeddings(x1)
        x1 = self.dropout(x1)
        out_pack1, ht1= self.gru(x1)
        
        x2 = self.embeddings(x2)
        x2 = self.dropout(x2)
        out_pack2, ht2= self.gru(x2)
        
        
        x = torch.cat((ht1[-1], ht2[-1]),1)
        
        x = F.relu(self.linear2(x))
        start = self.linear_start(x)
        end = self.linear_end(x)
 
        return start, end

I've taken the functions from her last name classification model because that uses character-level embeddings as well. I'm using a naive version that just concats the question with the story and uses that as input into the RNN. I used that with regressing 2 outputs (start, end) and I got it to train but not sure how well it's actually doing. I think we correct way to do this is to treat it as a classification problem where each index of the story is a categroy and we use those predictions

In [31]:
def get_optimizer(model, lr = 0.1, wd = 0.0001):
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optim = torch.optim.Adam(parameters, lr=lr, weight_decay=wd)
    return optim

def vectorize(labels, size=7361):
    result = []
    for s in labels:
        #print(s)
        row = np.zeros(size)
        row[int(s)] = 1
        result.append(row)
        
    return result

def custom_metric(start_real, end_real, start_pred, end_pred):
    # len(intersection) / len(union)
    vals = []
    for s1, e1, s2, e2 in zip(start_real, end_real, start_pred, end_pred):
        real_range = set(range(s1, e1+1, 1))
        pred_range = set(range(s2, e2+1, 1))
        intersection = real_range.intersection(pred_range)
        union = real_range.union(pred_range)
        vals.append(len(intersection)/len(union))
        
    return np.mean(vals)
        

custom_metric([1,2,3],[2,3,4],[1,2,3],[3,4,5])

0.6666666666666666

In [34]:
def train_epocs(model, epochs=10, lr=0.0001):
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=lr)
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for q, s, start_, end_ in train_dl:
            #start_vec = vectorize(start_)
            #end_vec = vectorize(end_)
            q = q.long().cuda()
            s = s.long().cuda()
            start_ = start_.long().cuda()
            end_ = end_.long().cuda()

            start, end = model(q, s)
            loss_start = F.cross_entropy(start, start_)

            loss_end = F.cross_entropy(end, end_)
            loss = loss_start + loss_end
        
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*end_.shape[0]
            total += end_.shape[0]
        
        torch.cuda.empty_cache()
        val_loss, val_score = val_metrics(model, test_dl)
        #if i % 5 == 1:
        print("train loss %.3f val loss %.3f and val score %.3f" % (sum_loss/total, val_loss, val_score))
            
def val_metrics(model, valid_dl):
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0.0
    scores = []
    for q, s, start_, end_ in valid_dl:
        q = q.long().cuda()
        s = s.long().cuda()
        start_ = start_.long().cuda()
        end_ = end_.long().cuda()
        
        start, end = model(q, s)
        #print(start, end)
        #print(start.shape, end.shape)
        
        loss_start = F.cross_entropy(start, start_)
        #print(loss_start)
        
        loss_end = F.cross_entropy(end, end_)
        #print(loss_end)
        
        loss = loss_start + loss_end
        #print(loss)
        #
        start_i = start.argmax(1)
        end_i = end.argmax(1)
        #for st, en in zip(start_i, end_i):
        
        score = custom_metric(start_, end_, start_i, end_i)
        scores.append(score)
        
        total += end_.shape[0]
        sum_loss += loss.item()*end_.shape[0]
    
    print(f'Predicted: {start_i}, Real: {start_}')
    #print(f'Real: {start_}, {end_}')
    return sum_loss/total, np.mean(scores)

In [33]:
vocab_size = len(char2int)
hidden_size = 100
n_classes = 2
emb_size = 10
model = GRUModel(vocab_size, emb_size, hidden_size, q_max, story_max).cuda()

# val_metrics(model, test_dl)

In [35]:
train_epocs(model)

Predicted: tensor([ 9424, 10136,  9424,  9424,  9424, 10136,  9424,  9424, 10136,  9424,
         9424,  9424,  9424,  9424,  9424, 10136,  9424,  9424,  9424,  9704,
        10136,  9424,  9424,  9424,  9424,  9424,  9424,  9424, 10136, 10136,
         9424,  9424,  9424,  9424,  9424,  9424,  9424,  9424, 10136,  9424,
         9424,  9424,  9424,  9424,  9704,  9424], device='cuda:0'), Real: tensor([ 6696,  6404,  9871,  8991,  9806, 11216, 10037,  9071,  9820,  3792,
         9559, 10224,  6549,  8876, 10574,  5702,  6889,  6615, 10178, 10788,
         6055,  3723, 10318, 10645,  6921,  9784,  8100,  6798, 11177, 10502,
         3379, 10579,  8444,  8400,  7827,  7059,  9841,  7972,  9225, 10330,
         8608,  9427,  8824,  4944, 10474, 10859], device='cuda:0')
train loss 18.260 val loss 18.056 and val score 0.007
Predicted: tensor([ 7085,  9638,  9638,  8863,  9380,  7085,  7085,  9638,  7085,  9638,
         7085,  8457,  8863,  9638,  9638,  9520, 10260,  9638, 10260,  9709,
 

In [None]:
torch.cuda.empty_cache()