In [6]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from collections import Counter
import numpy as np
import random
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader

In [7]:
import random

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
train_path = "data/senti.train.tsv"
dev_path = "data/senti.dev.tsv"
test_path = "data/senti.test.tsv"

In [9]:
from nltk.stem import WordNetLemmatizer
#from nltk.corpus import wordnet
def getCleanData(x):

    # Reduce each word into common base
    lem = WordNetLemmatizer()
    x = [lem.lemmatize(word) for word in x] 
    x = [lem.lemmatize(word,'v') for word in x]
    x = [lem.lemmatize(word,'r') for word in x]
    return x

In [10]:
def read_corpus(path):
    with open(path, 'r', encoding='utf-8') as f:
        sents = []
        labels = []
        for line in f.readlines():
            sent = line.split('\t')[0].lower()
            sent = getCleanData(sent.split(' '))
            label = line.split('\t')[1].strip('\n')
            sents.append(sent)
            labels.append(label)
    return sents, labels

def build_vocab(sents):
    dic = {}
    word_counter = Counter()
    dic['PAD'] = 0
    dic['UNK'] = 1
    for sent in sents:
        for word in sent:
            word_counter[word] += 1
    itos = [w for w, c in word_counter.items()]
    for w in itos:
        dic[w] = len(dic)
    return dic

def vectorize(sents):
    vecs = [[wtoi.get(word, wtoi.get("UNK")) for word in sent] for sent in sents]
    return vecs

In [11]:
train_data,train_label = read_corpus(train_path)
dev_data,dev_label = read_corpus(dev_path)
test_data,test_label = read_corpus(test_path)

In [12]:
wtoi= build_vocab(train_data)
itow = dict((v,k) for k, v in wtoi.items())

In [11]:
len(wtoi)

12179

In [17]:
train_set = vectorize(train_data)
dev_set = vectorize(dev_data)
test_set = vectorize(test_data)

In [18]:
print('train set size: {} \ndev set size: {} \ntest set size: {}'.format(len(train_data),len(dev_data),len(test_data)))
print('vocab size: ', len(wtoi))

train set size: 67349 
dev set size: 872 
test set size: 1821
vocab size:  12179


In [19]:
class LoadData(torch.utils.data.Dataset):
    def __init__(self, data, labels):
       # super(LoadData.self).__init__()
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        X = self.data[idx]
        Y = int(self.labels[idx])
        return X, Y

In [20]:
def collate_fn(batch):
    
    batch = list(zip(*batch))
    
   # lengths = torch.LongTensor([len(t) for t in batch[0]]).to(device)
    inputs = [torch.LongTensor(t).to(device) for t in batch[0]]
    inputs = torch.nn.utils.rnn.pad_sequence(inputs, batch_first=True) 
    labels = torch.LongTensor(batch[1]).to(device)
    mask = (inputs != 0).to(device)
    
    return inputs, labels, mask

In [21]:
trains = LoadData(train_set, train_label)
devs = LoadData(dev_set, dev_label)
tests = LoadData(test_set, test_label)

In [18]:
trains[9]

([75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 64], 1)

In [23]:
train_loader = torch.utils.data.DataLoader(
                    dataset=trains,
                    batch_size=batch_size,
                    shuffle=True,
                    collate_fn=collate_fn)
dev_loader = torch.utils.data.DataLoader(
                    dataset=devs,
                    batch_size=batch_size,
                    shuffle=True,
                    collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(
                    dataset=tests,
                    batch_size=batch_size,
                    shuffle=False,
                    collate_fn=collate_fn)

In [150]:
next(iter(train_loader))

(tensor([[5562,    0,    0,  ...,    0,    0,    0],
         [1791,  470,   69,  ...,    0,    0,    0],
         [   6,  584,   20,  ...,    0,    0,    0],
         ...,
         [6292,   52,    6,  ...,    0,    0,    0],
         [  75,   55, 2819,  ...,    0,    0,    0],
         [  79,   55, 2282,  ...,    0,    0,    0]]),
 tensor([1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
         1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
         0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0]),
 tensor([[ True, False, False,  ..., False, False, False],
         [ True,  True,  True,  ..., False, False, False],
         [ True,  True,  True,  ..., False, False, False],
         ...,
         [ True,  True,  True,  ..., False, False, False],
         [ True,  True,  True,  ..., False, False, False],
         [ True,  True,  True,  ..., False, False, False]]))

In [24]:
x,y,mask = next(iter(train_loader))
print('input shape: {},\nlabel shpae{},\nmask shape:{}'.format(x.shape, y.shape, mask.shape))

input shape: torch.Size([64, 36]),
label shpaetorch.Size([64]),
mask shape:torch.Size([64, 36])


In [22]:
vocab_size = len(wtoi)
batch_size = 64
emb_size = 200
pad_idx = wtoi['PAD']
output_size = 1
print(vocab_size,pad_idx)

12179 0


### Self Attention Model

In [55]:
class SelfAttModel(nn.Module):
    def __init__(self, vocab_size, emb_size, output_size, pad_idx, dropout=0.2):
        super(SelfAttModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, emb_size, padding_idx=pad_idx)
        self.linear = nn.Linear(emb_size, output_size)
        self.dropout = nn.Dropout(dropout)
        
        self.embed.weight.data.uniform_(-0.1, 0.1) 
        self.linear.weight.data.uniform_(-0.1, 0.1) 
    
    def forward(self, inputs, mask): #(bsz,seq_len)
        # (batch_size, seq_len, emb)
        t_emb = self.dropout(self.embed(inputs))
        s_emb = self.dropout(self.embed(inputs))
        mask = mask.to(float) 
        h_att = self.attention(t_emb, s_emb, mask)    
        out = self.linear(self.dropout(h_att)).squeeze(-1) #(batch_size)
        
        return out#(batch_zize), 
    
    def attention(self, emb_t, emb_s, mask=None):        
        alpha_ts = torch.bmm(emb_s, emb_t.transpose(1,2))  #(batch_size, seq_len, emb)*(batch_size, emb, seq_len)->(bsz, s_l, s_l)
        mask = mask.unsqueeze(-1)
        if mask is not None:
            alpha_ts.masked_fill_(mask == 0, -float('inf'))
            
        alpha_t = F.softmax(alpha_ts, dim=1) #(batch_size, seq_len, seq_len）
            
        h_s= torch.bmm(
                   alpha_t,               # (batch_size, seq_len, seq_len)* (batch_size, seq_len, emb)
                   emb_t).sum(1)               # ->(batch_size, seq_len, emb)->(batch_size, emb)
        return h_s

In [56]:
model = SelfAttModel(vocab_size, emb_size, output_size, pad_idx,dropout=0.2)
model=model.to(device)

In [57]:
output = model(x,mask)
print(output, output.shape)

tensor([ 0.0871,  0.8914,  0.8402,  0.3348,  1.6772,  0.7354,  0.7958,  0.4505,
        -0.3472, -0.2269, -0.7542,  1.2144,  1.0502,  0.7954,  1.2808,  0.9274,
        -0.8962,  0.6087, -0.5044,  0.8192,  0.9652,  0.1594,  2.1019,  0.5001,
         0.8966,  0.0722,  1.1370,  0.1226,  1.4524,  0.3237,  0.9240,  0.3178,
         0.5811,  0.1556,  2.1860,  2.6571,  1.1220,  0.4282,  1.3611,  0.1531,
         0.4693,  0.7239,  1.1937,  0.8048,  0.4401,  0.6421,  0.2642, -0.0731,
        -0.3627, -1.1998,  0.1753,  0.4559,  1.1988,  0.2474,  0.9815,  0.5812,
         0.4295,  0.6089,  1.6582,  1.0623,  0.3997,  0.9274,  0.6610, -0.1955],
       device='cuda:0', grad_fn=<SqueezeBackward1>) torch.Size([64])


In [58]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

In [59]:
def binary_acc(y_cap, y):
    preds = torch.round(torch.sigmoid(y_cap))
    correct = (preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

In [60]:
def train(model, data, optimizer, criterion):
    epoch_loss, epoch_acc = 0., 0.
    model.train()

    for _, (inputs, labels, mask) in enumerate(data):
        outputs = model(inputs, mask)  # (batch_size)
        loss = criterion(outputs, labels.float()) 
        acc = binary_acc(outputs, labels)
        
        # sgd
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
#         print("batch loss: {}".format(loss.item()))
        
        #epoch_loss += loss.item() * len(labels)
        #epoch_acc += acc.item() * len(labels)
        #total_len += len(labels)
        epoch_loss += loss.item() 
        epoch_acc += acc.item() 
        epoch_len = len(data)
        
    return epoch_loss / epoch_len, epoch_acc / epoch_len

In [61]:
def evaluate(model, data, criterion):
    epoch_loss, epoch_acc = 0., 0.
    model.eval()
    total_len = 0.
    for  _, (inputs, labels, mask) in enumerate(data):
        outputs = model(inputs, mask) 
    
        with torch.no_grad():
            preds = model(inputs, mask)
        loss = criterion(outputs, labels.float()) 
        acc = binary_acc(outputs, labels)
        
       # epoch_loss += loss.item() * len(labels)
        #epoch_acc += acc.item() * len(labels)
        #total_len += len(labels)
        
        epoch_loss += loss.item() 
        epoch_acc += acc.item() 
        epoch_len = len(data)
        
    model.train()
   
    return epoch_loss / epoch_len, epoch_acc / epoch_len

In [62]:
N_EPOCHS = 6
best_valid_acc = 0.
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dev_loader, criterion)
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), "selfatt-model.pth")
        
    print("Epoch", epoch, "Train Loss", train_loss, "Train Acc", train_acc)
    print("Epoch", epoch, "Valid Loss", valid_loss, "Valid Acc", valid_acc)

Epoch 0 Train Loss 0.43349806607788444 Train Acc 0.792583830789057
Epoch 0 Valid Loss 0.47747619450092316 Valid Acc 0.8162946445601327
Epoch 1 Train Loss 0.2613647497787095 Train Acc 0.8958029496250896
Epoch 1 Valid Loss 0.5788553889308657 Valid Acc 0.8234374991485051
Epoch 2 Train Loss 0.22281245582322223 Train Acc 0.9143942477356675
Epoch 2 Valid Loss 0.6551072171756199 Valid Acc 0.8216517865657806
Epoch 3 Train Loss 0.20196614357848905 Train Acc 0.9224671290697422
Epoch 3 Valid Loss 0.7302461309092385 Valid Acc 0.8029017874172756
Epoch 4 Train Loss 0.18909954800423506 Train Acc 0.9278393931198664
Epoch 4 Valid Loss 0.7500532673937934 Valid Acc 0.8209821454116276
Epoch 5 Train Loss 0.18210907613704685 Train Acc 0.9306594254510921
Epoch 5 Valid Loss 0.8082605685506549 Valid Acc 0.7991071428571429


### Test

In [63]:
model.load_state_dict(torch.load('selfatt-model.pth'))
test_loss, test_acc = evaluate(model,test_loader, criterion)
print(test_loss, test_acc)

0.5428818048074328 0.8139863260861101


In [58]:
def print_mistake(model, data):
    model.eval()
    total_len = 0.
    for  _, (inputs, labels, mask) in enumerate(data):
        outputs = model(inputs, mask) 
    
        with torch.no_grad():
            preds = model(inputs, mask)
            preds = torch.round(torch.sigmoid(preds))
        
    wrong = (preds != labels)
    mistakes = inputs[wrong]
    correct = labels[wrong]
    for err, l in zip(mistakes,correct):
        sent = [itow[w.item()] for w in err]
        print(sent, l.item())

In [59]:
print_mistake(model, test_loader)

['windtalker', 'blow', 'this', 'way', 'and', 'that', ',', 'but', 'there', "'s", 'no', 'mistake', 'the', 'filmmaker', 'in', 'the', 'tall', 'UNK', ',', 'true', 'to', 'himself', '.', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD'] 1
['the', 'UNK', 'bomb', 'of', 'reggio', "'s", 'image', 'and', 'glass', "'", 'evocative', 'music', '...', 'ultimately', 'leaf', 'viewer', 'with', 'the', 'task', 'of', 'divine', 'mean', '.', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD'] 0
['i', 'keep', 'think', 'over', 'and', 'over', 'again', ',', "'", 'i', 'should', 'be', 'enjoy', 'this', '.', "'", 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD'] 0
['the', 'pivotal', 'narrative', 'point', 'be', 'so', 'ripe', 'the', 'film', 'ca', "n't", 'help', 'but', 'go', 'soft', 'and', 'UNK', '.', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD'

### Residual connection Attention Model

In [64]:
class ResAttenModel(nn.Module):
    def __init__(self, vocab_size, emb_size, output_size, pad_idx, dropout=0.5):
        super(ResAttenModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, emb_size, padding_idx=pad_idx)
        self.linear = nn.Linear(emb_size, output_size)
        self.dropout = nn.Dropout(dropout)
        
        self.embed.weight.data.uniform_(-0.1, 0.1) 
        self.linear.weight.data.uniform_(-0.1, 0.1) 
    
    def forward(self, inputs, mask): #(bsz*seq_len)
        # (batch_size, seq_len, emb)
        # (batch_size, seq_len, emb)
        t_emb = self.dropout(self.embed(inputs))
        s_emb = self.dropout(self.embed(inputs))
        mask = mask.float()
        mask = mask.unsqueeze(-1) 
        h_self = self.attention(t_emb, s_emb, mask)    
        h_avg = self.avg(t_emb, mask)
        h_att = h_self + h_avg
        out = self.linear(self.dropout(h_att)).squeeze(-1) #(batch_size)
        return out

    def avg(self, x_emb, mask): #(bsz*seq_len)
        embedded = x_emb * mask # (batch_size, seq_len, embed_size)   
        # do avg
        sent_emb = embedded.sum(1) / (mask.sum(1) + 1e-9)  #(batch_size, embed_size)
        return sent_emb

    def attention(self, emb_t, emb_s, mask=None):        
        alpha_ts = torch.bmm(emb_s, emb_t.transpose(1,2))  #(batch_size, seq_len, emb)*(batch_size, emb, seq_len)->(bsz, s_l, s_l)
        if mask is not None:
            alpha_ts.masked_fill_(mask == 0, -float('inf'))
            
        alpha_t = F.softmax(alpha_ts, dim=1) #(batch_size, seq_len, seq_len）           
        h_s= torch.bmm(
                   alpha_t,               # (batch_size, seq_len, seq_len)* (batch_size, seq_len, emb)
                   emb_t).sum(1)               # ->(batch_size, seq_len, emb)->(batch_size, emb)
        return h_s

In [65]:
model = ResAttenModel(vocab_size, emb_size, output_size, pad_idx,dropout=0.2)
model=model.to(device)

In [70]:
output = model(x,mask)
print(output, output.shape)

tensor([-1.5124, -1.5075, -1.6653, -1.7425, -1.4250, -1.2171, -0.9983, -0.7353,
        -2.4146, -0.5225,  0.0321, -1.1645, -1.2169, -1.7647, -1.2304, -0.1848,
        -2.3924, -2.1925, -1.0917, -1.8886, -1.1039, -0.8171, -3.1461, -0.8945,
        -2.0495, -3.1795,  0.0865, -1.5898, -1.3833, -1.1655, -2.1365, -1.3975,
        -1.2058, -0.8670, -2.5356, -3.6437, -0.9945, -1.0295, -0.9632, -1.7956,
        -1.8482, -1.6941, -1.1927, -0.7095, -1.1730, -0.8387, -0.8639, -1.6272,
         0.2305, -3.1605, -0.5030, -1.7661, -1.2841, -1.3544, -1.1169, -0.8245,
        -2.0047, -1.6963, -2.1098, -0.2487, -0.8453, -0.8668, -0.7798, -0.2529],
       grad_fn=<SqueezeBackward1>) torch.Size([64])


In [66]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()
N_EPOCHS = 6
best_valid_acc = 0.
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dev_loader, criterion)
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), "resatt-model.pth")
        
    print("Epoch", epoch, "Train Loss", train_loss, "Train Acc", train_acc)
    print("Epoch", epoch, "Valid Loss", valid_loss, "Valid Acc", valid_acc)

Epoch 0 Train Loss 0.416235354367258 Train Acc 0.8024973997250129
Epoch 0 Valid Loss 0.46533111163548063 Valid Acc 0.8176339311259133
Epoch 1 Train Loss 0.24871548343050062 Train Acc 0.9018132716049383
Epoch 1 Valid Loss 0.5385044547063964 Valid Acc 0.8154017882687705
Epoch 2 Train Loss 0.21176491007704115 Train Acc 0.9161324786324786
Epoch 2 Valid Loss 0.6278615423611232 Valid Acc 0.8107142874172756
Epoch 3 Train Loss 0.19421044020698622 Train Acc 0.9249169040835707
Epoch 3 Valid Loss 0.6692943679434913 Valid Acc 0.8093750008514949
Epoch 4 Train Loss 0.18202288390437082 Train Acc 0.9296207264957265
Epoch 4 Valid Loss 0.6923431605100632 Valid Acc 0.8033482134342194
Epoch 5 Train Loss 0.17386746837103922 Train Acc 0.933508428300095
Epoch 5 Valid Loss 0.7529235567365374 Valid Acc 0.7910714277199337


### Test

In [67]:
model.load_state_dict(torch.load('resatt-model.pth'))
test_loss, test_acc = evaluate(model,test_loader, criterion)
print(test_loss, test_acc)

0.4404615770126211 0.8230343337716728


### Attention model with positional encoding

positional encoding 代码参考了http://nlp.seas.harvard.edu/2018/04/03/attention.html 

In [68]:
class NewAttModel(nn.Module):
    def __init__(self, vocab_size, emb_size, output_size, pad_idx, dropout=0.5):
        super(NewAttModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, emb_size, padding_idx=pad_idx)
        self.position = PositionalEncoding(emb_size)
        self.linear = nn.Linear(emb_size, output_size)
        self.dropout = nn.Dropout(dropout)
        
        self.embed.weight.data.uniform_(-0.1, 0.1) 
        self.linear.weight.data.uniform_(-0.1, 0.1) 
    
    def forward(self, inputs, mask): #(bsz,seq_len)
        # (batch_size, seq_len, emb)
        embeded = self.embed(inputs)
        embeded = self.position(embeded)
        t_emb = self.dropout(embeded)
        s_emb = self.dropout(embeded)
        
        mask = mask.to(float) 
        h_att = self.attention(t_emb, s_emb, mask)    
        out = self.linear(self.dropout(h_att)).squeeze(-1) #(batch_size)
        
        return out#(batch_zize), 
    
    def attention(self, emb_t, emb_s, mask=None):        
        alpha_ts = torch.bmm(emb_s, emb_t.transpose(1,2))  #(batch_size, seq_len, emb)*(batch_size, emb, seq_len)->(bsz, s_l, s_l)
        mask = mask.unsqueeze(-1)
        if mask is not None:
            alpha_ts.masked_fill_(mask == 0, -float('inf'))
            
        alpha_t = F.softmax(alpha_ts, dim=1) #(batch_size, seq_len, seq_len）
            
        h_s= torch.bmm(
                   alpha_t,               # (batch_size, seq_len, seq_len)* (batch_size, seq_len, emb)
                   emb_t).sum(1)               # ->(batch_size, seq_len, emb)->(batch_size, emb)
        return h_s

In [69]:
import math
from torch.autograd import Variable

class PositionalEncoding(nn.Module):
    '''
        Implement the PE function.
        
    '''
    def __init__(self, emb_size, max_len=128):
        super(PositionalEncoding, self).__init__()
        
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, emb_size)
        
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., emb_size, 2) *
                             -(math.log(10000.0) / emb_size))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)], 
                         requires_grad=False)
        return x

In [72]:
model = NewAttModel(vocab_size, emb_size, output_size, pad_idx,dropout=0.5)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

In [73]:
N_EPOCHS = 6
best_valid_acc = 0.
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, dev_loader, criterion)
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), "posn_atten.pth")
        
    print("Epoch", epoch, "Train Loss", train_loss, "Train Acc", train_acc)
    print("Epoch", epoch, "Valid Loss", valid_loss, "Valid Acc", valid_acc)

Epoch 0 Train Loss 1.4109000813813857 Train Acc 0.6043426095590293
Epoch 0 Valid Loss 0.463143636073385 Valid Acc 0.7839285731315613
Epoch 1 Train Loss 0.49624180626778636 Train Acc 0.7559622733341662
Epoch 1 Valid Loss 0.5220595704657691 Valid Acc 0.7508928562913623
Epoch 2 Train Loss 0.4354363615498131 Train Acc 0.7983560292695888
Epoch 2 Valid Loss 0.44228973771844593 Valid Acc 0.8169642857142857
Epoch 3 Train Loss 0.4015747473733491 Train Acc 0.818862207764574
Epoch 3 Valid Loss 0.46378378782953533 Valid Acc 0.830580357994352
Epoch 4 Train Loss 0.3760368804901074 Train Acc 0.8341770112684309
Epoch 4 Valid Loss 0.4796773408140455 Valid Acc 0.8263392874172756
Epoch 5 Train Loss 0.365120323465081 Train Acc 0.8421608612634744
Epoch 5 Valid Loss 0.5081853951726641 Valid Acc 0.8133928562913623


### Test

In [74]:
model.load_state_dict(torch.load('posn_atten.pth'))
test_loss, test_acc = evaluate(model,test_loader, criterion)
print(test_loss, test_acc)

0.4308139666401107 0.8376932226378342


### Bert test result

In [49]:
bert_res = []
with open('test_results_senti.txt') as f:
    for line in f.readlines()[1:]:
        pred = line.strip().split('\t')[1]
        bert_res.append(pred)

In [54]:
bert_acc = sum(np.array(bert_res)==np.array(test_label))/len(bert_res)
print('Bert Accuracy: ', bert_acc)

Bert Accuracy:  0.9357495881383855
