In [1]:
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [3]:
import sys
import math
import torch
import time

import pandas as pd
import numpy as np
import torch.nn as nn

from tqdm import tqdm
from torch import optim
from pathlib import Path
from docopt import docopt

from model import SentModel
from utils import prepare_df
from language_structure import load_model, Lang

# Dataset directory, relative to the notebook's working directory.
# NOTE(review): `base` is not referenced by any cell visible here — confirm
# whether it is still needed.
base = Path('../aclImdb')
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Data
df = pd.read_csv('train.csv')
# NOTE(review): `load_model` comes from `language_structure`; presumably it
# returns the vocabulary/`Lang` object the model and `batch_iter` consume —
# confirm against that module.
lang = load_model()

# Model
hidden_size = 20
embed_size = 300
model = SentModel(embed_size, hidden_size, lang, device)
model = model.to(device)

# Optimisation settings
lr = 1e-3
clip_grad = 5.
# Pass `lr` explicitly: previously the variable was defined but never handed
# to the optimizer, so changing it had no effect (Adam silently used its own
# default, which happens to be the same 1e-3).
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Binary cross-entropy on probabilities; BCELoss expects inputs in [0, 1].
loss_fcn = nn.BCELoss()

# Structure Test Data 

In [6]:
from init import extract_helper

In [7]:
# Gather the examples under `test/neg/` and `test/pos/`. Each call yields
# three sequences (examples, labels, ratings) that the next cell joins with
# `+` to form the columns of the test DataFrame.
# NOTE(review): the second argument (0 / 1) is presumably the label assigned
# to every example in that directory, and the relative paths are presumably
# resolved inside `init.extract_helper` — confirm both against that module.
neg_exs, neg_labels, neg_ratings = extract_helper('test/neg/', 0)
pos_exs, pos_labels, pos_ratings = extract_helper('test/pos/', 1)

In [54]:
# Assemble the test set: negatives followed by positives, one row per example
# with its path, target label and review rating.
test_df = pd.DataFrame(data={'path': neg_exs + pos_exs,
                        'target': neg_labels + pos_labels,
                        'review_rating': neg_ratings + pos_ratings})
# Shuffle so that a prefix slice (e.g. the first 2000 rows evaluated later)
# mixes both classes. The seed is fixed so that `test.csv` — and therefore any
# accuracy computed on a slice of it — is identical across re-runs; the index
# is reset so the in-memory frame matches what `pd.read_csv('test.csv')` gives.
test_df = test_df.sample(frac=1., random_state=42).reset_index(drop=True)
test_df.to_csv('test.csv', index=False)

# Predictions + Accuracy 

In [14]:
from train import batch_iter

In [55]:
# Load the test set from `test.csv` (the file written by the
# "Structure Test Data" section), replacing any in-memory `test_df`.
test_df = pd.read_csv('test.csv')

In [56]:
# Accuracy over the first 2000 test rows, in mini-batches of 10.
# A plain float threshold compares against a tensor on any device; the former
# 1-element CPU tensor would fail against CUDA predictions.
threshold = 0.5
n_examples = 0
n_correct = 0

was_training = model.training
model.eval()  # evaluation mode: e.g. disables dropout if the model has any
with torch.no_grad():  # evaluation needs no autograd graph
    for sents, targets in batch_iter(lang, test_df[:2000], 10, shuffle=True):
        preds = model(sents)
        preds = (preds >= threshold).float()
        # Keep labels on the same device as the predictions before comparing.
        targets = targets.to(preds.device)
        # For 0/1 values, 1 - |pred - target| is 1 on a match and 0 otherwise.
        n_correct += (1 - torch.abs(preds - targets)).sum()
        n_examples += len(targets)
model.train(was_training)  # restore whatever mode the model was in

(tensor(8.),
 10,
 tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 tensor([1., 0., 0., 1., 0., 0., 0., 1., 0., 0.]))

In [17]:
# Spot-check: raw model output for the input `ex`.
# NOTE(review): `ex` is not defined in any cell visible here, so this cell
# depends on leftover kernel state — confirm where `ex` came from or define it.
model(ex)

tensor([1.4297e-11, 9.4753e-04], grad_fn=<SqueezeBackward1>)

In [34]:
# Scratch: hand-made scores standing in for model outputs, used below to
# prototype the thresholding + accuracy computation.
test = torch.tensor([0.5, 0.2, 0.2])
test

tensor([0.5000, 0.2000, 0.2000])

In [35]:
# Scratch: decision threshold as a 1-element tensor; it broadcasts against
# the score vector in the comparison below.
t = torch.tensor([0.5])
t

tensor([0.5000])

In [36]:
# Scratch: turn scores into hard 0/1 predictions. `>=` means a score exactly
# at the threshold counts as 1; `.float()` converts the boolean mask to 0./1.
preds_rounded = (test >= t).float()
preds_rounded

tensor([1., 0., 0.])

In [37]:
# Scratch: ground-truth labels, cast to float to match `preds_rounded`.
true = torch.tensor([1, 0, 1]).float()
true

tensor([1., 0., 1.])

In [39]:
# Scratch: accuracy. For 0/1 values, 1 - |pred - true| is 1 where they agree
# and 0 where they differ, so the sum counts the correct predictions.
(1 - torch.abs(preds_rounded - true)).sum() / len(true)

tensor(0.6667)

In [40]:
# Scratch: number of labels, i.e. the denominator of the accuracy above.
len(true)

3

In [None]:
def validate(max_rows=2000, batch_size=10):
    """Compute classification accuracy of the global `model` on the test set.

    Reads the notebook-level globals `model`, `lang`, `test_df` and
    `batch_iter`. Predictions are thresholded at 0.5 (a score of exactly 0.5
    counts as class 1) and compared against the batch targets.

    Args:
        max_rows: Evaluate only the first `max_rows` rows of `test_df`.
        batch_size: Number of examples per batch passed to `batch_iter`.

    Returns:
        A 0-dim tensor holding the fraction of correctly classified examples.

    Raises:
        ZeroDivisionError: If `batch_iter` yields no batches.
    """
    # A plain float compares against a tensor on any device; a 1-element CPU
    # tensor would fail against CUDA predictions.
    threshold = 0.5
    n_examples = 0
    n_correct = 0

    was_training = model.training
    model.eval()  # evaluation mode: e.g. disables dropout if the model has any
    try:
        with torch.no_grad():  # evaluation needs no autograd graph
            for sents, targets in batch_iter(lang, test_df[:max_rows],
                                             batch_size, shuffle=True):
                preds = (model(sents) >= threshold).float()
                # Keep labels on the same device as the predictions.
                targets = targets.to(preds.device)
                # For 0/1 values, 1 - |pred - target| is 1 on a match, else 0.
                n_correct += (1 - torch.abs(preds - targets)).sum()
                n_examples += len(targets)
    finally:
        # Restore the previous mode so calling this mid-training is safe.
        model.train(was_training)

    return n_correct / n_examples