In [1]:
#pip install torchtext==0.4.0

In [1]:
import torch
from torchtext import data

SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

#TEXT used to store text, and LABEL used to store labels
TEXT = data.Field(tokenize = 'spacy',   #Tokenizer
                  tokenizer_language = 'en_core_web_sm',
                  include_lengths = True,   #includes length of text in the field
                  pad_first=True)   #Pad the start and end of text when it is shorter than max length
LABEL = data.LabelField(dtype = torch.float)

In [2]:
from torchtext import datasets

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)   #Process the dataset in the way defined in TEXT and LABEL

In [4]:
print(len(test_data))
print(len(train_data))

25000
17500


In [5]:
max_len = 0
for sample in train_data:
  text_len = len(sample.text) # Access the second element (length)
  
  max_len = max(max_len, text_len)

print("Maximum sentence length:", max_len)


Maximum sentence length: 2789


In [6]:
import random

train_data, valid_data = train_data.split(random_state = random.seed(SEED))

In [7]:
MAX_VOCAB_SIZE = 25_000

TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
LABEL.build_vocab(train_data)

In [8]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device)

cuda


In [9]:
for i, k in enumerate(train_iterator):
    if i == 0:
        print(k)
    else:
        break


[torchtext.data.batch.Batch of size 64]
	[.text]:('[torch.cuda.LongTensor of size 169x64 (GPU 0)]', '[torch.cuda.LongTensor of size 64 (GPU 0)]')
	[.label]:[torch.cuda.FloatTensor of size 64 (GPU 0)]


In [9]:
print(TEXT.vocab.itos[:10])

['<unk>', '<pad>', 'the', ',', '.', 'and', 'a', 'of', 'to', 'is']


## Initial Architecture

In [7]:
#No Word2Vec
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):

        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        self.rnn = nn.RNN(embedding_dim, hidden_dim)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        output, hidden = self.rnn(embedded)
        #output = [sent len, batch size, hid dim]
        #hidden = [1, batch size, hid dim]


        #assert torch.equal(output[-1,:,:], hidden.squeeze(0))

        return self.fc(hidden.squeeze(0))

In [19]:
#For non-word2Vec
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

## Word2vec

In [10]:
import gensim.models.keyedvectors as word2vec

wv_model = word2vec.KeyedVectors.load_word2vec_format(
    "GoogleNews-vectors-negative300.bin.gz", binary=True)


In [7]:
vocab_size = len(wv_model)
vector_dimension = wv_model.vector_size

print(f"Vocabulary size: {vocab_size}")
print(f"Vector dimension: {vector_dimension}")


Vocabulary size: 3000000
Vector dimension: 300


In [76]:
miss = []
for i in range(-10, 1):
    text = TEXT.vocab.itos[i]

    if text not in wv_model.key_to_index:
        miss.append(text)
print(miss)


['antics.<br', 'any.<br', 'approach.<br', '<unk>']


In [47]:
#Match TEXT and W2V Embedding
#Non-matching word to unk

filtered_embedding = []
missed_word = 0
for i in range(len(TEXT.vocab)):
  text = TEXT.vocab.itos[i]

  if text in wv_model.key_to_index:
    text_embedding = wv_model[text]
    filtered_embedding.append(text_embedding)
  else:
    missed_word += 1
    text_embedding = wv_model['unk']
    filtered_embedding.append(text_embedding)
    continue

print(f"missed words = {missed_word}; found words = {len(filtered_embedding)}")


#Convert embedding to tensor
from torch.nn import Embedding
import numpy as np

embedding_weights = torch.FloatTensor(np.array(filtered_embedding))
#embedding = Embedding.from_pretrained(embedding_weights).to(device)
embedding = Embedding.from_pretrained(embedding_weights, freeze = False).to(device)
embedding

missed words = 1846; found words = 25002


Embedding(25002, 300)

In [63]:
#Match TEXT and W2V Embedding
#Non-matching word to mean and std w2v embedding

import numpy as np
from torch.nn import Embedding

embeddings = np.array([wv_model[wv_model.index_to_key[i]] for i in range(len(wv_model))])
mean = np.mean(embeddings, axis=0)
std = np.std(embeddings, axis=0)

# Pre-process OOV embeddings
oov_embeddings = {}
for text in TEXT.vocab.itos:
    if text not in wv_model.key_to_index:
        oov_embeddings[text] = np.random.normal(loc=mean, scale=std, size=300)

filtered_embedding = np.empty((len(TEXT.vocab), 300))

for i in range(len(TEXT.vocab)):
    text = TEXT.vocab.itos[i]

    if text in wv_model.key_to_index:
        text_embedding = wv_model[text]
    else:
        text_embedding = oov_embeddings[text]

    filtered_embedding[i] = text_embedding

embedding_weights = torch.FloatTensor(filtered_embedding)
#embedding = Embedding.from_pretrained(embedding_weights).to(device) #Frozen weight
embedding = Embedding.from_pretrained(embedding_weights, freeze = False).to(device) #Unfrozen weight


In [55]:
#OOV words sampled from mean and std of vocab embedding
import numpy as np
from torch.nn import Embedding

available_embeddings = []
for i in range(len(TEXT.vocab)):
    text = TEXT.vocab.itos[i]

    if text in wv_model.key_to_index:
        available_embeddings.append(wv_model[text])

available_embeddings = np.array(available_embeddings)
mean = np.mean(available_embeddings, axis = 0)
std = np.std(available_embeddings, axis = 0 )

oov_embeddings = {}
for text in TEXT.vocab.itos:
    if text not in wv_model.key_to_index:
        oov_embeddings[text] = np.random.normal(loc=mean, scale=std, size=300)

filtered_embedding = np.empty((len(TEXT.vocab), 300))

for i in range(len(TEXT.vocab)):
    text = TEXT.vocab.itos[i]

    if text in wv_model.key_to_index:
        text_embedding = wv_model[text]
    else:
        text_embedding = oov_embeddings[text]

    filtered_embedding[i] = text_embedding

embedding_weights = torch.FloatTensor(filtered_embedding)
embedding = Embedding.from_pretrained(embedding_weights).to(device) #Frozen weight

In [64]:
#With Word2Vec
import torch.nn as nn
from torch.nn import Embedding

class RNN(nn.Module):
    def __init__(self, input_dim, embedding, hidden_dim, output_dim):

        super().__init__()

        self.embedding = embedding
        

        self.rnn = nn.RNN(300, hidden_dim)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        output, hidden = self.rnn(embedded)
        #output = [sent len, batch size, hid dim]
        #hidden = [1, batch size, hid dim]

        #assert torch.equal(output[-1,:,:], hidden.squeeze(0))

        return self.fc(hidden.squeeze(0))

In [65]:
#For word2vec
INPUT_DIM = len(TEXT.vocab)
EMBEDDING = embedding
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = RNN(INPUT_DIM, EMBEDDING, HIDDEN_DIM, OUTPUT_DIM)

# Different models

Use Adam optimizer, 50 epochs and randomly initialized embeddings, run the
experiments with the following models:

1. One-layer feed forward neural network, hidden dimension is 500.
2. Two-layer feed forward neural network, hidden dimensions are 500 and 300.
3. Three-layer feed forward neural network, hidden dimensions are 500, 300, and
200
4. CNN model (using three feature maps with the sizes are 1, 2, and 3)
5. LSTM model
6. Bi-LSTM model

In [11]:
#One-layer FFN: hidden dim = 500    
import torch.nn as nn

class FFN1(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):

        super(FFN1, self).__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        self.output_dim = output_dim

        self.fc1 = nn.Linear(embedding_dim, hidden_dim)

        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        x = self.fc1(embedded)

        x = nn.functional.relu(x) #ReLU activation

        x = torch.mean(x, dim=0) 
        
        output = self.output(x)

        #assert torch.equal(output[-1,:,:], x.squeeze(0))

        return output.squeeze()

In [42]:
#Two-layer FFN: hidden dim = 500, 300   
import torch.nn as nn

class FFN2(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim1, hidden_dim2, output_dim):

        super(FFN2, self).__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        self.output_dim = output_dim

        self.fc1 = nn.Linear(embedding_dim, hidden_dim1)

        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)

        self.output = nn.Linear(hidden_dim2, output_dim)

    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        x = self.fc1(embedded)
        x = nn.functional.relu(x) #ReLU activation

        x = self.fc2(x)
        x = nn.functional.relu(x)

        x = torch.mean(x, dim=0) 
        
        
        output = self.output(x)

        #assert torch.equal(output[-1,:,:], x.squeeze(0))

        return output.squeeze()

In [60]:
#Three-layer FFN: hidden dim = 500, 300, 200
import torch.nn as nn

class FFN3(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim):

        super(FFN3, self).__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)

        self.output_dim = output_dim

        self.fc1 = nn.Linear(embedding_dim, hidden_dim1)

        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)

        self.fc3 = nn.Linear(hidden_dim2, hidden_dim3)

        self.output = nn.Linear(hidden_dim3, output_dim)

    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)
        #embedded = [sent len, batch size, emb dim]

        x = self.fc1(embedded)
        x = nn.functional.relu(x) #ReLU activation

        x = self.fc2(x)
        x = nn.functional.relu(x)

        x = self.fc3(x)
        x = nn.functional.relu(x)

        x = torch.mean(x, dim=0) 
    
        output = self.output(x)

        #assert torch.equal(output[-1,:,:], x.squeeze(0))

        return output.squeeze()

In [18]:
#CNN using Conv2d
import torch.nn as nn

class CNN(nn.Module):
  def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
    super(CNN, self).__init__()
    self.embedding = nn.Embedding(input_dim, embedding_dim)  
    self.conv1 = nn.Conv2d(1, hidden_dim, kernel_size=(1, embedding_dim))  
    self.conv2 = nn.Conv2d(1, hidden_dim, kernel_size=(2, embedding_dim))  
    self.conv3 = nn.Conv2d(1, hidden_dim, kernel_size=(3, embedding_dim))  
    self.relu = nn.ReLU()
    self.pool = nn.AdaptiveMaxPool2d(1)  
    self.output = nn.Linear(hidden_dim * 3, output_dim)  
  

  def forward(self, text, text_lengths):
    
    embedded = self.embedding(text)  
    embedded = embedded.permute(1,0,2)
    embedded = embedded.unsqueeze(1)  # Add a channel dimension (required for Conv2d)

    conv1 = self.relu(self.conv1(embedded))
    conv2 = self.relu(self.conv2(embedded))
    conv3 = self.relu(self.conv3(embedded))

    
    pooled1 = self.pool(conv1)
    pooled2 = self.pool(conv2)
    pooled3 = self.pool(conv3)
    

    features = torch.cat([pooled1, pooled2, pooled3], dim=1).squeeze()
    output = self.output(features)  

    return output.squeeze()

In [87]:
#CNN (Alternative)
import torch.nn as nn 

class CNN(nn.Module):
  def __init__(self, input_dim, embedding_dim, kernel_sizes:list, hidden_dim, output_dim):
    super(CNN, self).__init__()
    self.embedding = nn.Embedding(input_dim, embedding_dim)
    self.convs = nn.ModuleList([nn.Conv1d(embedding_dim, hidden_dim, kernel_size=k) for k in kernel_sizes])
    self.pool = nn.AdaptiveMaxPool1d(1)
    self.relu = nn.ReLU()  # Add ReLU layer after each convolution
    self.fc = nn.Linear(len(kernel_sizes) * hidden_dim, output_dim)

  def forward(self, text, text_lengths):
    embedded = self.embedding(text)
    embedded = embedded.permute(1, 2, 0)
    x = torch.cat([self.relu(self.pool(conv(embedded))) for conv in self.convs], dim=1)
    #x = torch.cat([(self.pool(conv(embedded))) for conv in self.convs], dim=1)
    x = x.squeeze(2)
    output = self.fc(x)
    return output


In [15]:
#LSTM
import torch.nn as nn
class LSTM(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super(LSTM, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)


    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        _, (hidden,cell) = self.lstm(embedded) #no need for activation because of gating mechanism
        output = self.output(hidden)
        

        return output.squeeze()

In [34]:
#BILSTM
import torch.nn as nn
class BiLSTM(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super(BiLSTM, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.bilstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional= True)
        self.output = nn.Linear(hidden_dim*2, output_dim)


    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        _, (hidden,cell) = self.bilstm(embedded) #no need for activation because of gating mechanism
        hidden = torch.concat((hidden[0,:,:], hidden[1,:,:]), dim = 1)       #2 hidden states, concat them together along batch size
        #print('hidden:' , hidden.shape)
        
        output = self.output(hidden)
        #print('output: ', output.shape)
        

        return output.squeeze()

In [16]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256 
OUTPUT_DIM = 1

model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM,  OUTPUT_DIM).to(device)

In [21]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 10
OUTPUT_DIM = 1

#model = FFN1(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
model = CNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

# Train the Model

In [20]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
def count_allparams(model):
    return sum(p.numel() for p in model.parameters())

print(f'The model has {count_parameters(model):,} trainable parameters')
print(f'The model has {count_allparams(model):,} total parameters')
print(model)

The model has 2,592,105 trainable parameters
The model has 2,592,105 total parameters
RNN(
  (embedding): Embedding(25002, 100)
  (rnn): RNN(100, 256)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)


In [21]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=1e-3)
#optimizer = optim.SGD(model.parameters(), lr=1e-3)
#optimizer = optim.Adagrad(model.parameters(), lr=1e-3)

In [22]:
criterion = nn.BCEWithLogitsLoss()

#there is another type of loss: nn.BCELoss. This assumes we manually calculate the sigmoid prior.

In [23]:
model = model.to(device)
criterion = criterion.to(device)

In [24]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division
    acc = correct.sum() / len(correct)
    return acc

In [25]:
def train(model, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        optimizer.zero_grad()

        text, text_lengths = batch.text

        predictions = model(text, text_lengths).squeeze()       #Initial: squeeze(1)

        loss = criterion(predictions, batch.label)

        acc = binary_accuracy(predictions, batch.label)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()

        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [26]:
def evaluate(model, iterator, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():

        for batch in iterator:
            text, text_lengths = batch.text

            predictions = model(text, text_lengths).squeeze()   #Initial: squeeze(1)

            loss = criterion(predictions, batch.label)

            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [27]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [28]:
#Original; also for Word2vec

N_EPOCHS = 20

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

model.load_state_dict(torch.load('tut1-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')


Epoch: 01 | Epoch Time: 0m 2s
	Train Loss: 0.691 | Train Acc: 53.96%
	 Val. Loss: 0.691 |  Val. Acc: 52.87%
Epoch: 02 | Epoch Time: 0m 2s
	Train Loss: 0.690 | Train Acc: 53.19%
	 Val. Loss: 0.683 |  Val. Acc: 54.35%
Epoch: 03 | Epoch Time: 0m 2s
	Train Loss: 0.682 | Train Acc: 54.52%
	 Val. Loss: 0.678 |  Val. Acc: 55.36%
Epoch: 04 | Epoch Time: 0m 2s
	Train Loss: 0.655 | Train Acc: 60.29%
	 Val. Loss: 0.651 |  Val. Acc: 62.24%
Epoch: 05 | Epoch Time: 0m 2s
	Train Loss: 0.595 | Train Acc: 68.07%
	 Val. Loss: 0.680 |  Val. Acc: 54.07%
Epoch: 06 | Epoch Time: 0m 2s
	Train Loss: 0.564 | Train Acc: 71.43%
	 Val. Loss: 0.600 |  Val. Acc: 66.79%
Epoch: 07 | Epoch Time: 0m 2s
	Train Loss: 0.538 | Train Acc: 73.25%
	 Val. Loss: 0.576 |  Val. Acc: 70.72%
Epoch: 08 | Epoch Time: 0m 2s
	Train Loss: 0.500 | Train Acc: 76.39%
	 Val. Loss: 0.579 |  Val. Acc: 71.40%
Epoch: 09 | Epoch Time: 0m 2s
	Train Loss: 0.441 | Train Acc: 80.74%
	 Val. Loss: 0.536 |  Val. Acc: 74.93%
Epoch: 10 | Epoch Time: 0m 2

In [63]:
# MY own train (non-word2vec)

#Define different hyperparameters
import torch.optim as optim
#epochs = [50]
epochs = [50, 20, 10, 5]

def optimizer_to(optim, device):
    for param in optim.state.values():
        # Not sure there are any global tensors in the state dict
        if isinstance(param, torch.Tensor):
            param.data = param.data.to(device)
            if param._grad is not None:
                param._grad.data = param._grad.data.to(device)
        elif isinstance(param, dict):
            for subparam in param.values():
                if isinstance(subparam, torch.Tensor):
                    subparam.data = subparam.data.to(device)
                    if subparam._grad is not None:
                        subparam._grad.data = subparam._grad.data.to(device)
    return optim

for N_EPOCHS in epochs:
  print(f'N_EPOCHS is {N_EPOCHS}')
  model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
  optimizer = optim.Adagrad(model.parameters(), lr= 1e-3)
  criterion = nn.BCEWithLogitsLoss()
  model = model.to(device)
  criterion = criterion.to(device)
  optimizer = optimizer_to(optimizer, device)
  best_valid_loss = float('inf')

  for epoch in range(N_EPOCHS):
      start_time = time.time()

      train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
      valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

      end_time = time.time()

      epoch_mins, epoch_secs = epoch_time(start_time, end_time)

      if valid_loss < best_valid_loss:
          best_valid_loss = valid_loss
          torch.save(model.state_dict(), 'tut1-model.pt')

      print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
      print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
      print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

  model.load_state_dict(torch.load('tut1-model.pt'))

  test_loss, test_acc = evaluate(model, test_iterator, criterion)

  print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

N_EPOCHS is 50
Epoch: 01 | Epoch Time: 0m 2s
	Train Loss: 0.685 | Train Acc: 54.71%
	 Val. Loss: 0.674 |  Val. Acc: 58.34%
Epoch: 02 | Epoch Time: 0m 2s
	Train Loss: 0.651 | Train Acc: 62.04%
	 Val. Loss: 0.658 |  Val. Acc: 60.69%
Epoch: 03 | Epoch Time: 0m 2s
	Train Loss: 0.627 | Train Acc: 65.01%
	 Val. Loss: 0.632 |  Val. Acc: 64.82%
Epoch: 04 | Epoch Time: 0m 2s
	Train Loss: 0.605 | Train Acc: 67.72%
	 Val. Loss: 0.613 |  Val. Acc: 66.70%
Epoch: 05 | Epoch Time: 0m 2s
	Train Loss: 0.595 | Train Acc: 68.60%
	 Val. Loss: 0.606 |  Val. Acc: 67.46%
Epoch: 06 | Epoch Time: 0m 2s
	Train Loss: 0.587 | Train Acc: 69.27%
	 Val. Loss: 0.600 |  Val. Acc: 67.47%
Epoch: 07 | Epoch Time: 0m 2s
	Train Loss: 0.581 | Train Acc: 69.89%
	 Val. Loss: 0.594 |  Val. Acc: 69.00%
Epoch: 08 | Epoch Time: 0m 2s
	Train Loss: 0.580 | Train Acc: 69.83%
	 Val. Loss: 0.599 |  Val. Acc: 67.76%
Epoch: 09 | Epoch Time: 0m 2s
	Train Loss: 0.572 | Train Acc: 70.91%
	 Val. Loss: 0.584 |  Val. Acc: 69.92%
Epoch: 10 | E

In [None]:
#My own train 2 (non-word2vec)
import torch.optim as optim

N_EPOCHS = 20
optimizer = optim.Adagrad(model.parameters(), lr= 1e-3)


best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')


model.load_state_dict(torch.load('tut1-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
criterion = nn.BCEWithLogitsLoss()
model = model.to(device)
criterion = criterion.to(device)

In [22]:
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')


Test Loss: 0.562 | Test Acc: 75.85%
