In [1]:
import torch
from torch import nn
from torch import optim

# check if GPU is available (better to check it at the start)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

Using gpu: True 


In [2]:
# download via API/Internet
import os
import json
import gzip
from urllib.request import urlopen

# data handling
import numpy as np
import pandas as pd
# data visualization
import seaborn as sns
# pytorch
import torch
from torch import nn
from torch import optim

# download the data from link
import requests
import os

# URL of the file you want to download
url = "https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/categoryFilesSmall/Video_Games_5.json.gz"

# Local path where you want to save the downloaded file
local_filename = "Video_Games.json.gz"

# Check if the file already exists, if yes, remove it
if os.path.exists(local_filename):
    os.remove(local_filename)

# Download the file
response = requests.get(url)
with open(local_filename, 'wb') as f:
    f.write(response.content)

print(f"File '{local_filename}' downloaded successfully.")

# load the meta data
data = []
with gzip.open(local_filename, 'rt') as f:  # 'rt' for text mode
    for l in f:
        data.append(json.loads(l.strip()))

# total length of the list, this number equals the total number of products
print(len(data))

# first row of the list
print(data[0])


def parse(path):
  g = gzip.open(path, 'rb')
  for l in g:
    yield json.loads(l)

def getDF(path):
  i = 0
  df = {}
  for d in parse(path):
    df[i] = d
    i += 1
  return pd.DataFrame.from_dict(df, orient='index')

df = getDF('Video_Games.json.gz')
df=df[:100000]

File 'Video_Games.json.gz' downloaded successfully.
497577
{'overall': 5.0, 'verified': True, 'reviewTime': '10 17, 2015', 'reviewerID': 'A1HP7NVNPFMA4N', 'asin': '0700026657', 'reviewerName': 'Ambrosia075', 'reviewText': "This game is a bit hard to get the hang of, but when you do it's great.", 'summary': "but when you do it's great.", 'unixReviewTime': 1445040000}


In [3]:
print(f"Length of DataFrame: {len(df)}")

Length of DataFrame: 100000


In [4]:
# fill blank spaces with NaN
df = df.fillna('')
df.isnull().sum()


overall           0
verified          0
reviewTime        0
reviewerID        0
asin              0
reviewerName      0
reviewText        0
summary           0
unixReviewTime    0
vote              0
style             0
image             0
dtype: int64

In [5]:
# Lower case all reviews, ensuring that each entry is a string
df["reviewText"] = df["reviewText"].apply(lambda x: str(x).lower())

# Get rid of punctuation and newline
df["reviewText"] = df["reviewText"].str.replace(r'[^\w\s]+', '', regex=True).str.replace("\n", " ")


In [6]:
non_relevant_rev = ["verified",  "vote", "style", "image", "unixReviewTime"]
df = df.drop(non_relevant_rev, axis = 1)

In [7]:
# data clean
# 1. Removing duplicates
df = df.drop_duplicates()
print(len(df))

99946


In [8]:
# 2. Handling missing values
df.fillna(0, inplace=True) # Replace 0 with your desired placeholde

In [9]:
#3. Converting data types
df['reviewTime'] = pd.to_datetime(df['reviewTime'], errors='coerce')

In [10]:
# Check for missing values
print('Missing values:\n', df.isnull().sum())
# Print summary statistics
print('Summary statistics:\n', df.describe())

Missing values:
 overall         0
reviewTime      0
reviewerID      0
asin            0
reviewerName    0
reviewText      0
summary         0
dtype: int64
Summary statistics:
             overall
count  99946.000000
mean       4.208983
std        1.138513
min        1.000000
25%        4.000000
50%        5.000000
75%        5.000000
max        5.000000


In [11]:
unique_overallrating_values = df['overall'].unique()
unique_overallrating_values

array([5., 4., 3., 2., 1.])

In [12]:
 # Rename the DataFrame
clean_data = df.copy()

clean_data.to_csv('clean_data.csv', index=False)


In [13]:
# Create a new DataFrame 'reviews' and 'labels'
reviews = pd.DataFrame()
labels = pd.DataFrame()
# Map sentiment values based on 'overall' column
labels['labels'] = df['overall'].map({1: 'negative', 2: 'negative', 3: 'neutral', 4: 'positive', 5: 'positive'})

# Assign 'review' column from the original DataFrame 'df'
reviews['review'] = df['reviewText']

In [14]:
path_file_path = "/content/"

# Save the 'reviews' DataFrame to a text file
reviews.to_csv(path_file_path + "reviews.txt", index=False)

# Save the 'labels' DataFrame to a text file
labels.to_csv(path_file_path + "labels.txt", index=False)

In [15]:
import numpy as np

# read data from text files
with open('reviews.txt', 'r') as f:
    reviews = f.read()
with open('labels.txt', 'r') as f:
    labels = f.read()

In [16]:
print(reviews[:1000])
print()
print(labels[:20])

review
this game is a bit hard to get the hang of but when you do its great
i played it a while but it was alright the steam was a bit of trouble the more they move these game to steam the more of a hard time i have activating and playing a game but in spite of that it was fun i liked it now i am looking forward to anno 2205 i really want to play my way to the moon
ok game
found the game a bit too complicated not what i expected after having played 1602 1503 and 1701
great game i love it and have played it since its arrived
i liked a lot some time that i havent play a wonderfull game very simply and funny game verry good game
im an avid gamer but anno 2070 is an insult to gaming  it is so buggy and halffinished that the first campaign doesnt even work properly and the drm is incredibly frustrating to deal with  once you manage to work your way past the massive amounts of bugs and get through the drm hours later you finally figure out that the game has no real tutorial so you stuck just

# Data pre-processing

In [17]:
# Data pre-processing
from string import punctuation

print(punctuation)

# get rid of punctuation
reviews = reviews.lower() # lowercase, standardize
all_text = ''.join([c for c in reviews if c not in punctuation])

# split by new lines and spaces
reviews_split = all_text.split('\n')
all_text = ' '.join(reviews_split)

# create a list of words
words = all_text.split()

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


In [18]:
words[:30]

['review',
 'this',
 'game',
 'is',
 'a',
 'bit',
 'hard',
 'to',
 'get',
 'the',
 'hang',
 'of',
 'but',
 'when',
 'you',
 'do',
 'its',
 'great',
 'i',
 'played',
 'it',
 'a',
 'while',
 'but',
 'it',
 'was',
 'alright',
 'the',
 'steam',
 'was']

Encoding the words

In [19]:

from collections import Counter
## Build a dictionary that maps words to integers
counts = Counter(words)
vocab = sorted(counts, key=counts.get, reverse=True)

#Build a dictionary that maps words to integers by enumerating vocab
vocab_to_int = {word: index + 1 for index, word in enumerate(vocab)}

## store the tokenized reviews in reviews_ints (which is a list)
reviews_ints = []
for review in reviews_split:
    reviews_ints.append([vocab_to_int[word] for word in review.split()])

In [20]:
# stats about vocabulary
print('Unique words: ', len((vocab_to_int)))
print()

# print tokens in first review
print('Tokenized review: \n', reviews_ints[:1])

Unique words:  182030

Tokenized review: 
 [[354]]


In [21]:
# negative = 0, neutral = 1, positive = 2
import numpy as np
labels_split = labels.split('\n')
encoded_labels = np.array([2 if 'positive' in label else 0 if 'negative' in label else 1 for label in labels_split if label])


In [22]:
encoded_labels

array([1, 2, 2, ..., 2, 0, 1])

# Removing Outliers

In [23]:
# outlier review stats
review_lens = Counter([len(x) for x in reviews_ints])
print("Zero-length reviews: {}".format(review_lens[0])) # review_lens[0] counts the number of reviews with zero length
print("Maximum review length: {}".format(max(review_lens)))

Zero-length reviews: 45
Maximum review length: 5838


In [24]:
print('Number of reviews before removing outliers: ', len(reviews_ints))

## remove any reviews/labels with zero length from the reviews_ints list.

# get indices of any reviews with length 0
non_zero_idx = [ii for ii, review in enumerate(reviews_ints) if len(review) != 0]

# remove 0-length reviews and their labels
reviews_ints = [reviews_ints[ii] for ii in non_zero_idx]
encoded_labels = np.array([encoded_labels[ii] for ii in non_zero_idx])

print('Number of reviews after removing outliers: ', len(reviews_ints))

Number of reviews before removing outliers:  99948
Number of reviews after removing outliers:  99903


# Padding sequences

In [25]:
def pad_features(reviews_ints, seq_length):
    ''' Return features of review_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''

    # getting the correct rows x cols shape
    features = np.zeros((len(reviews_ints), seq_length), dtype=int)

    # for each review, I grab that review and
    for i, row in enumerate(reviews_ints):
        features[i, -len(row):] = np.array(row)[:seq_length]

    return features

In [26]:
# Test your implementation!

seq_length = 200

features = pad_features(reviews_ints, seq_length=seq_length)

## test statements - do not change - ##
assert len(features)==len(reviews_ints), "Your features should have as many rows as reviews."
assert len(features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 10 values of the first 30 batches
print(features[:30,:10])

[[    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [  119    22   214   890    17     1    30  3197  3191   150]
 [    0     0     0     0     0     0     0     0     0

# Training, Validation, Test

In [27]:
split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)

split_idx = int(len(features)*split_frac)
train_x, remaining_x = features[:split_idx], features[split_idx:]
train_y, remaining_y = encoded_labels[:split_idx], encoded_labels[split_idx:]

test_idx = int(len(remaining_x)*0.5)
val_x, test_x = remaining_x[:test_idx], remaining_x[test_idx:]
val_y, test_y = remaining_y[:test_idx], remaining_y[test_idx:]

## print out the shapes of your resultant feature data
print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(train_x.shape),
      "\nValidation set: \t{}".format(val_x.shape),
      "\nTest set: \t\t{}".format(test_x.shape))

			Feature Shapes:
Train set: 		(79922, 200) 
Validation set: 	(9990, 200) 
Test set: 		(9991, 200)


In [28]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

# dataloaders
batch_size = 50

# make sure SHUFFLE your training data
# drop_last=True will drop the last batch if the size is less than the given batch_size
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size,  drop_last=True)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last=True)

In [29]:
# obtain one batch of training data
dataiter = iter(train_loader)
sample_x, sample_y = next(dataiter)

print('Sample input size: ', sample_x.size()) # batch_size, seq_length
print('Sample input: \n', sample_x)
print()
print('Sample label size: ', sample_y.size()) # batch_size
print('Sample label: \n', sample_y)

Sample input size:  torch.Size([50, 200])
Sample input: 
 tensor([[ 462, 1396, 3294,  ..., 2685,  511,    2],
        [ 919,    9,   78,  ...,   70,  145,    9],
        [   0,    0,    0,  ..., 1898, 7267, 1003],
        ...,
        [   1,  312,   11,  ...,  381,   19, 2481],
        [  36, 1722,    4,  ..., 1482,    1,    8],
        [   0,    0,    0,  ...,  118,  108, 1924]])

Sample label size:  torch.Size([50])
Sample label: 
 tensor([2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
        1, 0])


# Sentiment Network with PyTorch

In [44]:
import torch.nn as nn

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform Sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
        super(SentimentRNN, self).__init__()

        # Set output_size  for three classes
        self.output_size = 3
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        #add embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)
        # dropout layer
        self.dropout = nn.Dropout(0.3)
        # linear and ReLUx layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.ReLU = nn.ReLU()


    def forward(self, x, hidden):
        """
        Perform a forward pass of our model on some input and hidden state.
        """
        batch_size = x.size(0)


        x = x.long()

        #compute embeddings and lstm_out
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        lstm_out = lstm_out[:, -1, :] # getting the last time step output

        # dropout and fully-connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)
        # ReLU function
        ReLU_out = self.ReLU(out)

        # return last ReLU output and hidden state
        return ReLU_out, hidden


    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

        return hidden

In [52]:
# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int)+1 # +1 for the 0 padding + our word tokens
output_size = 3
embedding_dim = 400
hidden_dim = 256
n_layers = 4

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)

print(net)

SentimentRNN(
  (embedding): Embedding(182031, 400)
  (lstm): LSTM(400, 256, num_layers=4, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=3, bias=True)
  (ReLU): ReLU()
)


Training

In [58]:
# train
# loss and optimization functions
lr=0.0005

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

In [47]:
train_on_gpu=torch.cuda.is_available()

if(train_on_gpu):
    print('Training on GPU.')
else:
    print('No GPU available, training on CPU.')

Training on GPU.


In [59]:
import torch
import torch.nn as nn
import torch.optim as optim

# training params
epochs = 3
counter = 0
print_every = 100
clip = 5  # gradient clipping

# move model to GPU, if available
if train_on_gpu:
    net.cuda()

criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(net.parameters(), lr=0.0005)

net.train()
# train for some number of epochs
for e in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)
    # batch loop
    for inputs, labels in train_loader:
        counter += 1

        if train_on_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()

        h = tuple([each.data for each in h])

        # 1. get the output from the model
        output, h = net(inputs, h)

        # 2. calculate the loss and perform backprop
        loss = criterion(output, labels)

        # 3. zero accumulated gradients
        net.zero_grad()

        # 4. Backpropagation
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)

        # 5. Update weights
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()

            for val_inputs, val_labels in valid_loader:
                val_h = tuple([each.data for each in val_h])

                if train_on_gpu:
                    val_inputs, val_labels = val_inputs.cuda(), val_labels.cuda()

                val_output, val_h = net(val_inputs, val_h)
                val_loss = criterion(val_output, val_labels)
                val_losses.append(val_loss.item())

            net.train()
            print("Epoch: {}/{}...".format(e + 1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))


Epoch: 1/3... Step: 100... Loss: 0.229507... Val Loss: 0.494144
Epoch: 1/3... Step: 200... Loss: 0.274468... Val Loss: 0.514202
Epoch: 1/3... Step: 300... Loss: 0.171634... Val Loss: 0.509283
Epoch: 1/3... Step: 400... Loss: 0.330458... Val Loss: 0.508140
Epoch: 1/3... Step: 500... Loss: 0.229334... Val Loss: 0.509444
Epoch: 1/3... Step: 600... Loss: 0.354606... Val Loss: 0.509620
Epoch: 1/3... Step: 700... Loss: 0.328719... Val Loss: 0.487618
Epoch: 1/3... Step: 800... Loss: 0.346488... Val Loss: 0.503769
Epoch: 1/3... Step: 900... Loss: 0.181998... Val Loss: 0.504937
Epoch: 1/3... Step: 1000... Loss: 0.393922... Val Loss: 0.543446
Epoch: 1/3... Step: 1100... Loss: 0.225744... Val Loss: 0.500255
Epoch: 1/3... Step: 1200... Loss: 0.419885... Val Loss: 0.517257
Epoch: 1/3... Step: 1300... Loss: 0.264943... Val Loss: 0.511308
Epoch: 1/3... Step: 1400... Loss: 0.254726... Val Loss: 0.501522
Epoch: 1/3... Step: 1500... Loss: 0.420228... Val Loss: 0.499182
Epoch: 2/3... Step: 1600... Loss: 

Testing

In [60]:
# Get test data loss and accuracy
#import torch.nn.functional as F
test_losses = [] # track loss
num_correct = 0

# init hidden state
h = net.init_hidden(batch_size)
output, h = net(inputs, h)


net.eval()
# iterate over test data
for inputs, labels in test_loader:
    # Creating new variables for the hidden state, otherwise
    # we'd backprop through the entire training history
    h = tuple([each.data for each in h])

    if train_on_gpu:
        inputs, labels = inputs.cuda(), labels.cuda()

    # get predicted outputs
    output, h = net(inputs, h)

    # calculate loss
    test_loss = criterion(output, labels)
    #labels_one_hot = F.one_hot(labels, num_classes=3)
    test_losses.append(test_loss.item())

    # get the predicted class indices (the index with the maximum probability)
    _, pred_indices = torch.max(output, 1)

    # compare predicted indices to true label
    correct = (pred_indices == labels).sum().item()
    num_correct += correct



In [61]:

# avg test loss
print("Test loss: {:.3f}".format(np.mean(test_losses)))

# accuracy over all test data
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}".format(test_acc))

Test loss: 0.635
Test accuracy: 0.803


In [62]:
# save model
torch.save(net.state_dict(), 'model.pth')


Try another one ELU and hidden 3

In [38]:
#####

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform Sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
        super(SentimentRNN, self).__init__()

        # Set output_size  for three classes
        self.output_size = 3
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        #add embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)
        # dropout layer
        self.dropout = nn.Dropout(0.3)
        # linear and ELU layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.ELU = nn.ELU()


    def forward(self, x, hidden):
        """
        Perform a forward pass of our model on some input and hidden state.
        """
        batch_size = x.size(0)


        x = x.long()

        #compute embeddings and lstm_out
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        lstm_out = lstm_out[:, -1, :] # getting the last time step output

        # dropout and fully-connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)
        # ELU function
        ELU_out = self.ELU(out)

        # return last ReLU output and hidden state
        return ELU_out, hidden


    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

        return hidden

In [39]:
# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int)+1 # +1 for the 0 padding + our word tokens
output_size = 3
embedding_dim = 400
hidden_dim = 256
n_layers = 3

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)

print(net)

SentimentRNN(
  (embedding): Embedding(182031, 400)
  (lstm): LSTM(400, 256, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=3, bias=True)
  (ELU): ELU(alpha=1.0)
)


In [40]:
# train
# loss and optimization functions
lr=0.0015

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

In [41]:
train_on_gpu=torch.cuda.is_available()

if(train_on_gpu):
    print('Training on GPU.')
else:
    print('No GPU available, training on CPU.')

Training on GPU.


In [42]:


# training params
epochs = 3
counter = 0
print_every = 150
clip = 5  # gradient clipping

# move model to GPU, if available
if train_on_gpu:
    net.cuda()

criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(net.parameters(), lr=0.0015)

net.train()
# train for some number of epochs
for e in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)
    # batch loop
    for inputs, labels in train_loader:
        counter += 1

        if train_on_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()

        h = tuple([each.data for each in h])

        # 1. get the output from the model
        output, h = net(inputs, h)

        # 2. calculate the loss and perform backprop
        loss = criterion(output, labels)

        # 3. zero accumulated gradients
        net.zero_grad()

        # 4. Backpropagation
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)

        # 5. Update weights
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()

            for val_inputs, val_labels in valid_loader:
                val_h = tuple([each.data for each in val_h])

                if train_on_gpu:
                    val_inputs, val_labels = val_inputs.cuda(), val_labels.cuda()

                val_output, val_h = net(val_inputs, val_h)
                val_loss = criterion(val_output, val_labels)
                val_losses.append(val_loss.item())

            net.train()
            print("Epoch: {}/{}...".format(e + 1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))


Epoch: 1/3... Step: 150... Loss: 0.742306... Val Loss: 0.651069
Epoch: 1/3... Step: 300... Loss: 0.675671... Val Loss: 0.630466
Epoch: 1/3... Step: 450... Loss: 0.579329... Val Loss: 0.611579
Epoch: 1/3... Step: 600... Loss: 0.349873... Val Loss: 0.595165
Epoch: 1/3... Step: 750... Loss: 0.519365... Val Loss: 0.550530
Epoch: 1/3... Step: 900... Loss: 0.512085... Val Loss: 0.537829
Epoch: 1/3... Step: 1050... Loss: 0.418088... Val Loss: 0.536438
Epoch: 1/3... Step: 1200... Loss: 0.496742... Val Loss: 0.520416
Epoch: 1/3... Step: 1350... Loss: 0.445305... Val Loss: 0.505581
Epoch: 1/3... Step: 1500... Loss: 0.464731... Val Loss: 0.502105
Epoch: 2/3... Step: 1650... Loss: 0.368388... Val Loss: 0.532010
Epoch: 2/3... Step: 1800... Loss: 0.422004... Val Loss: 0.509140
Epoch: 2/3... Step: 1950... Loss: 0.493289... Val Loss: 0.505941
Epoch: 2/3... Step: 2100... Loss: 0.319320... Val Loss: 0.481886
Epoch: 2/3... Step: 2250... Loss: 0.425612... Val Loss: 0.475294
Epoch: 2/3... Step: 2400... Los

In [43]:
# Get test data loss and accuracy
#import torch.nn.functional as F
test_losses = [] # track loss
num_correct = 0

# init hidden state
h = net.init_hidden(batch_size)
output, h = net(inputs, h)


net.eval()
# iterate over test data
for inputs, labels in test_loader:
    # Creating new variables for the hidden state, otherwise
    # we'd backprop through the entire training history
    h = tuple([each.data for each in h])

    if train_on_gpu:
        inputs, labels = inputs.cuda(), labels.cuda()

    # get predicted outputs
    output, h = net(inputs, h)

    # calculate loss
    test_loss = criterion(output, labels)
    #labels_one_hot = F.one_hot(labels, num_classes=3)
    test_losses.append(test_loss.item())

    # get the predicted class indices (the index with the maximum probability)
    _, pred_indices = torch.max(output, 1)

    # compare predicted indices to true label
    correct = (pred_indices == labels).sum().item()
    num_correct += correct
    # avg test loss
print("Test loss: {:.3f}".format(np.mean(test_losses)))

# accuracy over all test data
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}".format(test_acc))

Test loss: 0.487
Test accuracy: 0.818
