Imports

# **امین فتحی - 400722102**

برای حل این سوال از لینک زیر استفاده شده است : https://colab.research.google.com/github/bentrevett/pytorch-sentiment-analysis/blob/master/1%20-%20Simple%20Sentiment%20Analysis.ipynb#scrollTo=p4uR0OXfZHyW

In [2]:
import numpy as np

from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random
from torch.autograd import Variable

**Load and prepare data**

The IMDB sentiment classification dataset consists of 50,000 movie reviews from IMDB users that are labeled as either positive (1) or negative (0). The reviews are preprocessed and each one is encoded as a sequence of word indexes in the form of integers. The words within the reviews are indexed by their overall frequency within the dataset. For example, the integer “2” encodes the second most frequent word in the data. The 50,000 reviews are split into 25,000 for training and 25,000 for testing.

Fortunately, the IMDB dataset has already been built in Keras. Since we want to avoid a 50/50 train test split, we will immediately merge the data into data and targets after downloading so we can do an 80/20 split later on.


In [3]:
print('Loading data...')

#Maximum number of words
max_features = 20000

(train_data, train_label), (test_data, test_label) = imdb.load_data(num_words=max_features)
data = np.concatenate((train_data, test_data), axis=0)
targets = np.concatenate((train_label, test_label), axis=0)
maxlen = 100

# The data must be the same length to enter as the input of our recursive neural network.
X = pad_sequences(data, maxlen=maxlen)

# split data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, targets, test_size=0.2)

print('x_train shape:', X_train.shape)
print('x_test shape:', X_test.shape)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
x_train shape: (40000, 100)
x_test shape: (10000, 100)


Now it is time to build a very simple recursive model. Use the embedding layer as the first layer.You can see more information about this layers in the following link. You are not allowed to use LSTM in this exercise.

https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html

In [4]:
# Define hyperparameters
n_epochs = 20
lr = 0.0001

# Define Loss, Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = None    # defined after creating model

class RNN_Model(nn.Module):
  def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
    super(RNN_Model,self).__init__()
    self.embedding = nn.Embedding(input_dim, embedding_dim)
    self.rnn = nn.RNN(embedding_dim, hidden_dim,batch_first=True)    
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self,X):
    #x = [batch size, sent len]

    embedded = self.embedding(X)
    #embedded = [batch size, sent len, emb dim]

    output, hidden = self.rnn(embedded)
    #output = [batch size, sent len, hid dim]
    #hidden = [1, batch size, hid dim]

    return self.fc(hidden.squeeze(0))



In [5]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RNN_Model(max_features,200,256,1).to(device)

In [6]:
optimizer = torch.optim.Adam(model.parameters(),lr=lr)

In [7]:
y_train = y_train.reshape((-1,1)).astype('float64')
y_test = y_test.reshape((-1,1)).astype('float64')
train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train),torch.tensor(y_train))
train_iter = torch.utils.data.DataLoader(train_dataset,batch_size=64)
test_dataset = torch.utils.data.TensorDataset(torch.tensor(X_test),torch.tensor(y_test))
test_iter = torch.utils.data.DataLoader(test_dataset,batch_size=64)

In [8]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc.item()

Train your model with training data, then report the model result on the test data.

In [9]:
# Train your model
train_losses = list()
test_losses = list()
train_accs = list()
test_accs = list()
for epoch in range (n_epochs):
  train_loss = list()
  train_acc = list() 
  test_loss = list()
  test_acc = list() 

  model.train()
  for i, (x, y) in enumerate(train_iter):
    optimizer.zero_grad()
    x, y = x.to(device), y.to(device)
    yprim = model(x)
    loss= criterion(yprim, y)
    loss.backward()
    optimizer.step()
    train_loss.append(loss.item())
    train_acc.append(binary_accuracy(yprim,y))
  
  train_losses.append(sum(train_loss)/len(train_loss))
  train_accs.append(sum(train_acc)/len(train_acc))
  
  model.eval()
  with torch.no_grad():
    for i, (X, y) in enumerate(test_iter):
      X, y = X.to(device), y.to(device) 
      yprim = model(X)
      loss = criterion(yprim, y)
      test_loss.append(loss.item())
      test_acc.append(binary_accuracy(yprim,y))
    
    test_losses.append(sum(test_loss)/len(test_loss))
    test_accs.append(sum(test_acc)/len(test_acc))
  print(f'train loss: {train_losses[-1]} , train_accuracy: {train_accs[-1]} , test_loss: {test_losses[-1]} , test_accuracy: {test_accs[-1]}')

train loss: 0.6265254256805405 , train_accuracy: 0.63875 , test_loss: 0.5599500434810539 , test_accuracy: 0.7202428343949044
train loss: 0.5311347952781711 , train_accuracy: 0.73665 , test_loss: 0.5219048845993612 , test_accuracy: 0.7484076433121019
train loss: 0.48415013163690457 , train_accuracy: 0.7722 , test_loss: 0.4875741836966748 , test_accuracy: 0.7713972929936306
train loss: 0.4424892383428756 , train_accuracy: 0.800575 , test_loss: 0.46574924748210544 , test_accuracy: 0.7886146496815286
train loss: 0.4116207946780836 , train_accuracy: 0.8173 , test_loss: 0.4459292485341515 , test_accuracy: 0.7985668789808917
train loss: 0.3796816525353468 , train_accuracy: 0.835575 , test_loss: 0.43543341026542626 , test_accuracy: 0.807921974522293
train loss: 0.35365962484389313 , train_accuracy: 0.850375 , test_loss: 0.4248165316113178 , test_accuracy: 0.8139928343949044
train loss: 0.3328312044329621 , train_accuracy: 0.862425 , test_loss: 0.4197124061523427 , test_accuracy: 0.813196656050