In [11]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [12]:
import torch
import pandas as pd 
import numpy as np
import torch.utils.data as data_utils

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

In [13]:
# Download the bartoszmaj/tfidf-imdb-20k dataset archive with the Kaggle CLI.
!kaggle datasets download -d bartoszmaj/tfidf-imdb-20k

Downloading tfidf-imdb-20k.zip to /content
100% 0.99G/0.99G [00:56<00:00, 18.5MB/s]
100% 0.99G/0.99G [00:56<00:00, 19.0MB/s]


In [14]:
# Extract the archive; it contains X_train.csv, X_test.csv, y_train.csv and y_test.csv.
!unzip tfidf-imdb-20k.zip

Archive:  tfidf-imdb-20k.zip
  inflating: X_test.csv              
  inflating: X_train.csv             
  inflating: y_test.csv              
  inflating: y_train.csv             


In [None]:
# Download the separate bartoszmaj/x-test dataset archive with the Kaggle CLI.
!kaggle datasets download -d bartoszmaj/x-test

Downloading x-test.zip to /content
 99% 674M/678M [00:05<00:00, 154MB/s]
100% 678M/678M [00:05<00:00, 127MB/s]


In [None]:
!unzip x-test.zip

Archive:  x-test.zip
  inflating: X_test.csv              


In [63]:
# Read the train and test feature tables from the CSVs in the working directory.
X_train, X_test = (pd.read_csv(csv_name) for csv_name in ("X_train.csv", "X_test.csv"))


In [64]:
# Read the train and test label tables from the CSVs in the working directory.
y_train, y_test = (pd.read_csv(csv_name) for csv_name in ("y_train.csv", "y_test.csv"))

In [65]:
# Mini-batch size used by both the training and the test DataLoader.
batch_size = 32

In [66]:
# Encode the string sentiment labels as integers: "negative" -> 0, "positive" -> 1.
y_train, y_test = (
    labels.replace({"negative": 0, "positive": 1}) for labels in (y_train, y_test)
)
# Show the first rows of both label frames as a sanity check.
y_train.head(), y_test.head()

(   sentiment
 0          0
 1          1
 2          0
 3          1
 4          0,
    sentiment
 0          1
 1          1
 2          0
 3          1
 4          0)

In [67]:
# Training features: DataFrame values -> float32 tensor.
train = torch.tensor(X_train.values.astype(np.float32))
# Training labels: float32 tensor, flattened to 1-D.
train_target = torch.flatten(torch.tensor(y_train.values.astype(np.float32)))
# Pair features with labels and serve them in shuffled mini-batches.
train_tensor = data_utils.TensorDataset(train, train_target)
train_loader = data_utils.DataLoader(train_tensor, batch_size=batch_size, shuffle=True)

In [68]:
# Test features: DataFrame values -> float32 tensor.
test = torch.tensor(X_test.values.astype(np.float32))
# Test labels: float32 tensor, flattened to 1-D.
test_target = torch.flatten(torch.tensor(y_test.values.astype(np.float32)))
# Pair features with labels and serve them in shuffled mini-batches.
test_tensor = data_utils.TensorDataset(test, test_target)
test_loader = data_utils.DataLoader(test_tensor, batch_size=batch_size, shuffle=True)

In [69]:
# Use the first CUDA device when PyTorch reports one as available, otherwise the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

In [70]:
class Net(nn.Module):
  """Fully connected classifier: Linear -> ReLU -> Linear.

  Args:
    in_features: length of each input feature vector (default 2500).
    hidden_features: width of the hidden layer (default 500).
    num_classes: number of output scores per sample (default 2).

  Calling ``Net()`` with no arguments builds the same 2500 -> 500 -> 2
  architecture as before the sizes were made configurable.
  """

  def __init__(self, in_features=2500, hidden_features=500, num_classes=2):
    super().__init__()
    self.fc1 = nn.Linear(in_features, hidden_features)
    self.fc2 = nn.Linear(hidden_features, num_classes)

    self.relu = nn.ReLU()

  def forward(self, x):
    """Return the raw ``fc2`` outputs (logits) for batch ``x``; no softmax is applied."""
    x = self.relu(self.fc1(x))
    return self.fc2(x)
  
# Build the model with its default layer sizes and move its parameters to the selected device.
net = Net().to(device)

In [71]:
# Optimisation setup: cross-entropy loss on the model outputs, Adam with a fixed learning rate.
lr = 1e-4
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=lr)

In [72]:
def _count_correct(model, loader, device):
  """Return ``(correct, total)`` prediction counts for ``model`` over ``loader``.

  Switches ``model`` to eval mode and disables gradient tracking for the pass.
  The model is left in eval mode; the caller must call ``model.train()`` before
  training again.
  """
  correct = 0
  total = 0
  model.eval()
  with torch.no_grad():
    for inputs, labels in loader:
      inputs = inputs.to(device)
      labels = labels.to(device)

      # Predicted class = index of the largest output score in each row.
      predicted = model(inputs).argmax(dim=1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  return correct, total


epochs = 25
for epoch in range(epochs):
  # The accuracy passes below leave the model in eval mode, so train mode has to
  # be re-enabled at the start of every epoch.
  net.train()
  for inputs, labels in train_loader:
    inputs = inputs.to(device)
    # CrossEntropyLoss takes integer class indices; the loader yields float32 labels.
    labels = labels.long().to(device)

    optimizer.zero_grad()

    outputs = net(inputs)
    loss = loss_func(outputs, labels)
    loss.backward()
    optimizer.step()

  # Accuracy on the full training set after this epoch's updates (integer percent, floored).
  train_correct, train_total = _count_correct(net, train_loader, device)
  print(f'Epoch: {epoch}')
  print(f'Training accuracy: {100 * train_correct // train_total} %')

  # Accuracy on the held-out test set (integer percent, floored).
  test_correct, test_total = _count_correct(net, test_loader, device)
  print(f'Testing accuracy: {100 * test_correct // test_total} %')

print('Finished Training')

Epoch: 0
Training accuracy: 86 %
Testing accuracy: 86 %
Epoch: 1
Training accuracy: 88 %
Testing accuracy: 88 %
Epoch: 2
Training accuracy: 90 %
Testing accuracy: 89 %
Epoch: 3
Training accuracy: 90 %
Testing accuracy: 89 %
Epoch: 4
Training accuracy: 91 %
Testing accuracy: 89 %
Epoch: 5
Training accuracy: 91 %
Testing accuracy: 89 %
Epoch: 6
Training accuracy: 91 %
Testing accuracy: 89 %
Epoch: 7
Training accuracy: 91 %
Testing accuracy: 89 %
Epoch: 8
Training accuracy: 92 %
Testing accuracy: 89 %
Epoch: 9
Training accuracy: 92 %
Testing accuracy: 90 %
Epoch: 10
Training accuracy: 92 %
Testing accuracy: 89 %
Epoch: 11
Training accuracy: 92 %
Testing accuracy: 90 %
Epoch: 12
Training accuracy: 92 %
Testing accuracy: 90 %
Epoch: 13
Training accuracy: 92 %
Testing accuracy: 89 %
Epoch: 14
Training accuracy: 92 %
Testing accuracy: 89 %
Epoch: 15
Training accuracy: 93 %
Testing accuracy: 90 %
Epoch: 16
Training accuracy: 93 %
Testing accuracy: 90 %
Epoch: 17
Training accuracy: 93 %
Testing