In [4]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [16]:
import torch
import pandas as pd 
import numpy as np
import torch.utils.data as data_utils

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

In [5]:
# Fetch the training-feature dataset (bartoszmaj/x-train) with the Kaggle CLI.
# Per the recorded output below, the archive is saved as x-train.zip in /content.
!kaggle datasets download -d bartoszmaj/x-train

Downloading x-train.zip to /content
100% 1.54G/1.54G [00:23<00:00, 78.7MB/s]
100% 1.54G/1.54G [00:23<00:00, 71.7MB/s]


In [6]:
!unzip x-train.zip

Archive:  x-train.zip
  inflating: X_train.csv             


In [7]:
# Fetch the test-feature dataset (bartoszmaj/x-test) with the Kaggle CLI.
# Per the recorded output below, the archive is saved as x-test.zip in /content.
!kaggle datasets download -d bartoszmaj/x-test

Downloading x-test.zip to /content
 99% 674M/678M [00:05<00:00, 154MB/s]
100% 678M/678M [00:05<00:00, 127MB/s]


In [8]:
!unzip x-test.zip

Archive:  x-test.zip
  inflating: X_test.csv              


In [9]:
# Read the two feature tables that were extracted from the Kaggle archives
# into DataFrames (train first, then test).
X_train, X_test = (
    pd.read_csv(csv_path) for csv_path in ("X_train.csv", "X_test.csv")
)


In [47]:
# Read the label tables into DataFrames (train first, then test).
# NOTE(review): none of the cells shown in this notebook download or create
# y_train.csv / y_test.csv; they must already be in the working directory.
y_train, y_test = (
    pd.read_csv(csv_path) for csv_path in ("y_train.csv", "y_test.csv")
)

In [48]:
# Mini-batch size shared by the training and test DataLoaders built below.
batch_size = 32

In [49]:
# Turn the text sentiment labels into integer class ids, applying the same
# replacements, in the same order, to both label frames:
# "negative" -> 0, then "positive" -> 1.
for sentiment_text, sentiment_code in (("negative", 0), ("positive", 1)):
    y_train = y_train.replace(sentiment_text, sentiment_code)
    y_test = y_test.replace(sentiment_text, sentiment_code)

# Show the first rows of each frame as the cell output.
y_train.head(), y_test.head()

(   sentiment
 0          0
 1          1
 2          0
 3          1
 4          0,
    sentiment
 0          1
 1          1
 2          0
 3          1
 4          0)

In [60]:
# Training features as a float32 tensor (one row per sample).
train = torch.tensor(X_train.values.astype(np.float32))

# Training labels as a flat 1-D float32 tensor; the training loop casts them
# to integer class ids before computing the loss.
train_target = torch.tensor(y_train.values.astype(np.float32)).flatten()

# Pair features with labels and serve them in shuffled mini-batches.
train_tensor = data_utils.TensorDataset(train, train_target)
train_loader = data_utils.DataLoader(
    train_tensor,
    batch_size=batch_size,
    shuffle=True,
)

In [61]:
# Test features as a float32 tensor (one row per sample).
test = torch.tensor(X_test.values.astype(np.float32))

# Test labels as a flat 1-D float32 tensor.
test_target = torch.tensor(y_test.values.astype(np.float32)).flatten()

# Pair features with labels and serve them in mini-batches.
# NOTE(review): shuffle=True is kept from the original; shuffling is not
# required for accuracy evaluation.
test_tensor = data_utils.TensorDataset(test, test_target)
test_loader = data_utils.DataLoader(
    test_tensor,
    batch_size=batch_size,
    shuffle=True,
)

In [56]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# `dev` keeps the device name as a string; `device` is the torch.device object.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

In [96]:
class Net(nn.Module):
  """Fully connected classifier: two hidden layers followed by a logit layer.

  Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout. The final
  layer returns raw logits (no softmax), which is the input that
  nn.CrossEntropyLoss expects.

  Args:
    in_features: number of input features per sample (default 5100).
    hidden1: width of the first hidden layer (default 1000).
    hidden2: width of the second hidden layer (default 500).
    num_classes: number of output logits (default 2).
    dropout: dropout probability applied after each hidden stage (default 0.4).
  """

  def __init__(self, in_features=5100, hidden1=1000, hidden2=500,
               num_classes=2, dropout=0.4):
    super().__init__()
    self.fc1 = nn.Linear(in_features, hidden1)
    self.fc2 = nn.Linear(hidden1, hidden2)
    self.fc3 = nn.Linear(hidden2, num_classes)

    self.bn = nn.BatchNorm1d(hidden1)
    self.bn2 = nn.BatchNorm1d(hidden2)

    self.dropout = nn.Dropout(dropout)

    self.relu = nn.ReLU()

  def forward(self, x):
    """Return class logits for a batch of samples.

    Args:
      x: float tensor whose last dimension equals `in_features`
        (expected shape: (batch, in_features)).

    Returns:
      Tensor of logits with `num_classes` values per sample.
    """
    # The ReLU was previously constructed but never applied, which left the
    # hidden layers without any nonlinearity between them.
    x = self.relu(self.bn(self.fc1(x)))
    x = self.dropout(x)
    x = self.relu(self.bn2(self.fc2(x)))
    x = self.dropout(x)
    x = self.fc3(x)
    return x
  
# Build the model with its default layer sizes and move its parameters to the
# selected device (GPU when available, otherwise CPU).
net = Net().to(device)

In [97]:
# Loss over the raw class logits produced by the network.
loss_func = nn.CrossEntropyLoss()

# Adam over every parameter of `net`, using a single learning rate.
lr = 1e-3
optimizer = optim.Adam(params=net.parameters(), lr=lr)

In [None]:
def _accuracy_percent(model, loader):
  """Return the classification accuracy of `model` over `loader`.

  The model is switched to eval mode first, so Dropout is disabled and
  BatchNorm uses its running statistics. It is left in eval mode on return.

  Args:
    model: module mapping a batch of inputs to per-class logits.
    loader: iterable yielding (inputs, labels) batches.

  Returns:
    Accuracy as a whole-number percentage (floor division), or 0 when the
    loader yields no samples.
  """
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for batch_inputs, batch_labels in loader:
      batch_inputs = batch_inputs.to(device)
      batch_labels = batch_labels.to(device=device, dtype=torch.long)

      # The predicted class is the index of the largest logit.
      _, predicted = torch.max(model(batch_inputs), 1)
      total += batch_labels.size(0)
      correct += (predicted == batch_labels).sum().item()
  return 100 * correct // total if total else 0


epochs = 15
for epoch in range(epochs):
  # Re-enable training mode every epoch: the accuracy evaluation at the end of
  # the previous epoch leaves the network in eval mode, which would otherwise
  # keep Dropout disabled and BatchNorm statistics frozen while training.
  net.train()
  for inputs, labels in train_loader:
    inputs = inputs.to(device)
    # CrossEntropyLoss expects integer class indices as targets.
    labels = labels.to(device=device, dtype=torch.long)

    optimizer.zero_grad()

    outputs = net(inputs)
    loss = loss_func(outputs, labels)
    loss.backward()
    optimizer.step()

  # Both accuracies are measured in eval mode so they are comparable.
  train_accuracy = _accuracy_percent(net, train_loader)
  print(f'Epoch: {epoch}')
  print(f'Training accuracy: {train_accuracy} %')

  test_accuracy = _accuracy_percent(net, test_loader)
  print(f'Testing accuracy: {test_accuracy} %')

print('Finished Training')