In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
def process_data(file_name):
  x, y = [], []
  f = open('/content/drive/MyDrive/data/' + file_name, encoding='utf-8')
  for line in f:
    _, label, sentence = line.split(" ", 2)
    x.append(sentence.rstrip('\n'))
    y.append(label)
  f.close()
  x = np.asarray(x)
  y = np.asarray(y,dtype=float)
  return x, y

In [3]:
import numpy as np

emo_vecs = np.load('/content/drive/MyDrive/data/emo_vecs.npy').astype(float)

# load sarcasm data
x_tr, y_tr = process_data('train_main.txt')
x_te, y_te = process_data('test_main.txt')

In [5]:
print(emo_vecs.shape)

# print(x_tr.shape)
# print(x_te.shape)
# print(x_tr.shape[0] + x_te.shape[0])

# emo_vecs contains training data and test data --> split into train, val, test
N = x_tr.shape[0]
emo_tr = emo_vecs[:N]
M = round(x_te.shape[0]/2)
emo_val = emo_vecs[N:N+M]
emo_tr_val = np.concatenate((emo_tr,emo_val))
emo_te = emo_vecs[N+M:]

print(emo_tr.shape)
print(emo_val.shape, emo_te.shape)
print(emo_tr_val.shape)

# split y_te into validation and test set
y_val = y_te[:M]
y_tr_val = np.concatenate((y_tr,y_val))
y_te = y_te[M:]
print(y_val.shape, y_te.shape)
print(y_tr_val.shape)

(321748, 6)
(257082, 6)
(32333, 6) (32333, 6)
(289415, 6)
(32333,) (32333,)
(289415,)


In [13]:
import torch

class NN():
  def __init__(self, h_size=100):
    self.model = torch.nn.Sequential(
        torch.nn.Linear(6, h_size),
        torch.nn.ReLU(),
        torch.nn.Linear(h_size, 2),
        torch.nn.Softmax(dim=1)
    )

  def predict(self, X):
    x = torch.from_numpy(X).float()
    out = self.model(x)
    y_pred = torch.argmax(out, dim=1).detach().numpy() # predict 0 or 1    
    return y_pred
  
  def fit(self, X, Y, epochs=200, step=1e-3, decay=5e-4):
    optimizer = torch.optim.Adam(params=self.model.parameters(), lr=step, weight_decay=decay)       
    loss_fn = torch.nn.CrossEntropyLoss()
    x = torch.from_numpy(X).float()
    y = torch.from_numpy(Y).long()

    print('h_size: {}\tdecay: {}'.format(h_size, decay))
    start_time = timeit.default_timer()
    for i in range(1,epochs+1):
      # forward pass
      out = self.model(x)
      # compute loss
      loss = loss_fn(out, y)
      # zero gradients before backward pass
      optimizer.zero_grad()
      # backward pass
      loss.backward()
      # update model parameters
      optimizer.step()

      elapsed_time = timeit.default_timer() - start_time
      if i%10 == 0:
          print('epoch {}/{} \tloss: {} \telapsed time: {:.3f}'.format(i, epochs, loss.item(), elapsed_time))

In [16]:
import timeit

h_size = 200
decay = 5e-4
step = 1e-3
epochs = 200

model = NN(h_size=h_size)
model.fit(emo_tr,y_tr,epochs=epochs,step=step,decay=decay)

h_size: 200	decay: 0.0005
epoch 10/200 	loss: 0.6935808062553406 	elapsed time: 4.563
epoch 20/200 	loss: 0.693896472454071 	elapsed time: 9.086
epoch 30/200 	loss: 0.692541241645813 	elapsed time: 13.700
epoch 40/200 	loss: 0.6926780939102173 	elapsed time: 18.275
epoch 50/200 	loss: 0.6927775144577026 	elapsed time: 22.807
epoch 60/200 	loss: 0.6924811601638794 	elapsed time: 27.345
epoch 70/200 	loss: 0.6926854252815247 	elapsed time: 31.896
epoch 80/200 	loss: 0.692669689655304 	elapsed time: 36.435
epoch 90/200 	loss: 0.692686140537262 	elapsed time: 40.974
epoch 100/200 	loss: 0.692670464515686 	elapsed time: 45.513
epoch 110/200 	loss: 0.6926088929176331 	elapsed time: 50.055
epoch 120/200 	loss: 0.6925734281539917 	elapsed time: 54.624
epoch 130/200 	loss: 0.6925066113471985 	elapsed time: 59.147
epoch 140/200 	loss: 0.6924343109130859 	elapsed time: 63.679
epoch 150/200 	loss: 0.6924031376838684 	elapsed time: 68.198
epoch 160/200 	loss: 0.6923456788063049 	elapsed time: 72.71

In [17]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

y_pred = model.predict(emo_val)
print('accuracy: ', accuracy_score(y_pred, y_val))
print('f1: ', f1_score(y_pred, y_val))
print('precision: ', precision_score(y_pred, y_val))

accuracy:  0.5284693656635635
f1:  0.5699779996615332
precision:  0.6249768046019669


In [18]:
# retrain model on train+val set

model = NN(h_size=h_size)
model.fit(emo_tr_val, y_tr_val)

h_size: 200	decay: 0.0005
epoch 10/200 	loss: 0.6934453248977661 	elapsed time: 5.207
epoch 20/200 	loss: 0.6926261782646179 	elapsed time: 10.316
epoch 30/200 	loss: 0.6926724314689636 	elapsed time: 15.431
epoch 40/200 	loss: 0.6928812861442566 	elapsed time: 20.531
epoch 50/200 	loss: 0.692765474319458 	elapsed time: 25.645
epoch 60/200 	loss: 0.6928519606590271 	elapsed time: 30.758
epoch 70/200 	loss: 0.692750096321106 	elapsed time: 35.850
epoch 80/200 	loss: 0.6926742196083069 	elapsed time: 40.936
epoch 90/200 	loss: 0.6926004886627197 	elapsed time: 46.032
epoch 100/200 	loss: 0.692500114440918 	elapsed time: 51.133
epoch 110/200 	loss: 0.6924343109130859 	elapsed time: 56.259
epoch 120/200 	loss: 0.6923749446868896 	elapsed time: 61.353
epoch 130/200 	loss: 0.6922917366027832 	elapsed time: 66.458
epoch 140/200 	loss: 0.692272961139679 	elapsed time: 71.549
epoch 150/200 	loss: 0.692196786403656 	elapsed time: 76.671
epoch 160/200 	loss: 0.6921823620796204 	elapsed time: 81.7

In [19]:
y_pred = model.predict(emo_te)
print('accuracy: ', accuracy_score(y_pred, y_te))
print('f1: ', f1_score(y_pred, y_te))
print('precision: ', precision_score(y_pred, y_te))

accuracy:  0.5243868493489624
f1:  0.564411964649898
precision:  0.6162934554002227
