In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import emoji

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from emo_utils import *
from test_utils import *

%matplotlib inline

In [2]:
emoji_dictionary = {0: ":red_heart:",    # :heart: prints a black instead of red heart depending on the font
                    1: ":baseball:",
                    2: ":smile:",
                    3: ":disappointed:",
                    4: ":fork_and_knife:"}

def my_label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji code (string) ready to be printed
    """
    return emoji.emojize(emoji_dictionary[label], language="alias")

for i in range(5):
    print(my_label_to_emoji(i))

❤️
⚾
😄
😞
🍴


In [3]:
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('data/glove.6B.50d.txt')

# Baseline model

In [4]:
def sentence_to_avg(sentence):
    
    avg = np.zeros(VEC_SHAPE)
    
    count = 0
    for word in sentence.lower().split():
        if word in word_to_vec_map:
            avg += word_to_vec_map[word]
            count += 1

    if count > 0: avg /= count
    
    return avg

In [5]:
class Data(Dataset):
    
    def __init__(self, filename):
        super(Data, self).__init__()
        data = pd.read_csv(filename, header=None)
        self.X, self.Y = data[0], data[1].astype("int")
        self.avg = torch.FloatTensor(np.stack(self.X.apply(sentence_to_avg)))
        self.Y1h = F.one_hot(torch.tensor(self.Y.to_numpy()), NUM_CLASSES).float()
        
    def __len__(self):
        return len(self.X)
        
    def __getitem__(self, index):
        return self.avg[index], self.Y1h[index]
    
class Model(nn.Module):
    
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features, bias=True)
        self._initialize_weights()
    
    def _initialize_weights(self):
        nn.init.xavier_uniform_(self.linear.weight)
        nn.init.constant_(self.linear.bias, 0.0)
    
    def forward(self, x):
        return self.linear(x)


In [6]:
any_word = list(word_to_vec_map.keys())[0]
VEC_SHAPE = word_to_vec_map[any_word].shape
VEC_DIM = VEC_SHAPE[0]
NUM_CLASSES = 5

In [7]:
data_train = Data("data/train_emoji.csv")
data_test = Data("data/test_emoji.csv")

train_loader = DataLoader(data_train, batch_size=1, shuffle=True)
test_loader = DataLoader(data_train, batch_size=1, shuffle=True)

model = Model(VEC_DIM, NUM_CLASSES)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [8]:
def train(model, train_loader, test_loader,
          criterion, optimizer, num_epochs):
    
    losses = []
    for epoch in range(num_epochs):
        running_loss = 0.
        
        for x, y in train_loader:
            
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch{epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
        losses.append(epoch_loss)
    
        if epoch % 100 == 0 or epoch==num_epochs-1:
            tp = 0
            for xx, yy in test_loader:    
                yyhat = model(xx)
                if yyhat.argmax() == yy.argmax(): tp+=1
            print(f"Accuracy: {tp/len(test_loader):.4f}")

    return losses

In [9]:
losses = train(model, train_loader, test_loader, criterion, optimizer, num_epochs=400)

Epoch1/400, Loss: 1.5049
Accuracy: 0.3864
Epoch2/400, Loss: 1.3859
Epoch3/400, Loss: 1.3151
Epoch4/400, Loss: 1.2229
Epoch5/400, Loss: 1.1813
Epoch6/400, Loss: 1.1155
Epoch7/400, Loss: 1.0588
Epoch8/400, Loss: 1.0268
Epoch9/400, Loss: 1.0020
Epoch10/400, Loss: 0.9658
Epoch11/400, Loss: 0.9373
Epoch12/400, Loss: 0.9119
Epoch13/400, Loss: 0.8881
Epoch14/400, Loss: 0.8614
Epoch15/400, Loss: 0.8514
Epoch16/400, Loss: 0.8314
Epoch17/400, Loss: 0.7980
Epoch18/400, Loss: 0.7972
Epoch19/400, Loss: 0.7846
Epoch20/400, Loss: 0.7636
Epoch21/400, Loss: 0.7443
Epoch22/400, Loss: 0.7415
Epoch23/400, Loss: 0.7306
Epoch24/400, Loss: 0.7177
Epoch25/400, Loss: 0.7072
Epoch26/400, Loss: 0.6925
Epoch27/400, Loss: 0.6805
Epoch28/400, Loss: 0.6817
Epoch29/400, Loss: 0.6724
Epoch30/400, Loss: 0.6597
Epoch31/400, Loss: 0.6559
Epoch32/400, Loss: 0.6438
Epoch33/400, Loss: 0.6438
Epoch34/400, Loss: 0.6318
Epoch35/400, Loss: 0.6262
Epoch36/400, Loss: 0.6097
Epoch37/400, Loss: 0.6040
Epoch38/400, Loss: 0.6043
Epoc

Epoch308/400, Loss: 0.2355
Epoch309/400, Loss: 0.2351
Epoch310/400, Loss: 0.2345
Epoch311/400, Loss: 0.2326
Epoch312/400, Loss: 0.2340
Epoch313/400, Loss: 0.2329
Epoch314/400, Loss: 0.2344
Epoch315/400, Loss: 0.2282
Epoch316/400, Loss: 0.2322
Epoch317/400, Loss: 0.2311
Epoch318/400, Loss: 0.2325
Epoch319/400, Loss: 0.2310
Epoch320/400, Loss: 0.2282
Epoch321/400, Loss: 0.2329
Epoch322/400, Loss: 0.2310
Epoch323/400, Loss: 0.2297
Epoch324/400, Loss: 0.2271
Epoch325/400, Loss: 0.2288
Epoch326/400, Loss: 0.2259
Epoch327/400, Loss: 0.2254
Epoch328/400, Loss: 0.2304
Epoch329/400, Loss: 0.2289
Epoch330/400, Loss: 0.2270
Epoch331/400, Loss: 0.2265
Epoch332/400, Loss: 0.2260
Epoch333/400, Loss: 0.2273
Epoch334/400, Loss: 0.2269
Epoch335/400, Loss: 0.2242
Epoch336/400, Loss: 0.2250
Epoch337/400, Loss: 0.2259
Epoch338/400, Loss: 0.2229
Epoch339/400, Loss: 0.2263
Epoch340/400, Loss: 0.2204
Epoch341/400, Loss: 0.2257
Epoch342/400, Loss: 0.2234
Epoch343/400, Loss: 0.2223
Epoch344/400, Loss: 0.2246
E