In [1]:
import pandas as pd

def _load_glove_model(File='../models/glove_42B/glove.42B.300d.txt'):
    print("Loading Glove Model")
    df = pd.read_csv(File, sep=" ", quoting=3, header=None, index_col=0)
    gloveModel = {key: val.values for key, val in df.T.items()}
    print(len(gloveModel)," words loaded!")
    return gloveModel

In [2]:
%%time
# Load the loader's default GloVe file, then show the length of one word vector.
GLOVE_MODEL = _load_glove_model()
len(GLOVE_MODEL.get("move"))

Loading Glove Model
1917492  words loaded!
Wall time: 2min


300

In [3]:
%%time
# Rebind GLOVE_MODEL to the 840B / 300d vectors and show the length of one word vector.
GLOVE_MODEL = _load_glove_model(File="../models/glove_840B/glove.840B.300d.txt")
len(GLOVE_MODEL.get("move"))

Loading Glove Model
2196009  words loaded!
Wall time: 2min 53s


300

In [4]:
%%time
# Rebind GLOVE_MODEL to the Twitter 27B / 25d vectors and show the length of one word vector.
GLOVE_MODEL = _load_glove_model(File="../models/glove_twitter_27B/glove.twitter.27B.25d.txt")
len(GLOVE_MODEL.get("move"))

Loading Glove Model
1193513  words loaded!
Wall time: 25.1 s


25

In [5]:
%%time
# Rebind GLOVE_MODEL to the Twitter 27B / 50d vectors and show the length of one word vector.
GLOVE_MODEL = _load_glove_model(File="../models/glove_twitter_27B/glove.twitter.27B.50d.txt")
len(GLOVE_MODEL.get("move"))

Loading Glove Model
1193513  words loaded!
Wall time: 31.4 s


50

In [6]:
%%time
# Rebind GLOVE_MODEL to the Twitter 27B / 100d vectors and show the length of one word vector.
GLOVE_MODEL = _load_glove_model(File="../models/glove_twitter_27B/glove.twitter.27B.100d.txt")
len(GLOVE_MODEL.get("move"))

Loading Glove Model
1193513  words loaded!
Wall time: 41.9 s


100

In [7]:
%%time
# Rebind GLOVE_MODEL to the Twitter 27B / 200d vectors and show the length of one word vector.
GLOVE_MODEL = _load_glove_model(File="../models/glove_twitter_27B/glove.twitter.27B.200d.txt")
len(GLOVE_MODEL.get("move"))

Loading Glove Model
1193513  words loaded!
Wall time: 1min 14s


200

In [8]:
import torch
from torch.autograd import Variable

# Directory holding the prepared dataset, relative to the notebook.
DATA_PATH = '../data/'
df = pd.read_csv(DATA_PATH + 'final_data.csv')


def _get_word_embeddings(word):
    """Return the entry stored for ``word`` in ``GLOVE_MODEL``, or ``None`` if it has none."""
    return GLOVE_MODEL.get(word)

# Attach each word's GloVe entry; words absent from GLOVE_MODEL get None.
df['GLOVE'] = df["actual_words"].apply(_get_word_embeddings)
# Drops, in place, every row with a missing value in ANY column -- this
# includes the rows whose word had no embedding.
# NOTE(review): if only missing embeddings should be removed,
# dropna(subset=['GLOVE']) would be narrower -- confirm the intent.
df.dropna(inplace=True)

# Boolean column masks keep the selections as DataFrames: the features are
# the 'GLOVE' column, the target is the 'move' column.
x_data = df.loc[:, df.columns == 'GLOVE']
y_data = df.loc[:, df.columns == 'move']

# pick the torch device to run on
def get_device():
    """Return ``cuda:0`` when CUDA is available, otherwise the CPU device."""
    use_cuda = torch.cuda.is_available()
    return torch.device('cuda:0' if use_cuda else 'cpu')

# convert a df to tensor to be used in pytorch
def df_to_tensor(s_df):
    """Convert the values of ``s_df`` into a float tensor on the active device.

    Parameters
    ----------
    s_df : pandas.DataFrame or pandas.Series
        Object whose ``.values`` array is turned into the tensor.

    Returns
    -------
    torch.Tensor
        ``torch.Tensor(s_df.values)`` moved to the device chosen by
        ``get_device()``.
    """
    # torch.autograd.Variable is a deprecated wrapper that simply returns the
    # tensor, so the tensor is used directly.
    return torch.Tensor(s_df.values).to(get_device())

def complex_df_to_tensor(_df):
    """Flatten a frame containing a ``'GLOVE'`` vector column into one 2-D tensor.

    For every row, the values of all columns other than ``'GLOVE'`` come
    first (in column order), followed by the elements of that row's
    ``'GLOVE'`` entry. The per-row vectors are stacked along dim 0.

    Parameters
    ----------
    _df : pandas.DataFrame
        Frame whose non-``'GLOVE'`` cells are numeric scalars and whose
        ``'GLOVE'`` cells (if the column exists) are numeric sequences of
        equal length.

    Returns
    -------
    torch.Tensor
        Stacked rows, moved to the device chosen by ``get_device()``.

    Raises
    ------
    RuntimeError
        If ``_df`` has no rows (``torch.stack`` rejects an empty list) or the
        rows do not all have the same length.
    """
    device = get_device()
    stacked_rows = []
    for _, row in _df.iterrows():
        values = row.to_dict()
        # Pull the embedding out first so only scalar features remain in
        # `values`. A frame without a 'GLOVE' column yields scalar-only rows.
        embedding = [torch.Tensor(values.pop('GLOVE'))] if 'GLOVE' in values else []
        scalars = torch.Tensor(list(values.values()))
        stacked_rows.append(torch.cat([scalars] + embedding, dim=0))

    # Build everything on the CPU and move it to the device once, rather than
    # issuing several tiny transfers per row.
    return torch.stack(stacked_rows, 0).to(device)

# Replace the feature/target DataFrames with device tensors of the same names.
x_data = complex_df_to_tensor(x_data)
y_data = df_to_tensor(y_data)

In [9]:
class SingleLayeredNN(torch.nn.Module):
    """Binary classifier: Linear -> ReLU -> Linear(1 unit) -> Sigmoid.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size : int
        Width of the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Sub-modules are registered in the order in which forward() uses them.
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output of the network for input ``x``."""
        activated = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(activated))


In [10]:
# Take the input width from the feature tensor so the network matches
# whichever embedding size x_data was built with; the hidden layer stays at 200.
model = SingleLayeredNN(x_data.shape[1], 200)
# Binary cross-entropy, applied to the sigmoid output of the model.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [11]:
# Put the model on the device that get_device() selects (the same helper used
# when the data tensors were created). An unconditional model.cuda() fails on
# machines without a GPU.
device = get_device()
model.to(device)

train_loss = []
model.train()
for epoch in range(5000):
    # Forward pass over the whole data set (no mini-batches)
    y_pred = model(x_data)
    # Compute loss and record it for later inspection
    loss = criterion(y_pred, y_data)
    train_loss.append(loss.item())

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [12]:
from sklearn.metrics import accuracy_score

# Inference only: no autograd graph is needed for the predictions.
model.eval()
with torch.no_grad():
    y_pred = model(x_data)

# NOTE: x_data / y_data are rebound from tensors to NumPy arrays here, so this
# cell cannot be re-run without first re-running the cell that builds the tensors.
y_data = y_data.cpu().detach().numpy()
x_data = x_data.cpu().detach().numpy()
y_pred = y_pred.cpu().numpy()

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
y_pred_labels = [0 if p < 0.5 else 1 for p in y_pred]

# NOTE: this accuracy is measured on the same data the model was trained on.
accuracy_score(y_data, y_pred_labels)

1.0