In [5]:
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import re
import numpy as np
import os as os
import pandas as pd
import pickle

from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


In [11]:
#load data and labels 

with open('train_tweet_embeddings.pkl', 'rb') as f:
    train_sentence_embeddings_list = pickle.load(f)
print(len(train_sentence_embeddings_list))

with open('test_tweet_embeddings.pkl', 'rb') as f:
    test_sentence_embeddings_list = pickle.load(f)
print(len(test_sentence_embeddings_list))


with open('train_labels.pkl', 'rb') as f:
    train_labels = pickle.load(f)
print(len(train_labels))

with open('test_labels.pkl', 'rb') as f:
    test_labels = pickle.load(f)
print(len(test_labels))

40000
10000
40000
10000


In [12]:
#Definition of the model

class CLSTM(nn.Module):
    def __init__(self):
        super(CLSTM,self).__init__()
        # expects input tensor of [1,1,768]
        # 1 input channel, 20 output channels, 1x5 convolution with padding
        self.conv1 = nn.Conv1d(1,20,5,stride = 2,padding = 2)
        # 20 input channel, 10 output channels, 1x5 convolution
        self.conv2 = nn.Conv1d(20,10,5,stride = 2,padding = 2)
        # end up with tensors [1,10,192]
        # these turn into tensors [1,10,48] after pooling which we will
        # these are turned into a list of 10 [1,48] tensors to pass into the lstm
        # each of these 10 will be taken in as token representations
        
        self.lstm = nn.LSTM(
            input_size = 48,hidden_size = 48,num_layers = 2,bidirectional = True)
        # the lstm will output tuple of ([4,1,48],[4,1,48]) where the first entry is
        # the hidden state for classification and the second entry is the cell state

        self.fc1 = nn.Linear(4*48,120)
        self.fc2 = nn.Linear(120,40)
        self.fc3 = nn.Linear(40,1)
    
    def forward(self,x):
        #set initial hidden state and cell state for lstm
        # 2 is for the number of layers and the other 2 is because of bidirection
        hidden = torch.randn(2*2,1,48)
        cell = torch.randn(2*2,1,48)
        
        x = F.max_pool1d(F.relu(self.conv1(x)),kernel_size = 2,stride = 2)
        x = F.max_pool1d(F.relu(self.conv2(x)),kernel_size = 2,stride = 2)
        # x is now a tensor of shape [1,10,48] because we did the pooling
        
        # now put the "tokens" into a list
        tweet_representation = []
        for i in range(len(x[0])):
            tweet_representation.append(x[0][i].unsqueeze(0))
        # tweet_representation is a list of the ten [1,48] tensors to
        # be passed into the lstm
        for j in tweet_representation:
            _, hidden_tuple = self.lstm(j.view(1,1,-1),(hidden,cell)) 

        # hidden state tensor [4,1,48]
        hidden_state = hidden_tuple[0]
        # flatten the hidden_state to tensor [192]
        x = hidden_state.reshape(-1)
        x = F.relu(self.fc1(x))
        # x is now 1 x 120
        x = F.relu(self.fc2(x))
        # x is now 1 x 30
        x = self.fc3(x)
        # x is now a single predicted class
        return torch.sigmoid(x)
model = CLSTM()
print(model)

CLSTM(
  (conv1): Conv1d(1, 20, kernel_size=(5,), stride=(2,), padding=(2,))
  (conv2): Conv1d(20, 10, kernel_size=(5,), stride=(2,), padding=(2,))
  (lstm): LSTM(48, 48, num_layers=2, bidirectional=True)
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=40, bias=True)
  (fc3): Linear(in_features=40, out_features=1, bias=True)
)


In [13]:
# small test of the model---------------
input = torch.randn(1,1,768)
out = model(input)
print(out)

tensor([0.5042], grad_fn=<SigmoidBackward>)


In [15]:
# This cell formats the input data
train_data = torch.stack(train_sentence_embeddings_list)
train_data.shape

torch.Size([40000, 768])

In [17]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr = .01)

#10 is the number of epochs
total_step = len(train_data)
for epoch in range(100):
    running_loss = 0.0
    for i, data in enumerate(train_data,0):
        #inputs.shape is [1,1,768]
        inputs = data
        #label is int (1 or 0)
        label = train_labels[i]
        label = torch.tensor([float(label)])
        optimizer.zero_grad()
        inputs = inputs.unsqueeze(0).unsqueeze(0)
        output = model(inputs)
        loss = criterion(output,label)
        loss.backward()
        optimizer.step()
        
        #print stats
        running_loss +=loss.item()
        if (i+1) % 1000 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, 100, i+1, total_step, loss.item()))
            running_loss = 0.0
print('finished training')

Epoch [1/2], Step [100/40000], Loss: 0.6646
Epoch [1/2], Step [200/40000], Loss: 0.6135
Epoch [1/2], Step [300/40000], Loss: 0.5444
Epoch [1/2], Step [400/40000], Loss: 0.5019
Epoch [1/2], Step [500/40000], Loss: 0.5556
Epoch [1/2], Step [600/40000], Loss: 0.5741
Epoch [1/2], Step [700/40000], Loss: 0.5499
Epoch [1/2], Step [800/40000], Loss: 0.5623
Epoch [1/2], Step [900/40000], Loss: 0.5455
Epoch [1/2], Step [1000/40000], Loss: 0.5723
Epoch [1/2], Step [1100/40000], Loss: 0.5177
Epoch [1/2], Step [1200/40000], Loss: 0.5136
Epoch [1/2], Step [1300/40000], Loss: 0.5264
Epoch [1/2], Step [1400/40000], Loss: 0.5591
Epoch [1/2], Step [1500/40000], Loss: 0.5272
Epoch [1/2], Step [1600/40000], Loss: 0.5974
Epoch [1/2], Step [1700/40000], Loss: 0.5373
Epoch [1/2], Step [1800/40000], Loss: 1.0309
Epoch [1/2], Step [1900/40000], Loss: 0.5066
Epoch [1/2], Step [2000/40000], Loss: 0.4755
Epoch [1/2], Step [2100/40000], Loss: 0.9405
Epoch [1/2], Step [2200/40000], Loss: 0.9172
Epoch [1/2], Step [

KeyboardInterrupt: 