Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import data_loading_code as pre
import numpy as np

Data loader:

In [2]:
 # get data, pre-process and split
data = pre.pd.read_csv("amazon_cells_labelled.txt", delimiter='\t', header=None)
data.columns = ['Sentence', 'Class']
data['index'] = data.index                                          # add new column index
columns = ['index', 'Class', 'Sentence']
data = pre.preprocess_pandas(data, columns)                             # pre-process
training_data, validation_data, training_labels, validation_labels = pre.train_test_split( # split the data into training, validation, and test splits
    data['Sentence'].values.astype('U'),
    data['Class'].values.astype('int32'),
    test_size=0.10,
    random_state=0,
    shuffle=True
)

# vectorize data using TFIDF and transform for PyTorch for scalability
word_vectorizer = pre.TfidfVectorizer(analyzer='word', ngram_range=(1,2), max_features=50000, max_df=0.5, use_idf=True, norm='l2')
training_data = word_vectorizer.fit_transform(training_data)        # transform texts to sparse matrix
training_data = training_data.todense()                             # convert to dense matrix for Pytorch
vocab_size = len(word_vectorizer.vocabulary_)
validation_data = word_vectorizer.transform(validation_data)
validation_data = validation_data.todense()
train_x_tensor = torch.from_numpy(np.array(training_data)).type(torch.FloatTensor)
train_y_tensor = torch.from_numpy(np.array(training_labels)).long()
validation_x_tensor = torch.from_numpy(np.array(validation_data)).type(torch.FloatTensor)
validation_y_tensor = torch.from_numpy(np.array(validation_labels)).long()


In [32]:


torch.set_printoptions(threshold=10_000)
print(len(training_data))

print(train_y_tensor[1])

900
tensor(1)


Network

In [41]:

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.network = nn.Sequential(
        nn.Linear(vocab_size,1000),
        nn.ReLU(),
        nn.Linear(1000,100),
        nn.ReLU(),
        nn.Linear(100,25),
        nn.ReLU(),
        nn.Linear(25,2)
        )
    
    def feedforward(self,input):
        return self.network(input)
    
    
def backward(model, epochs, optimizer, loss_function, train_x_tensor, train_y_tensor, validation_x_tensor, validation_y_tensor):
    train_Acc = 0
    train_Acc = 0
    validation_AccV = 0
    best_loss = 0
    
    for epoch in range(epochs):
        trainCorr = 0
        valCorr = 0
        for (sentens, labels) in zip(train_x_tensor,train_y_tensor):
            pred = model.feedforward(sentens)
            
            loss = loss_function(pred, labels)
            loss.backward()
            
            optimizer.step()
            optimizer.zero_grad()
            
            if best_loss > loss.item():
                best_loss = loss.item()
                best_model = torch.save("nlpBest")
                
            pred = torch.argmax(F.softmax(pred))
            trainCorr += torch.sum(pred==labels).item()
            train_Acc = trainCorr/len(training_data)
            
        model.eval()
        with torch.no_grad():   
            for (sentensv, labelsv) in zip(validation_x_tensor,validation_y_tensor):
                
                predv = model.feedforward(sentensv)
                lossv = loss_function(predv,labelsv)
                
                predv = torch.argmax(F.softmax(predv))
                valCorr += torch.sum(predv==labelsv).item()
                validation_AccV = valCorr/len(validation_data)
        print("epoch:", epoch, "Loss Training:", loss.item(), "Training acc:", train_Acc)
        print("epoch:", epoch, "Loss Validation:", lossv.item(),"Validation acc:", validation_AccV)
    return best_model
        
        
    
                
            

Implemntation:

In [42]:
epochs = 3
model = Net()


optimizer = torch.optim.AdamW(model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()

trained_model = backward(model, epochs, optimizer, criterion, train_x_tensor, train_y_tensor, validation_x_tensor, validation_y_tensor)



  pred = torch.argmax(F.softmax(pred))
  predv = torch.argmax(F.softmax(predv))


epoch: 0 Loss Training: 0.5135350823402405 Training acc: 0.7111111111111111
epoch: 0 Loss Validation: 0.017608271911740303 Validation acc: 0.86
epoch: 1 Loss Training: 0.00019500737835187465 Training acc: 0.9844444444444445
epoch: 1 Loss Validation: 9.536738616588991e-07 Validation acc: 0.85
epoch: 2 Loss Training: 1.2397689715726301e-05 Training acc: 1.0
epoch: 2 Loss Validation: 1.1920928244535389e-07 Validation acc: 0.85
epoch: 3 Loss Training: 3.6954811548639555e-06 Training acc: 1.0
epoch: 3 Loss Validation: 0.0 Validation acc: 0.85
epoch: 4 Loss Training: 1.5497195136049413e-06 Training acc: 1.0
epoch: 4 Loss Validation: 0.0 Validation acc: 0.85
epoch: 5 Loss Training: 7.152555099310121e-07 Training acc: 1.0
epoch: 5 Loss Validation: 0.0 Validation acc: 0.84
epoch: 6 Loss Training: 3.576278118089249e-07 Training acc: 1.0
epoch: 6 Loss Validation: 0.0 Validation acc: 0.84
epoch: 7 Loss Training: 2.3841855067985307e-07 Training acc: 1.0
epoch: 7 Loss Validation: 0.0 Validation acc:

Test network: