## Training Torch models (Step by Step)

In [1]:
from models import *
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from skorch import NeuralNet, callbacks, NeuralNetClassifier
import numpy as np
from evaluator import Evaluator
from trainer import SkorchTrainer

Loading transformed data

Note: SkorchTrainer is a class in "trainer.py" which is responsible for training torch models. The presented NeuralNetClassifier classes are only showcases and the actual declarations are defined inside the trainer itself.

In [2]:
Data = np.loadtxt('data/transform_data.csv', skiprows=1, delimiter=',')
X, y = Data[:, :-1], Data[:, -1].astype('int')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state = 0)
trainer = SkorchTrainer(X_train,y_train)

### *Training Conv1D using skorch*

In [14]:
trainer.train_conv1d(Conv1dText,50)

Start training CONV1D
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.3569[0m        [32m1.0974[0m       [35m0.3569[0m        [31m1.0971[0m  2.4160
      2       0.3569        [32m1.0973[0m       0.3569        [31m1.0971[0m  1.7270
      3       0.3569        [32m1.0973[0m       0.3569        [31m1.0970[0m  1.8340
      4       0.3569        [32m1.0972[0m       0.3569        [31m1.0969[0m  1.7420
      5       0.3569        [32m1.0971[0m       0.3569        [31m1.0968[0m  1.7680
      6       0.3569        [32m1.0968[0m       0.3569        [31m1.0962[0m  1.7540
      7       0.3569        [32m1.0959[0m       0.3569        [31m1.0939[0m  1.6560
      8       [36m0.3817[0m        [32m1.0901[0m       [35m0.4233[0m        [31m1.0773[0m  1.7520
      9       [36m0.4087[0m        [32m1.0718[0m       [35m0.4236[0m        [31m1.0555[0m 

### *Training Conv2D using skorch*
Conv2D network which is called CBR_Network in "models.py" drops the last feature of tweet vectors to reshape the tensor into a squeared 7x7 matrix.

In [16]:
trainer.train_conv2d(CBR_Network,49,(7,7))

Start training CONV2D
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.3843[0m        [32m1.0861[0m       [35m0.4183[0m        [31m1.0661[0m  1.5470
      2       [36m0.4128[0m        [32m1.0635[0m       [35m0.4446[0m        [31m1.0490[0m  1.2140
      3       [36m0.4311[0m        [32m1.0520[0m       [35m0.4512[0m        [31m1.0401[0m  1.2880
      4       [36m0.4443[0m        [32m1.0445[0m       0.4459        [31m1.0390[0m  1.2800
      5       [36m0.4506[0m        [32m1.0367[0m       0.4502        [31m1.0364[0m  1.2480
      6       [36m0.4623[0m        [32m1.0329[0m       0.4512        1.0423  1.2150
      7       [36m0.4708[0m        [32m1.0281[0m       0.4509        [31m1.0346[0m  1.2580
      8       [36m0.4770[0m        [32m1.0234[0m       [35m0.4618[0m        [31m1.0345[0m  1.2810
      9       [36m0.4982[0m      

### *Training RNN using skorch*

In [9]:

input_size = 2
seq_length = X_train.shape[1]//input_size
hidden_size = 15
num_layers = 3
num_classes = 3
model = NeuralNetClassifier(module = RecurrentNN,
                            module__input_size = input_size,
                            module__hidden_size = hidden_size,
                            module__num_layers = num_layers,
                            module__seq_length  = seq_length,
                            module__num_classes = num_classes,
                            device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                            criterion = nn.CrossEntropyLoss,
                            optimizer = optim.SGD,
                            lr = 0.008,
                            optimizer__momentum=0.9,
                            callbacks=[('tr_acc', callbacks.EpochScoring('accuracy',
                                                                         lower_is_better=False,
                                                                         on_train=True,
                                                                         name='train_acc'))],
                            batch_size = 2000,
                            max_epochs= 40)

Building Tensors out of numpy arrays

In [4]:
input_size = 2
trainer.train_rnn(RecurrentNN,params={
    'input_size': input_size,
    'seq_length': X_train.shape[1]//input_size,
    'hidden_size': 15,
    'num_layers': 3,
    'num_classes': 3
})


Start training RNN
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.3380[0m        [32m1.1014[0m       [35m0.3499[0m        [31m1.1012[0m  1.1320
      2       [36m0.3425[0m        [32m1.0967[0m       0.3453        [31m1.0942[0m  0.4240
      3       [36m0.3615[0m        [32m1.0907[0m       [35m0.4100[0m        [31m1.0881[0m  0.4850
      4       [36m0.4258[0m        [32m1.0835[0m       [35m0.4233[0m        [31m1.0820[0m  0.4310
      5       [36m0.4296[0m        [32m1.0779[0m       [35m0.4253[0m        [31m1.0761[0m  0.4900
      6       [36m0.4360[0m        [32m1.0722[0m       [35m0.4280[0m        [31m1.0708[0m  0.4400
      7       [36m0.4401[0m        [32m1.0668[0m       [35m0.4309[0m        [31m1.0660[0m  0.4890
      8       [36m0.4409[0m        [32m1.0620[0m       0.4286        [31m1.0613[0m  0.4500
      9    

### Training RNN using pytorch

In [101]:
torch.manual_seed(0) 
input_size = 2
seq_length = X_train.shape[1]//input_size
hidden_size = 15
num_layers = 3
num_classes = 3
learning_rate = 0.008
epoch_number = 40
batch_size = 2000
iteration = X_train.shape[0] // batch_size

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

RNN = RecurrentNN(input_size, hidden_size, num_layers, seq_length, num_classes).to(device)
optimizer = optim.SGD(RNN.parameters(), lr = learning_rate, momentum = 0.9 )
criterion = nn.CrossEntropyLoss()

epoch_errors = []
for i in range(epoch_number):
    for j in range(iteration):
        
        # Dividing batches
        X_train_batch = X_train[j: j+batch_size, :]
        y_train_batch = y_trn[j: j+batch_size]
        
        train_input = Variable(torch.Tensor(X_train_batch.reshape((X_train_batch.shape[0], seq_length, -1))),
                               requires_grad=True).to(device)
        train_output = Variable(torch.from_numpy(y_train_batch)).to(device)
        optimizer.zero_grad()                     
        # Forward propagation
        prediction = RNN(train_input)

        # Calculating loss
        loss = criterion(prediction, train_output.to(torch.long))
        epoch_errors.append(loss)
        # Back propagation
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Training Loss: %1.5f" % (i, loss))


Epoch: 0, Training Loss: 1.09577
Epoch: 1, Training Loss: 1.08306
Epoch: 2, Training Loss: 1.06917
Epoch: 3, Training Loss: 1.05581
Epoch: 4, Training Loss: 1.04332
Epoch: 5, Training Loss: 1.03185
Epoch: 6, Training Loss: 1.02154
Epoch: 7, Training Loss: 1.01255
Epoch: 8, Training Loss: 1.00491
Epoch: 9, Training Loss: 0.99853
Epoch: 10, Training Loss: 0.99324
Epoch: 11, Training Loss: 0.98883
Epoch: 12, Training Loss: 0.98512
Epoch: 13, Training Loss: 0.98194
Epoch: 14, Training Loss: 0.97917
Epoch: 15, Training Loss: 0.97674
Epoch: 16, Training Loss: 0.97459
Epoch: 17, Training Loss: 0.97264
Epoch: 18, Training Loss: 0.97087
Epoch: 19, Training Loss: 0.96923
Epoch: 20, Training Loss: 0.96769
Epoch: 21, Training Loss: 0.96622
Epoch: 22, Training Loss: 0.96480
Epoch: 23, Training Loss: 0.96342
Epoch: 24, Training Loss: 0.96206
Epoch: 25, Training Loss: 0.96071
Epoch: 26, Training Loss: 0.95937
Epoch: 27, Training Loss: 0.95803
Epoch: 28, Training Loss: 0.95667
Epoch: 29, Training Loss

# Long Short Term Memory (LSTM)

### *Training LSTM using skorch*

In [14]:
#h_s=3
input_size = 2
seq_length = X_train.shape[1]//input_size
hidden_size = 3
num_layers = 2
num_classes = 3
model_lstm = NeuralNetClassifier(module = LSTM,
                            module__input_size = input_size,
                            module__hidden_size = hidden_size,
                            module__num_layers = num_layers,
                            module__seq_length  = seq_length,
                            module__num_classes = num_classes,
                            device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                            criterion = nn.CrossEntropyLoss,
                            optimizer = optim.Adam,
                            lr = 0.008,
                            callbacks=[('tr_acc', callbacks.EpochScoring('accuracy',
                                                                         lower_is_better=False,
                                                                         on_train=True,
                                                                         name='train_acc'))],
                            batch_size = 2000,
                            max_epochs= 70)

In [9]:
trainer.train_lstm(LSTM,params={
    'input_size': input_size,
    'seq_length': X_train.shape[1]//input_size,
    'hidden_size': 3,
    'num_layers': 2,
    'num_classes': 3
})

Start training LSTM
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.3365[0m        [32m1.1039[0m       [35m0.3313[0m        [31m1.0987[0m  0.6560
      2       [36m0.3374[0m        [32m1.0942[0m       [35m0.4233[0m        [31m1.0798[0m  0.3920
      3       [36m0.4074[0m        [32m1.0742[0m       0.3954        [31m1.0597[0m  0.4590
      4       0.3998        [32m1.0653[0m       0.4114        [31m1.0548[0m  0.4310
      5       [36m0.4130[0m        [32m1.0599[0m       0.4160        [31m1.0490[0m  0.4850
      6       [36m0.4150[0m        [32m1.0533[0m       0.4177        [31m1.0424[0m  0.4190
      7       [36m0.4178[0m        [32m1.0474[0m       [35m0.4236[0m        [31m1.0367[0m  0.4730
      8       0.4174        [32m1.0402[0m       0.4170        [31m1.0345[0m  0.4190
      9       [36m0.4198[0m        [32m1.0357[0m   

## *Training LSTM-CNN*

In [3]:
trainer.train_rcnn(ConvRNN,params={
    'hidden_size': 30,
    'out_channels': 30,
    'shape':(5,10)
})

Start training LSTM-CNN
  epoch    train_acc    train_loss    valid_acc    valid_loss     dur
-------  -----------  ------------  -----------  ------------  ------
      1       [36m0.3431[0m        [32m1.0977[0m       [35m0.3569[0m        [31m1.0927[0m  2.1831
      2       [36m0.3756[0m        [32m1.0884[0m       [35m0.4193[0m        [31m1.0757[0m  0.5420
      3       [36m0.4012[0m        [32m1.0709[0m       0.4153        [31m1.0510[0m  0.6190
      4       [36m0.4036[0m        [32m1.0529[0m       [35m0.4220[0m        [31m1.0346[0m  0.5529
      5       [36m0.4108[0m        [32m1.0423[0m       [35m0.4256[0m        [31m1.0299[0m  0.5990
      6       [36m0.4154[0m        [32m1.0361[0m       0.4250        [31m1.0242[0m  0.5440
      7       0.4140        [32m1.0317[0m       0.4220        [31m1.0215[0m  0.5340
      8       0.4154        [32m1.0295[0m       0.4250        [31m1.0198[0m  0.6010
      9       0.4153        [32m1.0279[0