# Neural Network Model for Tabular Data in PyTorch

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [2]:
train_df = pd.read_csv("avila-tr.txt", header=None)
test_df = pd.read_csv("avila-ts.txt", header=None)

In [3]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.266074,-0.16562,0.32098,0.483299,0.17234,0.273364,0.371178,0.929823,0.251173,0.159345,A
1,0.130292,0.870736,-3.210528,0.062493,0.261718,1.43606,1.46594,0.636203,0.282354,0.515587,A
2,-0.116585,0.069915,0.068476,-0.783147,0.261718,0.439463,-0.081827,-0.888236,-0.123005,0.582939,A
3,0.031541,0.2976,-3.210528,-0.58359,-0.721442,-0.307984,0.710932,1.051693,0.594169,-0.533994,A
4,0.229043,0.807926,-0.052442,0.082634,0.261718,0.14879,0.635431,0.051062,0.032902,-0.086652,F


In [4]:
col_names = ['col_'+str(j+1) for j in range(train_df.shape[1]-1)]
indep_cols = col_names.copy()
col_names.append('y')

In [5]:
train_df.columns = col_names
test_df.columns = col_names

In [6]:
train_df.head()

Unnamed: 0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,y
0,0.266074,-0.16562,0.32098,0.483299,0.17234,0.273364,0.371178,0.929823,0.251173,0.159345,A
1,0.130292,0.870736,-3.210528,0.062493,0.261718,1.43606,1.46594,0.636203,0.282354,0.515587,A
2,-0.116585,0.069915,0.068476,-0.783147,0.261718,0.439463,-0.081827,-0.888236,-0.123005,0.582939,A
3,0.031541,0.2976,-3.210528,-0.58359,-0.721442,-0.307984,0.710932,1.051693,0.594169,-0.533994,A
4,0.229043,0.807926,-0.052442,0.082634,0.261718,0.14879,0.635431,0.051062,0.032902,-0.086652,F


In [7]:
le = LabelEncoder()
le.fit(train_df['y'])

LabelEncoder()

In [8]:
le.classes_

array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'W', 'X', 'Y'],
      dtype=object)

In [9]:
le.transform(le.classes_)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [10]:
train_df['y_enc'] = le.transform(train_df['y'])
test_df['y_enc'] = le.transform(test_df['y'])

In [11]:
train_df.head()

Unnamed: 0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,y,y_enc
0,0.266074,-0.16562,0.32098,0.483299,0.17234,0.273364,0.371178,0.929823,0.251173,0.159345,A,0
1,0.130292,0.870736,-3.210528,0.062493,0.261718,1.43606,1.46594,0.636203,0.282354,0.515587,A,0
2,-0.116585,0.069915,0.068476,-0.783147,0.261718,0.439463,-0.081827,-0.888236,-0.123005,0.582939,A,0
3,0.031541,0.2976,-3.210528,-0.58359,-0.721442,-0.307984,0.710932,1.051693,0.594169,-0.533994,A,0
4,0.229043,0.807926,-0.052442,0.082634,0.261718,0.14879,0.635431,0.051062,0.032902,-0.086652,F,5


In [12]:
train_df.shape, test_df.shape

((10430, 12), (10437, 12))

In [13]:
class AvilaDataset(Dataset):
    def __init__(self, data_frame, indep_cols, dep_col):
        data_frame = data_frame.copy()
        self.x = data_frame.loc[:, indep_cols].copy().values.astype(np.float32)        
        self.y = data_frame.loc[:, dep_col].copy().values
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [14]:
# Create train and validation dataloaders
train_ds = AvilaDataset(data_frame=train_df, indep_cols=indep_cols, dep_col='y_enc')
valid_ds = AvilaDataset(data_frame=test_df, indep_cols=indep_cols, dep_col='y_enc')

# Should be some exponent of 2 (128, 256)
batch_size = 256
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [16]:
# Model Structure
# Try to remove relu from the last layer
# Don't put drop and bn in the name network

class AvilaNNModelV1(nn.Module):
    
    def __init__(self, comps, classes):
        super().__init__()
        self.comps = comps
        self.classes = classes
        self.bn1 = nn.BatchNorm1d(self.comps)
        self.lin1 = nn.Linear(self.comps, 50)
        self.drops = nn.Dropout(0.3)
        self.bn2 = nn.BatchNorm1d(50)
        self.lin2 = nn.Linear(50, 70)
        self.bn3 = nn.BatchNorm1d(70)
        self.lin3 = nn.Linear(70, self.classes)
    
    def forward(self, x):
        x = self.bn1(x)
        x = F.relu(self.lin1(x))
        x = self.drops(x)
        x = self.bn2(x)
        x = F.relu(self.lin2(x))
        x = self.drops(x)
        x = self.bn3(x)
#         logits = F.relu(self.lin3(x))
        logits = self.lin3(x)
        return logits

In [17]:
model = AvilaNNModelV1(10, 12)

In [18]:
model = model.to(device)

In [19]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0)

In [20]:
# Training Function
def train(dataloader, model, loss_fn, optimizer):
    # model.train() # Having this line prevents my model accuracy going beyond 70%
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0
    correct = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    
    train_loss /= num_batches
    correct /= size
    return 100*correct, train_loss

In [21]:
# Inference
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return 100*correct, test_loss

In [22]:
epochs = 100
best_loss = 1000
for t in range(epochs):
    train_acc, train_loss = train(train_dl, model, loss_fn, optimizer)
    test_acc, test_loss = test(valid_dl, model, loss_fn)
    if test_loss < best_loss:Welcome
        # save model checkpoint
        best_epoch = t + 1
        torch.save({'epochs': best_epoch,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   "avila_chkpoint.pth")
        best_loss = test_loss
    print(f"Epoch {t+1} Train Accuracy: {train_acc:>0.1f}%, Train Loss: {train_loss:>8f}, Test Accuracy: {test_acc:>0.1f}%, Test loss: {test_loss:>8f}")
print("Done!")

Epoch 1 Train Accuracy: 25.4%, Train Loss: 2.344850, Test Accuracy: 49.9%, Test loss: 1.937866
Epoch 2 Train Accuracy: 55.1%, Train Loss: 1.557557, Test Accuracy: 57.0%, Test loss: 1.301057
Epoch 3 Train Accuracy: 59.5%, Train Loss: 1.211687, Test Accuracy: 60.7%, Test loss: 1.149702
Epoch 4 Train Accuracy: 62.3%, Train Loss: 1.092546, Test Accuracy: 63.1%, Test loss: 1.069381
Epoch 5 Train Accuracy: 64.1%, Train Loss: 1.019719, Test Accuracy: 64.7%, Test loss: 1.005361
Epoch 6 Train Accuracy: 65.8%, Train Loss: 0.957880, Test Accuracy: 65.7%, Test loss: 0.951836
Epoch 7 Train Accuracy: 67.1%, Train Loss: 0.904176, Test Accuracy: 67.2%, Test loss: 0.906212
Epoch 8 Train Accuracy: 68.4%, Train Loss: 0.864039, Test Accuracy: 68.3%, Test loss: 0.868830
Epoch 9 Train Accuracy: 69.7%, Train Loss: 0.828271, Test Accuracy: 69.3%, Test loss: 0.841738
Epoch 10 Train Accuracy: 71.0%, Train Loss: 0.793414, Test Accuracy: 70.0%, Test loss: 0.812738
Epoch 11 Train Accuracy: 71.4%, Train Loss: 0.766

In [23]:
# Load checkpoint
best_model = AvilaNNModelV1(10, 12)
checkpoint = torch.load("avila_chkpoint.pth")
best_model.load_state_dict(checkpoint['model'])
best_model.eval()
print(f"Loaded model from epoch {checkpoint['epochs']}")

Loaded model from epoch 93


In [24]:
# Get predicted values from the validation data loader using the best model
preds = []
best_model.eval()
with torch.no_grad():
    for X, y in valid_dl:
        X, y = X.to(device), y.to(device)
        out = best_model(X)        
        prob = F.softmax(out, dim=1)
        preds.append(prob)

# Concatenate the batch outputs of probabilities
for j in range(len(preds)):
    if j == 0:
        pred_array = preds[j]
    else:
        pred_array = torch.cat((pred_array, preds[j]), 0)

In [25]:
# Get the predicted class based on the max probability
pred_array = pred_array.numpy()
pred_df = pd.DataFrame(pred_array, columns=le.classes_)
pred_df['pred_enc'] = pred_array.argmax(axis=1)
pred_df['pred_class'] = le.inverse_transform(pred_df['pred_enc'])
test_df['pred_class'] = pred_df['pred_class']

In [26]:
test_df[['y', 'pred_class']].head()

Unnamed: 0,y,pred_class
0,W,W
1,A,I
2,I,I
3,E,E
4,A,A


In [28]:
# save model
torch.save({'epochs': best_epoch,
            'model': best_model.state_dict(),
            'optimizer': optimizer.state_dict()},
            "avila_nn1.pth")

In [None]:
# load model
model2 = AvilaNNModelV1(10, 12)
checkpoint = torch.load("avila_nn1.pth")
model2.load_state_dict(checkpoint['model'])
model2.eval()

### Convert model to onnx format

In [29]:
import torch.onnx

In [30]:
# create input to the model
i = 1
for X, y in valid_dl:
    X, y = X.to(device), y.to(device)
    if i == 1:
        break

obs = X[0]
obs = obs.view(1, 10)

In [31]:
best_model.eval()
with torch.no_grad():
    obs_out = best_model(obs)

In [32]:
# Export the model
torch.onnx.export(best_model,               # model being run
                  obs,                         # model input (or a tuple for multiple inputs)
                  "avila_nn1.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  verbose=True)

graph(%input : Float(1, 10, strides=[10, 1], requires_grad=0, device=cpu),
      %bn1.weight : Float(10, strides=[1], requires_grad=1, device=cpu),
      %bn1.bias : Float(10, strides=[1], requires_grad=1, device=cpu),
      %bn1.running_mean : Float(10, strides=[1], requires_grad=0, device=cpu),
      %bn1.running_var : Float(10, strides=[1], requires_grad=0, device=cpu),
      %lin1.weight : Float(50, 10, strides=[10, 1], requires_grad=1, device=cpu),
      %lin1.bias : Float(50, strides=[1], requires_grad=1, device=cpu),
      %bn2.weight : Float(50, strides=[1], requires_grad=1, device=cpu),
      %bn2.bias : Float(50, strides=[1], requires_grad=1, device=cpu),
      %bn2.running_mean : Float(50, strides=[1], requires_grad=0, device=cpu),
      %bn2.running_var : Float(50, strides=[1], requires_grad=0, device=cpu),
      %lin2.weight : Float(70, 50, strides=[50, 1], requires_grad=1, device=cpu),
      %lin2.bias : Float(70, strides=[1], requires_grad=1, device=cpu),
      %bn3.weig