In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader

### Improved Neural Network
The following model is the base class for all the other models I created.
It provides the following methods:
##### compile
Provides essential parameters for training the model.
###### Parameters:
- loss_function
- optimizer
- batch_size
- device - device on which model is trained and makes predictions (using cuda for faster computations is highly recommended)

##### train_on_data
Trains the model. After the training is completed, it prints a "Done." message.
###### Parameters:
- train_dataset - PyTorch's Dataset for model training
- test_dataset - PyTorch's Dataset for model validation
- n_epochs - number of epochs (full passes over train_dataset) to train for
- print_step - the validation loss is printed once every print_step epochs (default: 1)

##### predict
Makes prediction for given data.
###### Parameters:
- test_dataset - a PyTorch tensor holding the inputs the model should make predictions for

In [2]:
class ImprovedNeuralNetwork(nn.Module):
    """Base ``nn.Module`` that adds a small Keras-like training API.

    Subclasses define the layers and ``forward``; this class contributes:

    - ``compile``       - store the loss, optimizer, batch size and device
    - ``train_on_data`` - fit on one dataset and validate on another
    - ``predict``       - run inference on a tensor of inputs

    ``compile`` must be called before ``train_on_data`` or ``predict``
    because both read the attributes it sets.

    NOTE(review): newer PyTorch releases define ``nn.Module.compile``; this
    class overrides it with a different signature - confirm that is acceptable.
    """

    def __train_loop(self, dataloader):
        """Run one optimisation pass over every batch of ``dataloader``."""
        self.train()
        for X, y in dataloader:
            X, y = X.to(self.device), y.to(self.device)
            # Clear gradients before the backward pass so that anything
            # accumulated outside this loop cannot leak into the first step.
            self.optimizer.zero_grad()
            predictions = self(X)
            # unsqueeze adds an axis at position 1 to the targets
            # (assumes y is 1-D and the model emits one column - TODO confirm).
            loss = self.loss_function(
                predictions,
                torch.unsqueeze(y, 1)
            )
            # Backpropagation
            loss.backward()
            self.optimizer.step()

    def __test_loop(self, dataloader):
        """Return the loss averaged over the batches of ``dataloader``.

        The model is switched to evaluation mode and no gradients are tracked.
        """
        self.eval()
        test_loss = 0
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(self.device), y.to(self.device)
                predictions = self(X)
                test_loss += self.loss_function(
                    predictions,
                    torch.unsqueeze(y, 1)
                )
        # Dividing test_loss by number of batches
        test_loss /= len(dataloader)
        return test_loss

    def compile(self, loss_function, optimizer, batch_size, device):
        """Store the training settings and move the model to ``device``.

        Parameters:
        - loss_function - callable invoked as ``loss_function(predictions, targets)``
        - optimizer - optimizer whose ``step``/``zero_grad`` drive the training
        - batch_size - batch size used by the DataLoaders in ``train_on_data``
        - device - device used for training and for predictions
        """
        self.loss_function = loss_function
        self.optimizer = optimizer
        self.batch_size = batch_size
        self.device = device
        self.to(device)

    def train_on_data(self, train_dataset, test_dataset, n_epochs, print_step = 1):
        """Train for ``n_epochs`` epochs, validating after each one.

        Parameters:
        - train_dataset - Dataset yielding ``(X, y)`` pairs used for training
        - test_dataset - Dataset yielding ``(X, y)`` pairs used for validation
        - n_epochs - number of passes over ``train_dataset``
        - print_step - the validation loss is printed every ``print_step`` epochs

        Prints "Done." when finished and leaves the model on the CPU.
        """
        train_dataloader = DataLoader(
            train_dataset,
            batch_size = self.batch_size,
            shuffle = True
        )
        # Validation needs no shuffling: a fixed batch composition keeps the
        # reported loss comparable from one epoch to the next.
        test_dataloader = DataLoader(
            test_dataset,
            batch_size = self.batch_size,
            shuffle = False
        )

        self.to(self.device)
        for i in range(n_epochs):
            self.__train_loop(train_dataloader)
            test_loss = self.__test_loop(test_dataloader)
            if (i + 1) % print_step == 0:
                print("Epoch", i, ", loss:", test_loss)
        print("Done.")
        self.to('cpu')

    def predict(self, test_dataset):
        """Return the model output for the tensor ``test_dataset``.

        Inference runs in evaluation mode with gradient tracking disabled.
        Afterwards the previous train/eval mode is restored and the model is
        moved back to the CPU. The returned tensor is whatever the forward
        pass produced on ``self.device``; it is not moved.
        """
        was_training = self.training
        self.to(self.device)
        # Layers such as Dropout or BatchNorm behave differently in training
        # mode, so force evaluation mode for the duration of the forward pass.
        self.eval()
        inputs = test_dataset.to(self.device)
        try:
            with torch.no_grad():
                predictions = self(inputs)
        finally:
            self.train(was_training)
            self.to('cpu')
        return predictions

### Simplified Neural Network
The following model simplifies the process of creating a neural network. To create the model, one has to provide the following parameters:
- input_shape - the number of input features
- layer_sizes - list with the number of neurons in each layer
The model then creates a series of linear layers separated by PyTorch's GELU activation function.
I used GELU instead of ReLU, because it has been reported to yield better results without significantly increasing the computation time (it also has a nice probabilistic interpretation).

In [3]:
class SimplifiedNeuralNetwork(ImprovedNeuralNetwork):
    """Stack of Linear layers with a GELU activation between consecutive layers.

    Parameters:
    - input_shape - number of input features of the first Linear layer
    - layer_sizes - output width of each Linear layer, in order
    """

    def __init__(self, input_shape, layer_sizes):
        super().__init__()
        self.flatten = nn.Flatten()
        # The first layer maps the input features onto the first width ...
        layers = [nn.Linear(input_shape, layer_sizes[0])]
        # ... and every following layer is preceded by a GELU activation.
        for n_in, n_out in zip(layer_sizes, layer_sizes[1:]):
            layers.append(nn.GELU())
            layers.append(nn.Linear(n_in, n_out))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and pass it through the layer stack."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

### BinaryRegressor
This is basically SimplifiedNeuralNetwork, but after passing data through the neural network, it passes it additionally through the PyTorch's sigmoid layer, so that each number it produces is in range [0, 1].

In [4]:
class BinaryRegressor(SimplifiedNeuralNetwork):
    """SimplifiedNeuralNetwork whose output is passed through a sigmoid layer."""

    def __init__(self, input_shape, layer_sizes):
        super().__init__(input_shape, layer_sizes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the parent network's output squashed by the sigmoid."""
        raw_output = super().forward(x)
        squashed = self.sigmoid(raw_output)
        return squashed

### Usage example

Below I present an example of how to use the BinaryRegressor class (the presentation encompasses all methods that were added by me in other classes as well).
For the presentation I will use data from Titanic Competition that was held on kaggle.com.

In [5]:
import random
import pandas as pd
import numpy as np
import torch.nn.functional as F
from torch.utils.data import TensorDataset

In [6]:
# Load both splits and index every row by its PassengerId.
train_data = pd.read_csv('train.csv').set_index('PassengerId')
test_data = pd.read_csv('test.csv').set_index('PassengerId')

Below I perform simple preprocessing. I will not go into details as it is not the purpose of this text.

In [7]:
features = ['Pclass', 'Parch', 'Fare']
SPLIT_SEED = 42  # fixed seed so the train/validation split is reproducible

# Rows without a label cannot be used for supervised training.
train_data.dropna(axis = 'index', subset = ['Survived'], inplace = True)
y = train_data.Survived
X_full = train_data.drop(['Survived'], axis = 'columns')[features]
# NOTE(review): the feature columns are not checked for missing values;
# any NaN would propagate into the model's output - confirm they are complete.
X_test = test_data[features]

# Hold out one fifth of the labelled rows for validation. The labels are drawn
# from the frame's own index: sampling from range(1, n) could never select the
# label n and would raise a KeyError for any label removed by the dropna above.
random_subset = random.Random(SPLIT_SEED).sample(list(X_full.index), len(X_full) // 5)
X_valid, y_valid = X_full.loc[random_subset, :], y.loc[random_subset]
X_train, y_train = X_full.drop(random_subset), y.drop(random_subset)

We specify the shape of our input, which in the case of tabular data is simply the number of columns in the dataframe.

In [8]:
# One input feature per column of the training frame.
input_shape = len(X_train.columns)

We define our model and set training parameters.

In [9]:
# Five Linear layers (512 -> 256 -> 128 -> 64 -> 1) ending in a single output.
model = BinaryRegressor(input_shape, [512, 256, 128, 64, 1])

# Training settings, named individually before being handed to compile().
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model.compile(
    loss_function = loss_function,
    optimizer = optimizer,
    batch_size = 64,
    device = device
)

We create Datasets from our pandas DataFrames.

In [10]:
# Build one TensorDataset per split; features and labels are both converted
# to torch.float tensors.
train_dataset, valid_dataset = (
    TensorDataset(
        torch.tensor(features_frame.values, dtype = torch.float),
        torch.tensor(labels.values, dtype = torch.float)
    )
    for features_frame, labels in ((X_train, y_train), (X_valid, y_valid))
)

And finally we train our model.

In [11]:
# Train for 10 epochs, reporting the validation loss after every epoch.
model.train_on_data(
    train_dataset = train_dataset,
    test_dataset = valid_dataset,
    n_epochs = 10
)

Epoch 0 , loss: tensor(0.6268, device='cuda:0')
Epoch 1 , loss: tensor(0.6035, device='cuda:0')
Epoch 2 , loss: tensor(0.5930, device='cuda:0')
Epoch 3 , loss: tensor(0.5808, device='cuda:0')
Epoch 4 , loss: tensor(0.5773, device='cuda:0')
Epoch 5 , loss: tensor(0.6631, device='cuda:0')
Epoch 6 , loss: tensor(0.5841, device='cuda:0')
Epoch 7 , loss: tensor(0.5663, device='cuda:0')
Epoch 8 , loss: tensor(0.5830, device='cuda:0')
Epoch 9 , loss: tensor(0.5631, device='cuda:0')
Done.


After training the model, we can make some predictions.

In [12]:
# Convert the test features to a float tensor, predict, and show the first ten values.
X_test_tensor = torch.tensor(X_test.values, dtype = torch.float)
probabilities_predictions = model.predict(X_test_tensor)
probabilities_predictions[:10].squeeze()

tensor([0.2300, 0.2311, 0.4035, 0.2370, 0.4093, 0.2512, 0.2297, 0.4550, 0.2303,
        0.4401], device='cuda:0')