# PyTorch solution

## Imports

In [1]:
import torch
from torch import nn
import pandas as pd
import numpy as np

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

## Data

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
X_test = pd.read_csv('data/processed/X_test.csv')

X_train = torch.tensor(X_train.values, dtype=torch.float32, device=device)
y_train = torch.tensor(y_train.values, dtype=torch.float32, device=device)
X_test = torch.tensor(X_test.values, dtype=torch.float32, device=device)

In [4]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

torch.Size([891, 15])
torch.Size([891, 1])
torch.Size([417, 15])


In [5]:
class_weights = compute_class_weight(class_weight='balanced',
                                     y=y_train.numpy().reshape(-1),
                                     classes=np.unique(y_train))
class_weights = torch.tensor(class_weights).type(torch.float32)
class_weights

tensor([0.8115, 1.3026])

## PyTorch model

### nn.Module class

In [6]:
class BCModel(nn.Module):
    def __init__(self, in_features: int, hidden_units: int=10, dropout_p: float=0.0):
        super().__init__()
        self.l1 = nn.Linear(in_features=in_features, out_features=hidden_units)
        self.l2 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.l3 = nn.Linear(in_features=hidden_units, out_features=1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.relu(self.l1(x))
        x = self.dropout(x)
        x = self.relu(self.l2(x))
        x = self.l3(x)
        x = self.sigmoid(x)
        return x

In [7]:
model = BCModel(in_features=X_train.shape[1],
                hidden_units=100)

### Loss function & optimizer

In [8]:
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.95)

### Training loop

In [9]:
def train(X: torch.Tensor, y: torch.Tensor, num_epochs: int, show_output: bool=False) -> None:
    header_shown = False

    for epoch in range(num_epochs):
        model.train()
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if show_output:
            if not header_shown:
                print('{:^15} | {:^15} | {:^15} | {:^15} | {:^15}'.format('Epoch', 'Loss', 'Precision', 'Recall', 'F1'))
                header_shown = True
            if (epoch+1) % 50 == 0:
                model.eval()
                with torch.inference_mode():
                    y_pred_test = model(X)
                    y_pred_test = torch.round(y_pred_test)
                    loss_test = loss_fn(y, y_pred_test)
                    precision = precision_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    recall = recall_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    f1 = f1_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    # print(f'Epoch {epoch+1}: Loss: {loss_test:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1-score: {f1:.4f}')
                    print('{:<15} | {:<15.4f} | {:<15.4f} | {:<15.4f} | {:<15.4f}'.format(epoch+1, loss_test, precision, recall, f1))

In [10]:
train(X_train, y_train, 1000, show_output=True)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       
50              | 21.7733         | 0.7151          | 0.7193          | 0.7172         
100             | 29.4052         | 0.8922          | 0.2661          | 0.4099         
150             | 20.7632         | 0.7175          | 0.7573          | 0.7368         
200             | 17.7329         | 0.7875          | 0.7368          | 0.7613         
250             | 16.9473         | 0.7975          | 0.7485          | 0.7722         
300             | 16.9473         | 0.7994          | 0.7456          | 0.7716         
350             | 16.6105         | 0.8070          | 0.7456          | 0.7751         
400             | 16.6105         | 0.8050          | 0.7485          | 0.7758         
450             | 16.3861         | 0.8403          | 0.7076          | 0.7683         
500             | 15.6004         | 0.8664          | 0.7018          | 0.7754         
550             | 16.2738       

## Hyperparameter tuning

### Building the TitanicClassifier

In [11]:
class TitanicClassifier(BaseEstimator, ClassifierMixin):
    
    def __init__(self, in_features: int, lr: float=0.01, num_epochs: int=1000,
                 hidden_units: int=100, dropout_p: float=0.0,
                 momentum: float=0.0, print_learn_output: bool=False):
        self.in_features = in_features
        self.lr = lr
        self.num_epochs = num_epochs
        self.hidden_units = hidden_units
        self.dropout_p = dropout_p
        self.momentum = momentum
        self.print_learn_output = print_learn_output
        
        self.model = BCModel(in_features=in_features, hidden_units=hidden_units, dropout_p=dropout_p)
        self.loss_fn = nn.BCELoss()
        self.optimizer = torch.optim.SGD(params=self.model.parameters(), lr=lr, momentum=momentum)
        self.classes_ = [0, 1]
    
    def fit(self, X, y):
        train(X, y, num_epochs=self.num_epochs, show_output=self.print_learn_output)
    
    def predict(self, X):
        with torch.inference_mode():
            y_pred = model(X)
            y_pred = torch.round(y_pred)
        return y_pred

In [12]:
titanic_gs = TitanicClassifier(in_features=X_train.shape[1], print_learn_output=True)

### GridSearchCV

In [13]:
param_grid = {
    'momentum': [0.7, 0.8],
    'dropout_p': [0, 0.1]
}

kfold = KFold(n_splits=4, shuffle=True)

grid_search = GridSearchCV(estimator=titanic_gs, param_grid=param_grid, cv=kfold, scoring='precision')
grid_search.fit(X_train, y_train)

print("Best Hyperparameters:", grid_search.best_params_)
print("Best Precision Score:", grid_search.best_score_)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       


50              | 15.8683         | 0.8785          | 0.6543          | 0.7500         
100             | 15.2695         | 0.8373          | 0.7202          | 0.7743         
150             | 17.9641         | 0.7269          | 0.8107          | 0.7665         
200             | 19.1617         | 0.9457          | 0.5021          | 0.6559         
250             | 16.0180         | 0.8333          | 0.6996          | 0.7606         
300             | 15.8683         | 0.8246          | 0.7160          | 0.7665         
350             | 15.4192         | 0.8333          | 0.7202          | 0.7726         
400             | 15.1198         | 0.8586          | 0.6996          | 0.7710         
450             | 15.2695         | 0.8507          | 0.7037          | 0.7703         
500             | 14.6707         | 0.8571          | 0.7160          | 0.7803         
550             | 14.6707         | 0.8718          | 0.6996          | 0.7763         
600             | 15.1198       

### Define another classifier with new hyperparameters

In [14]:
momentum = 0.7
dropout_p = 0.0
lr = 0.01
hidden_units = 30
num_epochs=500

titanic = TitanicClassifier(in_features=X_train.shape[1], lr=lr,
                            hidden_units=hidden_units, dropout_p=dropout_p,
                            momentum=momentum, print_learn_output=True,
                            num_epochs=num_epochs)

In [15]:
titanic.fit(X_train, y_train)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       


50              | 6.5095          | 0.9765          | 0.8509          | 0.9094         
100             | 7.6319          | 0.9692          | 0.8275          | 0.8927         
150             | 5.7239          | 0.9770          | 0.8713          | 0.9212         
200             | 11.8967         | 0.8172          | 0.8889          | 0.8515         
250             | 10.2132         | 0.8769          | 0.8538          | 0.8652         
300             | 6.1728          | 0.9362          | 0.9006          | 0.9180         
350             | 5.9484          | 0.9366          | 0.9064          | 0.9212         
400             | 6.1728          | 0.9527          | 0.8830          | 0.9165         
450             | 15.4882         | 0.7247          | 0.9620          | 0.8266         
500             | 7.9686          | 0.9532          | 0.8333          | 0.8892         


## Predict outputs in test dataset

In [16]:
y_test_pred = titanic.predict(X_test)

### Save the results to csv

In [25]:
from pathlib import Path
Path('./results').mkdir(parents=True, exist_ok=True)

In [26]:
y_test_pred_df = pd.DataFrame(y_test_pred.numpy())
y_test_pred_df.to_csv('results/y_test_predictions.csv', index=False, header=['Survived'])