# PyTorch solution

## Imports

In [78]:
import torch
from torch import nn
import pandas as pd
import numpy as np

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

## Data

In [79]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [80]:
X_train = pd.read_csv('data/processed/X_train.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
X_test = pd.read_csv('data/processed/X_test.csv')

X_train = torch.tensor(X_train.values, dtype=torch.float32, device=device)
y_train = torch.tensor(y_train.values, dtype=torch.float32, device=device)
X_test = torch.tensor(X_test.values, dtype=torch.float32, device=device)

In [81]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

torch.Size([891, 15])
torch.Size([891, 1])
torch.Size([417, 15])


In [82]:
class_weights = compute_class_weight(class_weight='balanced',
                                     y=y_train.numpy().reshape(-1),
                                     classes=np.unique(y_train))
class_weights = torch.tensor(class_weights).type(torch.float32)
class_weights

tensor([0.8115, 1.3026])

## PyTorch model

### nn.Module class

In [83]:
class BCModel(nn.Module):
    def __init__(self, in_features: int, hidden_units: int=10, dropout_p: float=0.0):
        super().__init__()
        self.l1 = nn.Linear(in_features=in_features, out_features=hidden_units)
        self.l2 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.l3 = nn.Linear(in_features=hidden_units, out_features=1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.relu(self.l1(x))
        x = self.dropout(x)
        x = self.relu(self.l2(x))
        x = self.l3(x)
        x = self.sigmoid(x)
        return x

In [84]:
model = BCModel(in_features=X_train.shape[1],
                hidden_units=100)

### Loss function & optimizer

In [85]:
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.95)

### Training loop

In [86]:
def train(X: torch.Tensor, y: torch.Tensor, num_epochs: int, show_output: bool=False) -> None:
    header_shown = False

    for epoch in range(num_epochs):
        model.train()
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if show_output:
            if not header_shown:
                print('{:^15} | {:^15} | {:^15} | {:^15} | {:^15}'.format('Epoch', 'Loss', 'Precision', 'Recall', 'F1'))
                header_shown = True
            if (epoch+1) % 50 == 0:
                model.eval()
                with torch.inference_mode():
                    y_pred_test = model(X)
                    y_pred_test = torch.round(y_pred_test)
                    loss_test = loss_fn(y, y_pred_test)
                    precision = precision_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    recall = recall_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    f1 = f1_score(y_true=y, y_pred=y_pred_test, zero_division=np.nan)
                    # print(f'Epoch {epoch+1}: Loss: {loss_test:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1-score: {f1:.4f}')
                    print('{:<15} | {:<15.4f} | {:<15.4f} | {:<15.4f} | {:<15.4f}'.format(epoch+1, loss_test, precision, recall, f1))

In [87]:
train(X_train, y_train, 1000, show_output=True)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       
50              | 24.6914         | 0.8675          | 0.4211          | 0.5669         
100             | 19.3042         | 0.7560          | 0.7339          | 0.7448         
150             | 17.0595         | 0.8084          | 0.7281          | 0.7662         
200             | 17.1717         | 0.8954          | 0.6257          | 0.7367         
250             | 16.1616         | 0.8322          | 0.7251          | 0.7750         


300             | 16.3861         | 0.8141          | 0.7427          | 0.7768         
350             | 16.7228         | 0.8083          | 0.7398          | 0.7725         
400             | 16.4983         | 0.8037          | 0.7544          | 0.7783         
450             | 16.3861         | 0.8984          | 0.6462          | 0.7517         
500             | 17.1717         | 0.7771          | 0.7749          | 0.7760         
550             | 16.7228         | 0.8755          | 0.6579          | 0.7513         
600             | 16.0494         | 0.8996          | 0.6550          | 0.7580         
650             | 14.7026         | 0.8552          | 0.7427          | 0.7950         
700             | 16.1616         | 0.8960          | 0.6550          | 0.7568         
750             | 14.9270         | 0.8828          | 0.7047          | 0.7837         
800             | 16.3861         | 0.7952          | 0.7719          | 0.7834         
850             | 15.3760       

## Hyperparameter tuning

### Building the TitanicClassifier

In [88]:
class TitanicClassifier(BaseEstimator, ClassifierMixin):
    
    def __init__(self, in_features: int, lr: float=0.01, num_epochs: int=1000,
                 hidden_units: int=100, dropout_p: float=0.0,
                 momentum: float=0.0, print_learn_output: bool=False):
        self.in_features = in_features
        self.lr = lr
        self.num_epochs = num_epochs
        self.hidden_units = hidden_units
        self.dropout_p = dropout_p
        self.momentum = momentum
        self.print_learn_output = print_learn_output
        
        self.model = BCModel(in_features=in_features, hidden_units=hidden_units, dropout_p=dropout_p)
        self.loss_fn = nn.BCELoss()
        self.optimizer = torch.optim.SGD(params=self.model.parameters(), lr=lr, momentum=momentum)
        self.classes_ = [0, 1]
    
    def fit(self, X, y):
        train(X, y, num_epochs=self.num_epochs, show_output=self.print_learn_output)
    
    def predict(self, X):
        with torch.inference_mode():
            y_pred = model(X)
            y_pred = torch.round(y_pred)
        return y_pred

In [89]:
titanic_gs = TitanicClassifier(in_features=X_train.shape[1], print_learn_output=True)

### GridSearchCV

In [118]:
param_grid = {
    'momentum': [0.7, 0.8],
    'dropout_p': [0, 0.1]
}

kfold = KFold(n_splits=4, shuffle=True)

grid_search = GridSearchCV(estimator=titanic_gs, param_grid=param_grid, cv=kfold, scoring='precision')
grid_search.fit(X_train, y_train)

print("Best Hyperparameters:", grid_search.best_params_)
print("Best Precision Score:", grid_search.best_score_)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       
50              | 10.7784         | 0.8532          | 0.8600          | 0.8566         
100             | 7.7844          | 0.9381          | 0.8480          | 0.8908         
150             | 9.5808          | 0.8875          | 0.8520          | 0.8694         
200             | 6.1377          | 0.9604          | 0.8720          | 0.9140         
250             | 6.7365          | 0.9184          | 0.9000          | 0.9091         
300             | 6.4371          | 0.9481          | 0.8760          | 0.9106         
350             | 7.6347          | 0.8933          | 0.9040          | 0.8986         
400             | 7.1856          | 0.9391          | 0.8640          | 0.9000         
450             | 5.9880          | 0.9487          | 0.8880          | 0.9174         
500             | 6.4371          | 0.9295          | 0.8960          | 0.9124         
550             | 7.0359        

KeyboardInterrupt: 

### Define another classifier with new hyperparameters

In [123]:
momentum = 0.7
dropout_p = 0.0
lr = 0.01
hidden_units = 30
num_epochs=500

titanic = TitanicClassifier(in_features=X_train.shape[1], lr=lr,
                            hidden_units=hidden_units, dropout_p=dropout_p,
                            momentum=momentum, print_learn_output=True,
                            num_epochs=num_epochs)

In [124]:
titanic.fit(X_train, y_train)

     Epoch      |      Loss       |    Precision    |     Recall      |       F1       
50              | 5.7239          | 0.9533          | 0.8947          | 0.9231         
100             | 10.4377         | 0.8761          | 0.8480          | 0.8618         
150             | 6.1728          | 0.9335          | 0.9035          | 0.9183         
200             | 5.6117          | 0.9591          | 0.8918          | 0.9242         
250             | 5.6117          | 0.9563          | 0.8947          | 0.9245         
300             | 9.2031          | 0.8892          | 0.8684          | 0.8787         
350             | 7.9686          | 0.9094          | 0.8801          | 0.8945         
400             | 5.6117          | 0.9591          | 0.8918          | 0.9242         
450             | 8.3053          | 0.8508          | 0.9503          | 0.8978         
500             | 5.8361          | 0.9739          | 0.8713          | 0.9198         


## Predict outputs in test dataset

In [125]:
y_test_pred = titanic.predict(X_test)

### Save the results to csv

In [117]:
y_test_pred_df = pd.DataFrame(y_test_pred.numpy())
y_test_pred_df.to_csv('results/y_test_predictions.csv', index=False, header=['Survived'])