In [1]:
import itertools

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

import torch

sns.set_palette('deep')
random_state = 1

# Loading the Data

To focus on the PyTorch side of things and to keep this Kernel short and to the point, we are going to load the features from my previous kernel: [Surviving the Titanic step-by-step with groups](https://www.kaggle.com/dr1t10/surviving-the-titanic-step-by-step-with-groups).

In [2]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

These are the features we are going to use for the training set:

In [3]:
train.head()

Unnamed: 0,Survived,Pclass,AgeBand,IsMale,InGroup,InWcg,WcgAllSurvived,WcgAllDied
0,0,3,1,1,0,0,0,0
1,1,1,2,0,1,0,0,0
2,1,3,1,0,0,0,0,0
3,1,1,2,0,1,0,0,0
4,0,3,2,1,0,0,0,0


and the test set:

In [4]:
test.head()

Unnamed: 0,Pclass,AgeBand,IsMale,InGroup,InWcg,WcgAllSurvived,WcgAllDied
0,3,2,1,0,0,0,0
1,3,3,0,0,0,0,0
2,2,3,1,0,0,0,0
3,3,1,1,0,0,0,0
4,3,1,0,1,1,1,0


A short description of the features:
* **Pclass**: The ticket class;
* **AgeBand**: `Age` split into 4 bands using K-Means. Ranges from 0 to 3 with ascending `Age`;
* **IsMale**: `Sex` in binary form;
* **InGroup**: Is `1` if the passenger is in a group; otherwise `0`;
* **InWcg**: Is `1` if the passenger is in a woman-child-group; otherwise `0`;
* **WcgAllSurvived**: Equal to `1` if all members of its woman-child-group survived; otherwise `0`;
* **WcgAllDied**: The opposite of `WcgAllSurvived`.

# Neural Network with PyTorch

Split the dataset into training samples, training targets, and testing samples.

In [5]:
# Split the training set into samples and targets
X_train = np.array(train.drop(columns='Survived'))
Y_train = np.array(train['Survived'].astype(int))

# Test set samples to predict
X_test = np.array(test)

# Always important to check if the shapes agree with each other
print("Training samples shape: {}".format(X_train.shape))
print("Training targets shape: {}".format(Y_train.shape))
print("Test samples shape: {}".format(X_test.shape))

Training samples shape: (891, 7)
Training targets shape: (891,)
Test samples shape: (418, 7)


`numpy.array` to `torch.Tensor` conversion and model definition.

In [6]:
from sklearn.model_selection import KFold

def titanic_net(d_in, d_hidden, n_hidden, d_out):
    # Declare the input, hidden, and output layers
    input_layer = torch.nn.ModuleList([torch.nn.Linear(d_in, d_hidden), torch.nn.ReLU()])       
    hidden_layer = torch.nn.ModuleList([torch.nn.Linear(d_hidden, d_hidden), torch.nn.ReLU()])    
    output_layer = torch.nn.ModuleList([torch.nn.Linear(d_hidden, d_out)])    
    
    # Build the model
    layers = torch.nn.ModuleList()
    layers.extend(input_layer)
    for i in range(n_hidden):        
        layers.extend(hidden_layer)
    layers.extend(output_layer)
    
    return torch.nn.Sequential(*layers)

In [7]:
def fit(model, X, y, epochs=250, optim='adam', lr=0.001, use_cuda=True, verbose=0):
    valid_optims = ['sgd', 'rmsprop', 'adam']
    optim = optim.lower()
    if optim.lower() not in valid_optims:
        raise ValueError("invalid optimizer got '{0}' and expect one of {1}".format(optim, valid_optims))
    
    # Move model to CUDA if use_cuda is True
    if (use_cuda):
        X = X.cuda()
        y = y.cuda()
        model = model.cuda()
    
    # Define the loss function - we are dealing with a classification task with two classes
    # binary cross-entropy (BCE) is therefore the most appropriate loss function.
    # Within BCE we can use BCELoss or BCEWithLogitsLoss. The latter is more stable, so we'll
    # that one. It expects logits not predictions which is why our output layer doesn't have
    # an activation function
    loss_fn = torch.nn.BCEWithLogitsLoss()

    # Define the optiomization algorithm
    optim = optim.lower()
    if optim == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    elif optim == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    elif optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for t in range(epochs):
        # Forward pass: The model will return the logits, not predictions
        logits = model(X)

        # Compute loss from logits
        loss = loss_fn(logits, y)

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()    

        # We can get the tensor of predictions by applying the sigmoid non-linearity
        pred = torch.sigmoid(logits)

        # Compute training accuracy
        acc = torch.eq(y, pred.round_()).cpu().float().mean().item()
        
        if verbose > 1:
            print("Epoch {0:>{2}}/{1}: Loss={3:.4f}, Accuracy={4:.4f}"
                  .format(t + 1, epochs, len(str(epochs)), loss.item(), acc))
        
    if verbose > 0:
        print("Training complete! Loss={0:.4f}, Accuracy={1:.4f}".format(loss.item(), acc))

    return {'loss': loss.item(), 'acc': acc}

In [8]:
def cross_val_score(model, X, y, cv=3, optim='adam', lr=0.001, verbose=0):
    train_kfold = KFold(cv, False).split(X)
    score = {'loss': [], 'acc': []}
    for fold, (train_idx, val_idx) in enumerate(train_kfold):    
        X_train = X[train_idx]
        y_train = y[train_idx]
        X_val = X[val_idx]
        y_val = y[val_idx]

        # Convert the training data to Tensors
        X_train = torch.Tensor(X_train)
        y_train = torch.Tensor(y_train).unsqueeze_(-1)
        X_val = torch.Tensor(X_val)
        y_val = torch.Tensor(y_val).unsqueeze_(-1)

        # Train the network
        metrics = fit(model, X_train, y_train, optim=optim, lr=lr, verbose=0)
        score['loss'].append(metrics['loss'])
        score['acc'].append(metrics['acc'])
        if verbose > 1:
            print("Fold {0:>{2}}/{1}: Loss={3:.4f}, Accuracy={4:.4f}"
                  .format(fold + 1, cv, len(str(cv)), metrics['loss'], metrics['acc']))

    if verbose > 0:
        print("Mean k-fold accuracy:", np.mean(score['acc']))
        
    return score

In [9]:
def titanic_net_grid_search(X, y, param_grid, cv=3, verbose=0):
    # Cartesian product of a dictionary of lists
    # Source: https://stackoverflow.com/questions/5228158/cartesian-product-of-a-dictionary-of-lists
    grid_param = list((dict(zip(grid, param)) for param in itertools.product(*grid.values())))
    
    if verbose > 0:
        print("Fitting {0} folds for each of {1} candidates, totalling {2} fits"
             .format(n_folds, len(grid_param), n_folds * len(grid_param)))
        print()

    best_params = None
    best_model = None
    best_score = 0
    for candidate, params in enumerate(grid_param):
        if verbose > 1:
            print("Candidate", candidate + 1)
            print("Parameters: {}".format(params))

        # Model parameters and creation
        d_in = X_train.shape[-1]
        d_hidden = params['d_hidden']
        n_hidden = params['n_hidden']
        d_out = 1    
        model = titanic_net(d_in, d_hidden, n_hidden, d_out)

        # Cross-validation
        cv_score = cross_val_score(model, X_train, Y_train, cv = n_folds, verbose=0)
        cv_acc = np.mean(cv_score['acc'])
        if verbose > 1:
            print("Mean CV accuracy: {0:.4f}".format(cv_acc))    
            print()

        # Check if this  is the best model; if so, store it
        if cv_acc > best_score:
            best_params = params
            best_model = model
            best_score = cv_acc

    if verbose > 0:
        print("Best model")
        print("Parameters: {}".format(best_params))
        print("Mean CV accuracy: {0:.4f}".format(best_score))
    
    return {'best_model': best_model, 'best_params': best_params, 'best_score': best_score}

In [10]:
# Number of folds
n_folds = 10

# Grid search
grid = {
    'lr': [0.0001, 0.001, 0.01, 0.1],
    'd_hidden': [4, 7, 10, 15, 30],
    'n_hidden': [0, 1, 3, 6, 10, 20],
    'optim': ['RMSprop', 'Adam']
}
best_candidate = titanic_net_grid_search(X_train, Y_train, grid, cv=n_folds, verbose=1)
best_model = best_candidate['best_model']

Fitting 10 folds for each of 240 candidates, totalling 2400 fits

Best model
Parameters: {'lr': 0.0001, 'd_hidden': 30, 'n_hidden': 10, 'optim': 'RMSprop'}
Mean CV accuracy: 0.8682


# 6. Predictions and submission <a class="anchor" id="submission"></a>

In [11]:
# Convert test set from numpy to Tensor and generate predictions
X_test_t = torch.Tensor(X_test).cuda()

# Remember that the model outputs logits, we have to apply the sigmoid function and round the result
prediction = torch.sigmoid(best_model(X_test_t).detach()).cpu().round_().numpy().flatten()

# Submission
_test = pd.read_csv("../input/test.csv")
submission_df = pd.DataFrame({'PassengerId': _test['PassengerId'], 'Survived': prediction})
submission_df.to_csv("submission.csv", index=False)

# Storing the datasets
train.to_csv("submission_train.csv", index=False)
test.to_csv("submission_test.csv", index=False)