# **Titanic - Machine Learning from Disaster.**

Start here! Predict survival on the Titanic and get familiar with ML basics.

> [**Kaggle Dataset**](https://www.kaggle.com/competitions/titanic/data)



In [None]:
# Install Kaggle.
!pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
# Files Upload.
from google.colab import files

files.upload()

In [3]:
# Create a Kaggle Folder.
!mkdir ~/.kaggle

# Copy the kaggle.json to the folder created.
!cp kaggle.json ~/.kaggle/

# Permission for the json file to act.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Dataset Download.
!kaggle competitions download -c titanic

In [None]:
# Unzip Dataset.
!unzip titanic.zip

# **Titanic Classification using PyTorch.**

In [6]:
# Import Library.
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load Dataset.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("gender_submission.csv")


# Data Preprocessing.
def data_preprocess(dataframe):
    # Drop Unimportant Features.
    dataframe.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1, inplace=True)

    # Encode Categorical Features.
    sex = pd.get_dummies(dataframe["Sex"], drop_first=True)
    embark = pd.get_dummies(dataframe["Embarked"], drop_first=True)

    dataframe = pd.concat([dataframe, sex, embark], axis=1)
    dataframe.drop(["Sex", "Embarked"], axis=1, inplace=True)

    # Handle Missing Values.
    dataframe.fillna(dataframe.mean(), inplace=True)

    return dataframe


# Apply Data Preprocessing.
train = data_preprocess(train)
test = data_preprocess(test)

# Split Dataset into Feature and Target Set.
X = train.iloc[:, 1:].values
y = train.iloc[:, 0].values

# Split Dataset into Training and Validation Set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Feature Scaling.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


# 1. Model Building (Binaryclass Classification).
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, num_classes=2):
        super(NeuralNetwork, self).__init__()

        self.layer_1 = nn.Linear(input_size, 256)
        self.layer_2 = nn.Linear(256, 128)
        self.layer_3 = nn.Linear(128, 32)
        self.layer_out = nn.Linear(32, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)
        self.batchnorm_1 = nn.BatchNorm1d(256)
        self.batchnorm_2 = nn.BatchNorm1d(128)
        self.batchnorm_3 = nn.BatchNorm1d(32)

    def forward(self, x):
        x = self.layer_1(x)
        x = self.batchnorm_1(x)
        x = self.relu(x)

        x = self.layer_2(x)
        x = self.batchnorm_2(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.layer_3(x)
        x = self.batchnorm_3(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.layer_out(x)

        return x


model = NeuralNetwork(input_size=X_train.shape[1])
print(model)


# 2. Model Hyperparameters.
num_epochs = 20
batch_size = 16
train_batch_num = len(X_train) // batch_size
test_batch_num = len(X_test) // batch_size
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# 3. Model Training Loop.
train_epoch_loss = 0
val_epoch_loss = 0
val_loss_min = np.Inf

for epoch in range(1, num_epochs + 1):
    model.train()

    for i in range(train_batch_num):
        start = i * batch_size
        end = start + batch_size

        X_train_data = Variable(torch.FloatTensor(X_train[start:end]))
        y_train_data = Variable(torch.LongTensor(y_train[start:end]))

        # Forward Pass and Loss Calculation.
        optimizer.zero_grad()  # Clear Gradient.
        y_train_pred = model(X_train_data)
        train_loss = criterion(y_train_pred, y_train_data)

        # Backward Pass and Weight's Updation.
        train_loss.backward()
        optimizer.step()

        _, train_labels = torch.max(y_train_pred, 1)
        train_num_right = np.sum(train_labels.data.numpy() == y_train[start:end])
        train_epoch_loss += train_loss.item() * batch_size

    train_epoch_loss = train_epoch_loss / len(X_train)

    # torch.no_grad() tells PyTorch not to perform back-propagation, which reduces memory usage and speeds up computation.
    with torch.no_grad():
        model.eval()

        for i in range(test_batch_num):
            start = i * batch_size
            end = start + batch_size

            X_test_data = Variable(torch.FloatTensor(X_test[start:end]))
            y_test_data = Variable(torch.LongTensor(y_test[start:end]))

            # Forward Pass and Loss Calculation.
            y_test_pred = model(X_test_data)
            val_loss = criterion(y_test_pred, y_test_data)

            _, val_labels = torch.max(y_test_pred, 1)
            val_num_right = np.sum(val_labels.data.numpy() == y_test[start:end])
            val_epoch_loss += val_loss.item() * batch_size

    val_epoch_loss = val_epoch_loss / len(X_test)

    if epoch % 1 == 0:
        print(
            f"Epoch {epoch+0:03}: | Train Loss: {train_epoch_loss:.3f} | Val Loss: {val_epoch_loss:.3f} | Train Accuracy: {train_num_right/len(y_train[start:end]):.3f} | Val Accuracy: {val_num_right/len(y_test[start:end]):.3f}"
        )
        if val_epoch_loss <= val_loss_min:
            print(
                "Validation loss decreased ({:3f} ===> {:3f}). Saving the model...".format(
                    val_loss_min, val_epoch_loss
                )
            )
            torch.save(model.state_dict(), "titanic_model.pt")
            val_loss_min = val_epoch_loss
        print("")


print("Training Ended!")


# 4. Model Prediction.
test = test.iloc[:, :].values
test = Variable(torch.FloatTensor(test), requires_grad=False)

# Load PyTorch Model.
titanic_model = NeuralNetwork(input_size=X_test.shape[1])
titanic_model.load_state_dict(torch.load("titanic_model.pt"))
model.eval()

with torch.no_grad():
    predictions = model(test)

_, labels = torch.max(predictions, 1)
survived = labels.data.numpy()

# 5. Final Submission.
submission = pd.DataFrame({"PassengerId": sub["PassengerId"], "Survived": survived})
submission.to_csv("submission.csv", index=False)

NeuralNetwork(
  (layer_1): Linear(in_features=8, out_features=256, bias=True)
  (layer_2): Linear(in_features=256, out_features=128, bias=True)
  (layer_3): Linear(in_features=128, out_features=32, bias=True)
  (layer_out): Linear(in_features=32, out_features=2, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (batchnorm_1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm_2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm_3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
Epoch 001: | Train Loss: 0.503 | Val Loss: 0.478 | Train Accuracy: 0.750 | Val Accuracy: 0.812
Validation loss decreased (inf ===> 0.477631). Saving the model...

Epoch 002: | Train Loss: 0.453 | Val Loss: 0.454 | Train Accuracy: 0.750 | Val Accuracy: 0.812
Validation loss decreased (0.477631 ===> 0.454448). Saving the model...

Epoch 003: | Train Loss: 0.416 | Val Lo