<a href="https://colab.research.google.com/github/d4rkl0rd3r3b05/AI_ML/blob/main/Titanic_Survival_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
from torch import optim

import matplotlib.pyplot as plt

from pathlib import Path
import requests

import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

## Read and analyse the data

In [None]:
# Load the test and train splits from the local data directory.
test_data = pd.read_csv("data/test.csv")
train_data = pd.read_csv("data/train.csv")

# Remove the variables the prediction is assumed not to depend on. The same
# columns are dropped from both splits so their feature layout stays aligned.
dropped_columns = ["Name", "Ticket", "Fare", "Cabin"]
train_data = train_data.drop(columns=dropped_columns)
test_data = test_data.drop(columns=dropped_columns)

# Drop training records where Embarked is null (per the original author's
# note, only 2 of the 891 records are affected).
train_data = train_data.dropna(subset=["Embarked"])

# Fill missing ages in BOTH splits with the mean age of the training split.
# Leaving NaN ages in test_data makes the network output NaN for those rows;
# using the training mean keeps test-set statistics out of preprocessing.
age_mean = train_data["Age"].mean()
train_data = train_data.fillna({"Age": age_mean})
test_data = test_data.fillna({"Age": age_mean})

# Encode the non-numeric columns for modeling. Each column gets its own
# encoder, fitted on the training split only and then applied to the test
# split, so a given category always maps to the same integer code.
# transform() raises if the test split holds a category unseen in training.
for column in ("Sex", "Embarked"):
  label_encoder = LabelEncoder()
  train_data[column] = label_encoder.fit_transform(train_data[column])
  test_data[column] = label_encoder.transform(test_data[column])

train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,1,0,3,1,22.0,1,0,2
1,2,1,1,0,38.0,1,0,0
2,3,1,3,0,26.0,0,0,2
3,4,1,1,0,35.0,1,0,2
4,5,0,3,1,35.0,0,0,2


## Create model, criterion and optimizer

In [None]:
# Model for predicting survival

class Titanic_Survival_Preditor(nn.Module):
  """Feed-forward network: three linear layers with ReLU between them.

  Args:
    input_features: Width of each input row.
    output_features: Width of the output produced for each row.
    hidden_nodes: Width of both hidden layers (defaults to 8).
  """

  def __init__(self, input_features, output_features, hidden_nodes = 8):
    super().__init__()

    # Layers are created in input -> output order and stored under the
    # `layer` attribute as a single sequential stack.
    stack = [
        nn.Linear(in_features=input_features, out_features=hidden_nodes),
        nn.ReLU(),
        nn.Linear(in_features=hidden_nodes, out_features=hidden_nodes),
        nn.ReLU(),
        nn.Linear(in_features=hidden_nodes, out_features=output_features),
    ]
    self.layer = nn.Sequential(*stack)

  def forward(self, x):
    """Run `x` through the stack and return the raw (un-activated) outputs."""
    outputs = self.layer(x)
    return outputs


# Accuracy metric for classification
def accuracy(y_true, y_pred):
  """Return the percentage (0-100) of positions where `y_pred` equals `y_true`.

  Both arguments are tensors compared element-wise with `torch.eq`; the
  count of matches is divided by `len(y_pred)`.
  """
  matches = torch.eq(y_true, y_pred)
  n_correct = matches.sum().item()
  return n_correct*100/len(y_pred)

# Seed PyTorch before the model is built so the random weight initialisation
# (and therefore every loss/accuracy figure below) is reproducible on a
# fresh "Restart & Run All". 42 matches the random_state used for the split.
torch.manual_seed(42)

# One input per test_data column and a single output logit per passenger.
model = Titanic_Survival_Preditor(len(test_data.columns), 1)
# BCEWithLogitsLoss expects raw logits, so the model applies no final sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

## Import classification visualizer

In [None]:
# Fetch the plotting helper once and cache it next to the notebook.
if Path('classification_plotter.py').is_file():
  print("Visualizer already exist, skipping download")
else:
  print("Downloading the visualizer")
  # A timeout stops the cell hanging forever on a stalled connection.
  data = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py", timeout=30)
  # Fail loudly on an HTTP error instead of saving the error body as a .py
  # file, which would make the is_file() guard above skip every retry.
  data.raise_for_status()
  with open("classification_plotter.py", "wb") as file:
    file.write(data.content)

# Imported here rather than at the top because the module may only exist
# after the download above has run.
from classification_plotter  import plot_decision_boundary


def visualize_classification(classifier, X, y):
  """Open a 7x7 inch figure, draw `classifier` on `X`/`y`, and show it.

  The drawing itself is delegated to `plot_decision_boundary`; this wrapper
  only creates the figure, adds a legend and displays the result.
  """
  figure_size = (7, 7)
  plt.figure(figsize=figure_size)
  plot_decision_boundary(classifier, X, y)
  plt.legend()
  plt.show()

Downloading the visualizer


## Write the torch flow to train the model

In [None]:
epoch = 1000  # total number of training iterations

# Create tensors for the input data: every column except the target is used
# as a feature, and "Survived" is the binary target.
# NOTE(review): this keeps PassengerId as an input feature; confirm that is
# intended, since it looks like a row identifier rather than a predictor.
X = torch.tensor(train_data.loc[:, train_data.columns != "Survived"].values, dtype=torch.float32)
y = torch.tensor(train_data["Survived"].values, dtype= torch.float32)

# Hold out 20% of the labelled rows to monitor generalisation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for index in range(epoch):
  model.train()

  # squeeze(dim=1) removes only the trailing size-1 output dimension, so the
  # batch dimension survives even when a split contains a single row.
  train_pred_logits = model(X_train).squeeze(dim=1)
  train_pred = torch.round(torch.sigmoid(train_pred_logits))

  # The loss takes raw logits; accuracy takes the thresholded 0/1 predictions.
  train_loss = criterion(train_pred_logits, y_train)
  train_accuracy = accuracy(y_train, train_pred)

  optimizer.zero_grad()
  train_loss.backward()
  optimizer.step()

  model.eval()
  with torch.inference_mode():
    test_pred_logits = model(X_test).squeeze(dim=1)
    test_pred = torch.round(torch.sigmoid(test_pred_logits))

    test_loss = criterion(test_pred_logits, y_test)
    test_accuracy = accuracy(y_test, test_pred)

  # Report every 10th iteration. The check and the message use the loop
  # counter `index`, not the constant total `epoch`.
  if index % 10 == 0:
    print(f"Epoch: {index} Train Loss: {train_loss} Train Accuracy: {train_accuracy} Test Loss: {test_loss} Test Accuracy: {test_accuracy}")


Epoch: 1000 Train Loss: 0.3971070945262909 Train Accuracy: 82.41912798874824 Test Loss: 0.7189573049545288 Test Accuracy: 82.02247191011236
Epoch: 1000 Train Loss: 0.3970276713371277 Train Accuracy: 81.85654008438819 Test Loss: 0.7442452311515808 Test Accuracy: 77.52808988764045
Epoch: 1000 Train Loss: 0.39560794830322266 Train Accuracy: 82.55977496483825 Test Loss: 0.721842885017395 Test Accuracy: 80.89887640449439
Epoch: 1000 Train Loss: 0.3922722339630127 Train Accuracy: 82.70042194092827 Test Loss: 0.7476613521575928 Test Accuracy: 78.08988764044943
Epoch: 1000 Train Loss: 0.38999077677726746 Train Accuracy: 81.9971870604782 Test Loss: 0.739618182182312 Test Accuracy: 80.89887640449439
Epoch: 1000 Train Loss: 0.38830167055130005 Train Accuracy: 82.41912798874824 Test Loss: 0.7539021968841553 Test Accuracy: 79.7752808988764
Epoch: 1000 Train Loss: 0.3874589800834656 Train Accuracy: 82.70042194092827 Test Loss: 0.7492173314094543 Test Accuracy: 80.33707865168539
Epoch: 1000 Train Los

In [None]:
X_test_data = torch.tensor(test_data.values, dtype=torch.float32)

# NaN features turn into NaN logits, and casting NaN to an integer yields a
# meaningless value. Stop here with a clear message instead of building a
# submission that silently contains garbage.
if torch.isnan(X_test_data).any():
  raise ValueError("test_data contains NaN values; impute or drop them before predicting")

model.eval()
with torch.inference_mode():
  pred_test_data_logits = model(X_test_data)
  # logits -> probabilities -> 0/1 labels. `.to(torch.int)` converts the
  # existing tensor; wrapping a tensor in torch.tensor(...) copy-constructs
  # it and emits a UserWarning.
  pred_test_data = torch.round(torch.sigmoid(pred_test_data_logits)).to(torch.int)

# One row per test passenger. squeeze(dim=1) flattens (N, 1) to (N,) without
# collapsing the batch dimension, and .numpy() hands pandas a plain array.
solution = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': pred_test_data.squeeze(dim=1).numpy(),
})

# solution.to_csv("solution.csv", index=False)

# Sanity check: only the classes 0 and 1 should appear.
solution['Survived'].value_counts()

  pred_test_data = torch.tensor(torch.round(torch.sigmoid(pred_test_data_logits)), dtype=torch.int)


tensor([0., 1., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])