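# Titanic Survival Prediction Using a Neural Network

Trains a small Keras network on the Titanic training set and exports survival predictions for the test set as a submission CSV.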

In [None]:
# Mount Google Drive at /content/drive; the CSV files read below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# imports
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

## Load Dataset

In [None]:
# Folder on the mounted Drive that holds the train/test CSV files.
data_folder_path = '/content/drive/MyDrive/Colab Notebooks/Data'

# Read both splits with the same path-building logic.
loaded_frames = {}
for csv_name in ('train.csv', 'test.csv'):
    file_path = os.path.join(data_folder_path, csv_name)
    loaded_frames[csv_name] = pd.read_csv(file_path)

train_df = loaded_frames['train.csv']
test_df = loaded_frames['test.csv']

# Preview the first ten training rows.
train_df.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


## EDA & Cleaning

In [None]:
# PassengerId, Name and Ticket are not used as model inputs, so remove them.
unused_columns = ['PassengerId', 'Name', 'Ticket']
train_df = train_df.drop(columns=unused_columns)

train_df.head(10)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,0,3,male,22.0,1,0,7.25,,S
1,1,1,female,38.0,1,0,71.2833,C85,C
2,1,3,female,26.0,0,0,7.925,,S
3,1,1,female,35.0,1,0,53.1,C123,S
4,0,3,male,35.0,0,0,8.05,,S
5,0,3,male,,0,0,8.4583,,Q
6,0,1,male,54.0,0,0,51.8625,E46,S
7,0,3,male,2.0,3,1,21.075,,S
8,1,3,female,27.0,0,2,11.1333,,S
9,1,2,female,14.0,1,0,30.0708,,C


In [None]:
# Count the missing entries in each column.
train_df.isna().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [None]:
# Dimensions of the training frame as (rows, columns).
train_df.shape

(891, 9)

In [None]:
# Drop the rows whose Embarked value is missing, then fill missing Age values
# with the mean of the remaining ages. dropna returns a new frame, so the
# result has to be assigned back for the rows to actually be removed.
train_df = (
    train_df
    .dropna(subset=['Embarked'])
    .assign(Age=lambda frame: frame['Age'].fillna(frame['Age'].mean()))
    .reset_index(drop=True)
)

# Re-check the missing-value counts; only Cabin should still have gaps.
train_df.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age           0
SibSp         0
Parch         0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [None]:
# One-hot encode Sex and Embarked into 0/1 indicator columns.
categorical_columns = ['Sex', 'Embarked']
train_df = pd.get_dummies(train_df, columns=categorical_columns, dtype='int64')

train_df.head(10)

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Cabin,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.25,,0,1,0,0,1
1,1,1,38.0,1,0,71.2833,C85,1,0,1,0,0
2,1,3,26.0,0,0,7.925,,1,0,0,0,1
3,1,1,35.0,1,0,53.1,C123,1,0,0,0,1
4,0,3,35.0,0,0,8.05,,0,1,0,0,1
5,0,3,29.699118,0,0,8.4583,,0,1,0,1,0
6,0,1,54.0,0,0,51.8625,E46,0,1,0,0,1
7,0,3,2.0,3,1,21.075,,0,1,0,0,1
8,1,3,27.0,0,2,11.1333,,1,0,0,0,1
9,1,2,14.0,1,0,30.0708,,1,0,1,0,0


In [None]:
# Encode Cabin as integer codes (LabelEncoder is imported in the imports cell).
# Missing cabins are first converted to the string 'nan', so they all share one
# code of their own instead of staying NaN.
# NOTE(review): the codes are arbitrary labels, not a meaningful ordering of cabins.
cabin_encoder = LabelEncoder()
train_df['Cabin'] = cabin_encoder.fit_transform(train_df['Cabin'].astype(str))

# Cabin now holds integers.
train_df.head(10)

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Cabin,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.25,147,0,1,0,0,1
1,1,1,38.0,1,0,71.2833,81,1,0,1,0,0
2,1,3,26.0,0,0,7.925,147,1,0,0,0,1
3,1,1,35.0,1,0,53.1,55,1,0,0,0,1
4,0,3,35.0,0,0,8.05,147,0,1,0,0,1
5,0,3,29.699118,0,0,8.4583,147,0,1,0,1,0
6,0,1,54.0,0,0,51.8625,129,0,1,0,0,1
7,0,3,2.0,3,1,21.075,147,0,1,0,0,1
8,1,3,27.0,0,2,11.1333,147,1,0,0,0,1
9,1,2,14.0,1,0,30.0708,147,1,0,1,0,0


In [None]:
# Dimensions of the training frame after encoding, as (rows, columns).
train_df.shape

(891, 12)

## Model Training

In [None]:
# Split the frame into the target column and the feature matrix.
target_column = 'Survived'
train_labels = train_df[target_column]
train_data = train_df.drop(columns=[target_column])

In [None]:
# Preview the first rows of the feature matrix (the Survived column has been removed).
train_data.head()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Cabin,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,3,22.0,1,0,7.25,147,0,1,0,0,1
1,1,38.0,1,0,71.2833,81,1,0,1,0,0
2,3,26.0,0,0,7.925,147,1,0,0,0,1
3,1,35.0,1,0,53.1,55,1,0,0,0,1
4,3,35.0,0,0,8.05,147,0,1,0,0,1


In [None]:
# Training configuration. Seeding Python, NumPy and TensorFlow makes weight
# initialisation and batch shuffling repeat across "Restart & Run All".
SEED = 42
EPOCHS = 100
BATCH_SIZE = 10
tf.keras.utils.set_random_seed(SEED)

# Survived is a single 0/1 target, so the network ends in one sigmoid unit
# (the predicted survival probability) and is trained with binary cross-entropy.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(train_data.shape[1],)),    # one input per feature column
    tf.keras.layers.Dense(256, activation='tanh'),   # hidden layer 1
    tf.keras.layers.Dense(128, activation='tanh'),   # hidden layer 2
    tf.keras.layers.Dense(1, activation='sigmoid'),  # output: P(Survived = 1)
])

model.compile(optimizer='nadam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# verbose=0 keeps the per-epoch log out of the notebook; the metrics of the
# last epochs are displayed from the returned history instead.
history = model.fit(train_data, train_labels,
                    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

pd.DataFrame(history.history).tail()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c14130db0d0>

In [None]:
# Evaluate the model on the same data it was fitted on. The result is
# [loss, accuracy] for the training examples, not a held-out estimate.
model.evaluate(train_data, train_labels)



[0.33450770378112793, 0.8675645589828491]

## Model Prediction

In [None]:
# Preview the first rows of the raw test set.
test_df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [None]:
def prediction_to_kaggle_format(model, threshold=0.5, features=None, passenger_ids=None):
    """Turn model outputs into a submission frame with PassengerId and Survived.

    Args:
        model: Fitted model exposing ``predict(features, verbose=0)``.
        threshold: Minimum survival probability that is labelled ``1``.
        features: Feature matrix to score. Defaults to the notebook-level
            ``test_data``.
        passenger_ids: Ids matching the rows of ``features``. Defaults to
            ``test_df["PassengerId"]``.

    Returns:
        pandas.DataFrame with columns ``PassengerId`` and ``Survived`` (0/1).
    """
    if features is None:
        features = test_data
    if passenger_ids is None:
        passenger_ids = test_df["PassengerId"]

    scores = np.asarray(model.predict(features, verbose=0), dtype=float)
    if scores.ndim == 1 or scores.shape[1] == 1:
        # Single output unit: the value already is the survival probability.
        proba_survive = scores.reshape(-1)
    else:
        # One score per class: column 0 is "did not survive", column 1 is
        # "survived". Use the share of class 1 in the row total so the
        # threshold still applies to a probability.
        class_totals = scores.sum(axis=1)
        proba_survive = np.divide(
            scores[:, 1],
            class_totals,
            out=np.zeros_like(class_totals),
            where=class_totals > 0,
        )

    return pd.DataFrame({
        "PassengerId": passenger_ids,
        "Survived": (proba_survive >= threshold).astype(int)
    })

def make_submission(kaggle_predictions, path=None):
    """Write the submission frame to a CSV file and print where it went.

    Args:
        kaggle_predictions: DataFrame to export; written without the index.
        path: Destination file. Defaults to ``titanic_submission.csv`` inside
            the notebook-level ``data_folder_path``.
    """
    if path is None:
        path = os.path.join(data_folder_path, 'titanic_submission.csv')
    kaggle_predictions.to_csv(path, index=False)
    print(f"Submission exported to {path}")

# Build the test feature matrix with the same steps that produced train_data,
# so this cell does not rely on a `test_data` variable left over in the kernel.
test_features = (
    test_df
    .drop(columns=['PassengerId', 'Name', 'Ticket'])
    .assign(
        Age=lambda frame: frame['Age'].fillna(train_df['Age'].mean()),
        # Guard against gaps in Fare as well; the network cannot take NaN inputs.
        Fare=lambda frame: frame['Fare'].fillna(train_df['Fare'].median()),
    )
)
test_features = pd.get_dummies(test_features, columns=['Sex', 'Embarked'], dtype='int64')

# Reuse the training Cabin codes; cabins the encoder never saw fall back to the
# code of the missing-cabin label 'nan'.
cabin_codes = {label: code for code, label in enumerate(cabin_encoder.classes_)}
missing_cabin_code = cabin_codes.get('nan', 0)
test_features['Cabin'] = (
    test_features['Cabin']
    .astype(str)
    .map(cabin_codes)
    .fillna(missing_cabin_code)
    .astype('int64')
)

# Same columns in the same order as the training features; absent dummies become 0.
test_data = test_features.reindex(columns=train_data.columns, fill_value=0)

kaggle_predictions = prediction_to_kaggle_format(model)
make_submission(kaggle_predictions)

# Preview the exported predictions.
kaggle_predictions.head()

Submission exported to /content/drive/MyDrive/Colab Notebooks/Data/titanic_submission.csv
