In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

from scipy import stats
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

In [2]:
path_train = "https://raw.githubusercontent.com/codehaku/Machine_Learning/main/Titanic/train.csv"
path_test = "https://raw.githubusercontent.com/codehaku/Machine_Learning/main/Titanic/test.csv"
dataframe0 = pd.read_csv(path_train)
dataframe1 = pd.read_csv(path_test)

# Set the maximum number of rows and columns to be displayed
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Display the full DataFrame
pd.options.display.width = 1000

#print(dataframe0.head(100))

In [3]:
# Select only the numerical columns in the dataframe0
numerical_columns = dataframe0.select_dtypes(include=[np.number]).columns.tolist()

dataframe0 = dataframe0.drop(['Cabin', 'Name', 'Ticket', 'Embarked'], axis=1)
dataframe0['Sex'] = dataframe0['Sex'].replace(['male','female'],[1,0])

mean_age = dataframe0['Age'].mean()
#print(mean_age)
dataframe0['Age'].fillna(mean_age, inplace=True)
#print(dataframe0.head(100))

# Perform one-hot encoding on the Pclass column
pclass_encoded = pd.get_dummies(dataframe0['Pclass'], prefix='class')
dataframe0 = pd.concat([dataframe0, pclass_encoded], axis=1)
dataframe0 = dataframe0.drop(['Pclass'], axis=1)
#print(dataframe0.head(100))

# Perform binary encoding on the SibSp and Parch columns
dataframe0['SibSp_binary'] = dataframe0['SibSp'].apply(lambda x: 1 if x > 0 else 0)
dataframe0['Parch_binary'] = dataframe0['Parch'].apply(lambda x: 1 if x > 0 else 0)
dataframe0 = dataframe0.drop(['SibSp', 'Parch'], axis=1)
#print(dataframe0.head(10))

In [4]:
# split training and testing data

train_data = dataframe0.sample(frac=0.8, random_state=0)
test_data = dataframe0.drop(train_data.index)

# training i/p o/p and testing i/p o/p

train_input = train_data[['Sex', 'Age', 'Fare', 'class_1','class_2', 'class_3', 'SibSp_binary', 'Parch_binary']]
train_output = train_data[['Survived']]

test_input = test_data[['Sex', 'Age', 'Fare', 'class_1','class_2', 'class_3', 'SibSp_binary', 'Parch_binary']]
test_output = test_data[['Survived']]

# Define the model architecture
model = Sequential()
model.add(Dense(16, input_shape=(8,), activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # binary_crossentropy
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model.fit(train_input, train_output, validation_split=0.2, epochs=50, batch_size=32, callbacks=[early_stopping])

test_loss_train = model.evaluate(train_input, train_output)
print("Test Loss Train:", test_loss_train)

test_loss_test = model.evaluate(test_input, test_output)
print("Test Loss Test:", test_loss_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Test Loss Train: [0.6322661638259888, 0.6690042018890381]
Test Loss Test: [0.6351441144943237, 0.6741573214530945]


In [5]:
dataframe1 = dataframe1.drop(['Cabin', 'Name', 'Ticket', 'Embarked'], axis=1)
dataframe1['Sex'] = dataframe1['Sex'].replace(['male','female'],[1,0])

mean_fare = dataframe1['Fare'].mean()
dataframe1['Fare'].fillna(mean_fare, inplace=True)

mean_age = dataframe1['Age'].mean()
dataframe1['Age'].fillna(mean_age, inplace=True)

# Perform one-hot encoding on the Pclass column
pclass_encoded = pd.get_dummies(dataframe1['Pclass'], prefix='class')
dataframe1 = pd.concat([dataframe1, pclass_encoded], axis=1)
dataframe1 = dataframe1.drop(['Pclass'], axis=1)
#print(dataframe1.head())

# Perform binary encoding on the SibSp and Parch columns
dataframe1['SibSp_binary'] = dataframe1['SibSp'].apply(lambda x: 1 if x > 0 else 0)
dataframe1['Parch_binary'] = dataframe1['Parch'].apply(lambda x: 1 if x > 0 else 0)
dataframe1 = dataframe1.drop(['SibSp', 'Parch'], axis=1)
#print(dataframe1.head())

In [6]:
test_ext_data = dataframe1

test_ext_input = test_ext_data[['Sex', 'Age', 'Fare', 'class_1','class_2', 'class_3', 'SibSp_binary', 'Parch_binary']]

# Get predictions for test data
test_predictions = model.predict(test_ext_input)

# Add predictions to the test dataframe
dataframe1['Survival Prediction'] = test_predictions.flatten()

predictions = np.round(model.predict(test_ext_input)).astype(int)

# Display the updated dataframe
#print(dataframe1.head(500))



In [7]:
predictions = model.predict(test_ext_input)
predictions = np.round(predictions).astype(int)
dataframe1['Survived'] = predictions
print(dataframe1.head())

   PassengerId  Sex   Age     Fare  class_1  class_2  class_3  SibSp_binary  Parch_binary  Survival Prediction  Survived
0          892    1  34.5   7.8292        0        0        1             0             0             0.306091         0
1          893    0  47.0   7.0000        0        0        1             1             0             0.206004         0
2          894    1  62.0   9.6875        0        1        0             0             0             0.129904         0
3          895    1  27.0   8.6625        0        0        1             0             0             0.407526         0
4          896    0  22.0  12.2875        0        0        1             1             1             0.502174         1


In [8]:
#display(dataframe1[['PassengerId', 'Survived']])

In [9]:
# create a new DataFrame with only the PassengerId and Survived columns
output_dataframe1 = dataframe1[['PassengerId', 'Survived']]

# save the DataFrame to a CSV file
output_dataframe1.to_csv('output6.csv', index=False)

In [10]:
import os
print(os.getcwd())

E:\Work\Code\Projects\Jupyter_Notebook\Jupyter_Notebook\Kaggle_Projects\Titanic
