In [99]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LeakyReLU

In [100]:
# Function to preprocess data
def preprocess_data(data):
    """Clean and one-hot encode a raw passenger DataFrame.

    Steps performed on a copy of ``data`` (the caller's frame is left untouched):

    * fill missing ``Age`` values (and ``Fare`` values, when that column is
      present) with the column median of ``data`` itself;
    * add ``HasCabin`` (1 when ``Cabin`` is present, 0 when it is missing);
    * fill missing ``Embarked`` values with the most frequent port;
    * drop ``Cabin``, ``Ticket``, ``Name`` and ``PassengerId``;
    * one-hot encode ``Sex``, ``Embarked`` and ``Pclass`` with
      ``drop_first=True``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the columns ``Age``, ``Cabin``, ``Embarked``,
        ``Ticket``, ``Name``, ``PassengerId``, ``Sex`` and ``Pclass``.

    Returns
    -------
    pandas.DataFrame
        A new, fully numeric frame: remaining original columns followed by the
        indicator columns.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    # Work on a copy: chained ``data[col].fillna(..., inplace=True)`` is
    # deprecated and silently does nothing under pandas Copy-on-Write, and
    # mutating the caller's frame makes the calling cell non-idempotent.
    cleaned = data.copy()

    cleaned['Age'] = cleaned['Age'].fillna(cleaned['Age'].median())

    # Fare is consumed downstream as a numeric feature; a single missing value
    # would otherwise propagate as NaN into the model input.
    if 'Fare' in cleaned.columns:
        cleaned['Fare'] = cleaned['Fare'].fillna(cleaned['Fare'].median())

    cleaned['HasCabin'] = cleaned['Cabin'].notna().astype('int64')

    # mode() is empty when every Embarked value is missing; skip the fill then
    # instead of failing on the positional lookup.
    embarked_mode = cleaned['Embarked'].mode()
    if not embarked_mode.empty:
        cleaned['Embarked'] = cleaned['Embarked'].fillna(embarked_mode.iloc[0])

    cleaned = cleaned.drop(columns=['Cabin', 'Ticket', 'Name', 'PassengerId'])

    # Force an integer dtype for the indicator columns: pandas >= 2.0 defaults
    # to bool, which turns ``DataFrame.values`` into an object array once mixed
    # with float columns. 'uint8' is the pre-2.0 default.
    data_encoded = pd.get_dummies(
        cleaned,
        columns=['Sex', 'Embarked', 'Pclass'],
        drop_first=True,
        dtype='uint8',
    )
    return data_encoded

In [101]:
# Read the raw training CSV, then run it through the shared preprocessing step
train_csv_path = r'C:\Users\Syed Asad\OneDrive\Desktop\Python Files\Data Analysis\Kaggle Competitions\Titanic Disaster\titanic\train.csv'
train_data = pd.read_csv(train_csv_path)
train_data_encoded = preprocess_data(train_data)

In [102]:
# Preview the first rows of the encoded training frame (before scaling)
train_data_encoded.head()

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,HasCabin,Sex_male,Embarked_Q,Embarked_S,Pclass_2,Pclass_3
0,0,22.0,1,0,7.25,0,1,0,1,0,1
1,1,38.0,1,0,71.2833,1,0,0,0,0,0
2,1,26.0,0,0,7.925,0,0,0,1,0,1
3,1,35.0,1,0,53.1,1,0,0,1,0,0
4,0,35.0,0,0,8.05,0,1,0,1,0,1


In [103]:
# Min-max scale the numeric columns; the one-hot/indicator columns are left as-is.
# The fitted scaler is kept so the same transformation can be applied to the test data.
cols_to_normalize = ['Age', 'SibSp', 'Parch', 'Fare']
scaler = MinMaxScaler()
scaler.fit(train_data_encoded[cols_to_normalize])
train_data_encoded[cols_to_normalize] = scaler.transform(train_data_encoded[cols_to_normalize])

In [104]:
# Inspect the first ten rows to confirm the numeric columns were rescaled
train_data_encoded.head(10)

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,HasCabin,Sex_male,Embarked_Q,Embarked_S,Pclass_2,Pclass_3
0,0,0.271174,0.125,0.0,0.014151,0,1,0,1,0,1
1,1,0.472229,0.125,0.0,0.139136,1,0,0,0,0,0
2,1,0.321438,0.0,0.0,0.015469,0,0,0,1,0,1
3,1,0.434531,0.125,0.0,0.103644,1,0,0,1,0,0
4,0,0.434531,0.0,0.0,0.015713,0,1,0,1,0,1
5,0,0.346569,0.0,0.0,0.01651,0,1,1,0,0,1
6,0,0.673285,0.0,0.0,0.101229,1,1,0,1,0,0
7,0,0.019854,0.375,0.166667,0.041136,0,1,0,1,0,1
8,1,0.334004,0.0,0.333333,0.021731,0,0,0,1,0,1
9,1,0.170646,0.125,0.0,0.058694,0,0,0,0,1,0


In [105]:
# Separate features from the 'Survived' target, then hold out 20% of rows for validation
feature_frame = train_data_encoded.drop(columns='Survived')
X = feature_frame.to_numpy()
y = train_data_encoded['Survived'].to_numpy()
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [106]:
# Binary classifier: one 32-unit hidden layer with LeakyReLU, sigmoid output unit
n_features = X_train.shape[1]
model = Sequential([
    Dense(32, input_shape=(n_features,)),
    LeakyReLU(),
    Dense(1, activation='sigmoid'),
])

In [107]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [108]:
# Fit for 8 epochs, scoring the held-out validation split after each epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=8,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [109]:
# Score the trained model on the validation split and report accuracy as a percentage
val_metrics = model.evaluate(X_val, y_val, verbose=0)
val_loss, val_accuracy = val_metrics
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

Validation Accuracy: 75.98%


In [110]:
# Read the raw test CSV and preview its first rows (preprocessing happens in a later cell)
test_csv_path = r'C:\Users\Syed Asad\OneDrive\Desktop\Python Files\Data Analysis\Kaggle Competitions\Titanic Disaster\titanic\test.csv'
test_data = pd.read_csv(test_csv_path)
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [111]:
# Keep the passenger IDs for the submission file; this must happen before
# preprocess_data runs, because that step drops the 'PassengerId' column.
passenger_ids = test_data['PassengerId']

In [112]:
# Preprocess the test data similar to the training data
test_data_encoded = preprocess_data(test_data)

# Align the test features to the exact columns (and column order) the model was
# trained on. One-hot encoding is computed per frame, so a category missing
# from the test data would otherwise shift or remove a column and silently
# feed values into the wrong inputs of the network.
feature_columns = train_data_encoded.columns.drop('Survived')
test_data_encoded = test_data_encoded.reindex(columns=feature_columns, fill_value=0)

# Apply the scaler fitted on the training data (transform only, never re-fit).
test_data_encoded[cols_to_normalize] = scaler.transform(test_data_encoded[cols_to_normalize])

# Impute any values still missing in the scaled columns with the training
# medians, so that NaN never reaches the model. train_data_encoded already
# holds the scaled training values, so its medians are on the same scale.
train_medians = train_data_encoded[cols_to_normalize].median()
test_data_encoded[cols_to_normalize] = test_data_encoded[cols_to_normalize].fillna(train_medians)

X_test = test_data_encoded.values

In [113]:
# Predict survival probabilities, then threshold at 0.5 to get 0/1 class labels
predictions = model.predict(X_test)
is_survivor = predictions > 0.5
predicted_classes = is_survivor.astype(int).flatten()



In [114]:
# Display the flattened 0/1 class labels predicted for the test rows
predicted_classes

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [115]:
# Pair each saved passenger ID with its predicted class label
submission_df = pd.DataFrame(
    {'PassengerId': passenger_ids, 'Survived': predicted_classes}
)

In [116]:
# Write the submission frame to disk as CSV, without the DataFrame index column
submission_file_path = r'C:\Users\Syed Asad\OneDrive\Desktop\Python Files\Data Analysis\Kaggle Competitions\Titanic Disaster\titanic\titanic_predictions_DNN.csv'
submission_df.to_csv(
    submission_file_path,
    index=False,
)