# Titanic- Machine Learning from Disaster
*Importing Libraries*

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

*Load train and test datasets*

In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

*Basic data exploration*

In [3]:
print("Dataset Preview:")
print(train_df.head())

Dataset Preview:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   

**Handle missing values**

*Fill missing 'Age' with median*

In [4]:
train_df['Age'].fillna(train_df['Age'].median(), inplace=True)
test_df['Age'].fillna(test_df['Age'].median(), inplace=True)

*Fill missing 'Embarked' with mode*

In [5]:
train_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)
test_df['Embarked'].fillna(test_df['Embarked'].mode()[0], inplace=True)

*Fill missing 'Fare' with median in case any missing in the test set*

In [6]:
train_df['Fare'].fillna(train_df['Fare'].median(), inplace=True)
test_df['Fare'].fillna(test_df['Fare'].median(), inplace=True)

*Create binary feature for Cabin availability*

In [7]:
train_df['CabinAvailable'] = train_df['Cabin'].apply(lambda x: 0 if pd.isnull(x) else 1)
test_df['CabinAvailable'] = test_df['Cabin'].apply(lambda x: 0 if pd.isnull(x) else 1)

*Drop unnecessary columns*

In [8]:
train_df.drop(columns=['PassengerId', 'Cabin', 'Ticket', 'Name'], inplace=True)
test_df.drop(columns=['Cabin', 'Ticket', 'Name'], inplace=True)

**Feature Engineering: Family size and IsAlone**

In [9]:
train_df['FamilySize'] = train_df['SibSp'] + train_df['Parch'] + 1
train_df['IsAlone'] = train_df['FamilySize'].apply(lambda x: 1 if x == 1 else 0)
test_df['FamilySize'] = test_df['SibSp'] + test_df['Parch'] + 1
test_df['IsAlone'] = test_df['FamilySize'].apply(lambda x: 1 if x == 1 else 0)

*One-hot encode categorical features*

In [10]:
train_df = pd.get_dummies(train_df, columns=['Sex', 'Embarked', 'Pclass'], drop_first=True)
test_df = pd.get_dummies(test_df, columns=['Sex', 'Embarked', 'Pclass'], drop_first=True)

**Define features (X) and target (y)**

In [11]:
X = train_df.drop('Survived', axis=1)
y = train_df['Survived']

*Split the data into training and test sets*

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

*Standardize the features*

In [13]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

**Build a simple neural network model**

In [14]:
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification output layer

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


*Compile the model*

In [15]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

*Train the model*

In [16]:
model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - accuracy: 0.7134 - loss: 0.6331 - val_accuracy: 0.7902 - val_loss: 0.5435
Epoch 2/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8129 - loss: 0.5262 - val_accuracy: 0.8042 - val_loss: 0.4817
Epoch 3/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7770 - loss: 0.4766 - val_accuracy: 0.8112 - val_loss: 0.4419
Epoch 4/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8155 - loss: 0.4495 - val_accuracy: 0.8252 - val_loss: 0.4199
Epoch 5/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8437 - loss: 0.4147 - val_accuracy: 0.8322 - val_loss: 0.4008
Epoch 6/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8249 - loss: 0.4099 - val_accuracy: 0.8392 - val_loss: 0.3968
Epoch 7/50
[1m18/18[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x79c53ceef130>

**Evaluate the model on the test set**

In [17]:
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32").flatten()

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


**Print accuracy, classification report, and confusion matrix**

In [18]:
print(f"Neural Network Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Neural Network Accuracy: 0.84

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.91      0.87       105
           1       0.86      0.74      0.80        74

    accuracy                           0.84       179
   macro avg       0.85      0.83      0.83       179
weighted avg       0.84      0.84      0.84       179


Confusion Matrix:
[[96  9]
 [19 55]]


**Align columns with training data (missing columns in test can be added as 0)**

In [19]:
missing_cols = set(X.columns) - set(test_df.columns)
for col in missing_cols:
    test_df[col] = 0

*Ensure the test set has the same columns as training set*

In [20]:
test_df = test_df[X.columns]

*Scale test data*

In [21]:
test_scaled = scaler.transform(test_df)

**Predict on the test set using the trained model**

In [22]:
test_predictions = (model.predict(test_scaled) > 0.5).astype("int32").flatten()

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


**Prepare the submission file**

In [23]:
submission = pd.DataFrame({
    'PassengerId': pd.read_csv('test.csv')['PassengerId'],
    'Survived': test_predictions
})

**Save the submission file**

In [24]:
submission.to_csv('submission.csv', index=False)
print("Submission file created: submission.csv")

Submission file created: submission.csv
