In [10]:
import numpy as np
import pandas as pd


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [12]:
# Read the three competition CSV files from the working directory, in order:
# labelled training data, unlabelled test data, and the sample submission.
train_df, test_df, gender_submission_df = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'gender_submission.csv')
)

In [14]:
# Preprocessing: impute missing values and encode categorical variables.
# Every imputation statistic is learned from the training frame and then reused
# for the test frame, so both frames go through exactly the same transformation
# (the test data never contributes its own medians).
age_median = train_df['Age'].median()
fare_median = train_df['Fare'].median()
embarked_mode = train_df['Embarked'].mode()[0]

train_df['Age'] = train_df['Age'].fillna(age_median)
train_df['Embarked'] = train_df['Embarked'].fillna(embarked_mode)
train_df['Fare'] = train_df['Fare'].fillna(fare_median)

test_df['Age'] = test_df['Age'].fillna(age_median)
test_df['Embarked'] = test_df['Embarked'].fillna(embarked_mode)
test_df['Fare'] = test_df['Fare'].fillna(fare_median)

# Convert categorical features to numerical values (e.g., 'Sex' and 'Embarked')
sex_codes = {'male': 0, 'female': 1}
train_df['Sex'] = train_df['Sex'].map(sex_codes)
test_df['Sex'] = test_df['Sex'].map(sex_codes)

# Pin the Embarked levels to the ones present in the training data before
# one-hot encoding. get_dummies emits one column per declared category, so the
# test frame always receives the same Embarked_* columns as the training frame,
# even if a port is absent from (or only appears in) the test data.
embarked_levels = sorted(train_df['Embarked'].unique())
train_df['Embarked'] = pd.Categorical(train_df['Embarked'], categories=embarked_levels)
test_df['Embarked'] = pd.Categorical(test_df['Embarked'], categories=embarked_levels)
train_df = pd.get_dummies(train_df, columns=['Embarked'])
test_df = pd.get_dummies(test_df, columns=['Embarked'])


In [15]:
# Feature matrix / target vector for training, plus the matching test features.
# The column list is written once so the train and test selections cannot drift apart.
feature_columns = [
    'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
    'Embarked_C', 'Embarked_Q', 'Embarked_S',
]
X = train_df[feature_columns]
y = train_df['Survived']
X_test = test_df[feature_columns]

In [16]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the split reproducible across runs.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [17]:
# Random Forest Classifier: fit on the training split, score on the validation split
rf_model = RandomForestClassifier(random_state=1).fit(train_X, train_y)
rf_preds = rf_model.predict(val_X)

rf_accuracy = accuracy_score(val_y, rf_preds)
rf_report = classification_report(val_y, rf_preds)
print("Random Forest Classifier")
print("Accuracy:", rf_accuracy)
print("Classification Report:\n", rf_report)

Random Forest Classifier
Accuracy: 0.7932960893854749
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.90      0.84       106
           1       0.81      0.64      0.72        73

    accuracy                           0.79       179
   macro avg       0.80      0.77      0.78       179
weighted avg       0.80      0.79      0.79       179



In [19]:
# Logistic Regression: fit on the training split, score on the validation split.
# max_iter is raised to 1000 iterations for the solver.
lr_model = LogisticRegression(max_iter=1000, random_state=1).fit(train_X, train_y)
lr_preds = lr_model.predict(val_X)

lr_accuracy = accuracy_score(val_y, lr_preds)
lr_report = classification_report(val_y, lr_preds)
print("\nLogistic Regression")
print("Accuracy:", lr_accuracy)
print("Classification Report:\n", lr_report)


Logistic Regression
Accuracy: 0.7988826815642458
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.84      0.83       106
           1       0.76      0.74      0.75        73

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179



In [20]:
# K-Nearest Neighbors (k=5): fit on the training split, score on the validation split.
# NOTE(review): no feature scaling is applied before this distance-based model, so
# wide-range columns such as Fare and Age likely dominate the distances — consider
# standardising the features before comparing KNN against the other models.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_X, train_y)
knn_preds = knn_model.predict(val_X)

knn_accuracy = accuracy_score(val_y, knn_preds)
knn_report = classification_report(val_y, knn_preds)
print("\nK-Nearest Neighbors")
print("Accuracy:", knn_accuracy)
print("Classification Report:\n", knn_report)


K-Nearest Neighbors
Accuracy: 0.6871508379888268
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.84      0.76       106
           1       0.67      0.47      0.55        73

    accuracy                           0.69       179
   macro avg       0.68      0.65      0.65       179
weighted avg       0.68      0.69      0.67       179



In [21]:
# Predict survival for the test features with the random forest fitted earlier.
# NOTE(review): rf_model was fitted on the training split only (train_X/train_y),
# and logistic regression scored marginally higher on the validation split in the
# recorded outputs — consider refitting the chosen model on the full X/y first.
final_preds = rf_model.predict(X_test)

In [22]:
# Two-column submission frame: the test passenger ids paired with the
# predicted Survived label for each row.
submission = test_df[['PassengerId']].assign(Survived=final_preds)


In [24]:
# Write the predictions to disk without the DataFrame index column,
# then confirm where the file was written.
submission.to_csv(
    path_or_buf='submission.csv',
    index=False,
)
print("Submission file created as 'submission.csv'")

Submission file created as 'submission.csv'
