<a href="https://colab.research.google.com/github/Vivekshrotriya1/Capgemini-Training/blob/main/ML_Capstone_Project_1_28_02_26.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score




# Load Dataset

df = sns.load_dataset('titanic')

# Select required columns.
# .copy() makes the subset an independent frame, so the column assignments
# below modify it directly instead of writing to a slice of the loaded data
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[['survived','pclass','sex','age','sibsp','parch','fare']].copy()

# Handle missing values.
# Assign the filled column back rather than calling fillna(inplace=True) on
# df['age']: that chained in-place call operates on an intermediate Series,
# emits a FutureWarning on pandas 2.x and leaves df unchanged under
# Copy-on-Write (the default from pandas 3.0), so the NaNs would survive.
# NOTE(review): the median is taken over the whole dataset before the split
# below, so the test rows influence the imputed value - impute after
# splitting if strict train/test separation matters.
df['age'] = df['age'].fillna(df['age'].median())

# Convert categorical to numeric
df['sex'] = df['sex'].map({'male':0, 'female':1})

# Features and target
X = df.drop('survived', axis=1)
y = df['survived']

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Scaling (For LR and KNN)
#
# The scaler is fitted on the training split only and then applied to both
# splits, so the test features are transformed with training statistics.

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


#  Model training and evaluation
#
# The three classifiers are fitted and reported in exactly the same way, so
# the shared steps live in one helper instead of three copy-pasted stanzas.


def fit_and_report(title, model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model``, print its evaluation report and return its predictions.

    Prints, in order: ``title``, the accuracy, the classification report and
    the confusion matrix, all computed on the evaluation data.

    Parameters
    ----------
    title : str
        Heading printed before the metrics.
    model : estimator
        Classifier exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
        in place.
    X_fit, y_fit
        Features and labels used to fit the model.
    X_eval, y_eval
        Features and labels used to score the model.

    Returns
    -------
    tuple
        ``(predictions, accuracy)``: the output of ``model.predict(X_eval)``
        and the value returned by ``accuracy_score`` for those predictions.
    """
    print(title)

    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    accuracy = accuracy_score(y_eval, predictions)

    print("Accuracy:", accuracy)
    print("\nClassification Report:\n")
    print(classification_report(y_eval, predictions))

    print("Confusion Matrix:\n")
    print(confusion_matrix(y_eval, predictions))

    return predictions, accuracy


#  Logistic Regression (trained on the scaled features)

lr = LogisticRegression()
lr_pred, lr_accuracy = fit_and_report(
    "Logistic Regression", lr,
    X_train_scaled, y_train, X_test_scaled, y_test,
)


#  KNN (k=5) (trained on the scaled features)

knn = KNeighborsClassifier(n_neighbors=5)
knn_pred, knn_accuracy = fit_and_report(
    " K-Nearest Neighbors (k=5)", knn,
    X_train_scaled, y_train, X_test_scaled, y_test,
)


#  Decision Tree (trained on the unscaled features)

dt = DecisionTreeClassifier(max_depth=4, random_state=42)
dt_pred, dt_accuracy = fit_and_report(
    "Decision Tree (max_depth=4)", dt,
    X_train, y_train, X_test, y_test,
)


#  Accuracy Comparison
#
# Reuses the accuracies computed above instead of scoring each model again.

print("Model Comparison")

print("Logistic Regression Accuracy :", lr_accuracy)
print("KNN Accuracy                :", knn_accuracy)
print("Decision Tree Accuracy      :", dt_accuracy)

Logistic Regression
Accuracy: 0.7988826815642458

Classification Report:

              precision    recall  f1-score   support

           0       0.81      0.86      0.83       105
           1       0.78      0.72      0.75        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179

Confusion Matrix:

[[90 15]
 [21 53]]
 K-Nearest Neighbors (k=5)
Accuracy: 0.7877094972067039

Classification Report:

              precision    recall  f1-score   support

           0       0.81      0.83      0.82       105
           1       0.75      0.73      0.74        74

    accuracy                           0.79       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179

Confusion Matrix:

[[87 18]
 [20 54]]
Decision Tree (max_depth=4)
Accuracy: 0.7988826815642458

Classification Report:

              precision    recall