# 🚢 Titanic Dataset: Model Evaluation & Cross-Validation

In this notebook, you'll:
- Load and clean the Titanic dataset
- Train models (Decision Tree, KNN)
- Evaluate them using cross-validation and classification metrics

## 📥 Load Titanic Dataset

In [None]:
import seaborn as sns
import pandas as pd

# Columns that must be present for a passenger row to be kept.
required_cols = ['age', 'embarked', 'fare', 'sex']

# Integer codes substituted for the text categories.
sex_codes = {'male': 0, 'female': 1}
port_codes = {'S': 0, 'C': 1, 'Q': 2}

# Load the Titanic table, drop rows missing any required column, and
# numerically encode `sex` and `embarked` in one chained expression.
df = (
    sns.load_dataset('titanic')
    .dropna(subset=required_cols)
    .assign(
        sex=lambda frame: frame['sex'].map(sex_codes),
        embarked=lambda frame: frame['embarked'].map(port_codes),
    )
)

# Predictor matrix and binary target used by every later cell.
features = ['pclass', 'sex', 'age', 'fare', 'embarked']
X, y = df[features], df['survived']

## 🔍 Train/Test Split and Scaling

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out a test partition (scikit-learn's default share) with a fixed seed
# so the split is reproducible.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions so the test set never influences
# the scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## 🌳 Decision Tree: Evaluation

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Depth-limited decision tree with a fixed seed; `fit` returns the estimator,
# so construction and training are chained.
tree = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)

# Report hold-out performance: each label is followed by its metric on the
# next line.
print("Decision Tree Confusion Matrix:", confusion_matrix(y_test, y_pred_tree), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred_tree), sep="\n")

## 🌀 KNN: Evaluation

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbour classifier trained on the training partition.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Compute the hold-out metrics first, then print each under its heading.
knn_confusion = confusion_matrix(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)

print("KNN Confusion Matrix:", knn_confusion, sep="\n")
print("\nClassification Report:", knn_report, sep="\n")

## 🔁 Cross-Validation on Entire Dataset

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 5-fold cross-validated accuracy for both models on the full feature matrix
# `X` and target `y`.
#
# KNN is distance-based, so it must see standardised features here just as it
# did in the hold-out evaluation; passing the raw `X` would let the
# large-range columns dominate the distance and make the CV score
# incomparable with the hold-out report. Wrapping the scaler and the
# classifier in a pipeline makes `cross_val_score` re-fit the scaler on each
# training fold only, so no statistics leak from the validation fold.
knn_cv_model = make_pipeline(StandardScaler(), knn)

# Tree splits are threshold comparisons on single features, so rescaling does
# not change them; the tree is evaluated on the raw features directly.
tree_cv_scores = cross_val_score(tree, X, y, cv=5, scoring='accuracy')
knn_cv_scores = cross_val_score(knn_cv_model, X, y, cv=5, scoring='accuracy')

print("Tree CV scores:", tree_cv_scores)
print("Tree CV accuracy:", tree_cv_scores.mean())
print("\nKNN CV scores:", knn_cv_scores)
print("KNN CV accuracy:", knn_cv_scores.mean())