<a href="https://colab.research.google.com/github/damiangohrh123/ml_projects/blob/main/classification/titanic_classification_models_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Day 5: Titanic Survival Prediction Project
- Compare Logistic Regression, Decision Tree, Random Forest, SVM
- Show accuracy & confusion matrices
"""
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Configuration: a single seed drives the split and every stochastic model,
# so Restart & Run All reproduces the same numbers.
SEED = 42
TEST_SIZE = 0.2
FEATURES = ["pclass", "age", "sibsp", "parch", "fare", "sex_male", "embarked_Q", "embarked_S"]
IMPUTED_COLUMNS = ["age", "fare"]

# Load Titanic dataset from seaborn
titanic = sns.load_dataset("titanic")

# Drop rows where target variable is missing
titanic = titanic.dropna(subset=["survived"])

# Encode categorical variables.
# With drop_first=True only sex_male / embarked_Q / embarked_S are kept; rows
# whose `embarked` is missing get zeros in both embarked dummies and are
# therefore indistinguishable from the dropped baseline category.
titanic = pd.get_dummies(titanic, columns=["sex", "embarked"], drop_first=True)

# Features & target (X intentionally still contains missing values here;
# imputation happens after the split so test rows cannot influence it).
X = titanic[FEATURES]
y = titanic["survived"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Fill missing ages and fares with means learned from the TRAINING rows only.
# Computing the means on the full dataset would leak test-set statistics.
train_means = X_train[IMPUTED_COLUMNS].mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Models
# Logistic regression and the RBF SVM are sensitive to feature scale (fare
# and age dwarf the 0/1 dummies), so both are wrapped in a pipeline that
# standardizes features; the scaler is fitted inside `fit`, i.e. on training
# data only. Tree-based models are scale-invariant and are left unwrapped,
# but get a fixed random_state for reproducible results.
models = {
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(max_iter=200)),
    "Decision Tree": DecisionTreeClassifier(max_depth=4, random_state=SEED),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=SEED),
    "SVM": make_pipeline(StandardScaler(), SVC(kernel="rbf")),
}

# Train & evaluate: fit each model on the training split and report accuracy
# plus the confusion matrix (rows = true class, columns = predicted class)
# on the held-out test split.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\n{name}:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Logistic Regression:
Accuracy: 0.8100558659217877
Confusion Matrix:
 [[90 15]
 [19 55]]

Decision Tree:
Accuracy: 0.7988826815642458
Confusion Matrix:
 [[96  9]
 [27 47]]

Random Forest:
Accuracy: 0.8268156424581006
Confusion Matrix:
 [[92 13]
 [18 56]]

SVM:
Accuracy: 0.659217877094972
Confusion Matrix:
 [[99  6]
 [55 19]]
