# Decision Tree Classifier on Titanic Dataset

## 1. Imports and Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the preprocessed Titanic table; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../data/processed/titanic_preprocessed.csv')

In [None]:
# Separate the label column from the rest of the frame:
# y is the 'survived' column, X is every other column.
y = df.loc[:, 'survived']
X = df.drop('survived', axis=1)

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions of 'survived' the same in the train and test splits, so the
# evaluation is not skewed by an unlucky draw; random_state pins the shuffle
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 2. Train Decision Tree

In [None]:
# Hyperparameters: depth capped at 4 (presumably to keep the tree small
# enough to read when plotted) and a fixed seed so the fit is reproducible.
tree_params = {'max_depth': 4, 'random_state': 42}
tree_model = DecisionTreeClassifier(**tree_params)
# Kept as the final expression so the notebook still displays the fitted estimator.
tree_model.fit(X_train, y_train)

## 3. Evaluate

In [None]:
# Predict on the held-out rows, then print the per-class classification report.
y_pred = tree_model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

## 4. Confusion Matrix

In [None]:
# Confusion matrix on the held-out set: rows are actual classes, columns are
# predicted classes.
# Passing labels=tree_model.classes_ fixes the row/column order to the order
# the model itself uses and keeps the matrix square even if a class happens
# to be absent from y_test or y_pred.
cm = confusion_matrix(y_test, y_pred, labels=tree_model.classes_)

# Same display names as the tree plot. NOTE(review): assumes the two classes
# sort as [not survived, survived] (e.g. a 0/1 encoding) -- confirm against
# the preprocessing step.
class_names = ["Not Survived", "Survived"]

sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts, no scientific notation
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

## 5. Feature Importances

In [None]:
# Pair each importance score from the fitted tree with its column name.
importances = pd.Series(data=tree_model.feature_importances_, index=X.columns)

# Sort ascending so the largest bar is drawn at the top of the horizontal chart.
ranked = importances.sort_values()
ranked.plot.barh()
plt.title('Feature Importances')
plt.show()

In [None]:
# Draw the fitted tree on a wide canvas so the node text stays legible.
plt.figure(figsize=(20, 10))
plot_tree(
    tree_model,
    # Pass a plain list: some scikit-learn releases (reported around 1.3)
    # validate feature_names strictly as a list and reject a pandas Index.
    feature_names=list(X.columns),
    # Display names for the target classes, in ascending class order.
    class_names=["Not Survived", "Survived"],
    filled=True,
)
plt.show()