# Assignment 9 — Decision Trees & Random Forests
Titanic & Heart Disease Classification

## Import Libraries

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score


## Load Titanic Dataset (from seaborn)

In [None]:

# Duplicate of the seaborn import in the imports cell; harmless to repeat.
import seaborn as sns
# Load seaborn's 'titanic' example dataset. Later cells select columns by
# name and call .dropna() on the result, i.e. it is used as a DataFrame.
titanic = sns.load_dataset('titanic')

# Preview the first rows (rendered as the cell output when run in a notebook).
titanic.head()


## Preprocess Titanic Dataset

In [None]:

# Keep the target plus the seven modelling columns and drop every row that
# has a missing value in any of them.
# .copy() makes df an independent frame: without it, the column assignments
# below write into an object pandas still tracks as derived from `titanic`,
# which raises SettingWithCopyWarning on pandas versions without copy-on-write.
df = titanic[['survived','pclass','sex','age','sibsp','parch','fare','embarked']].dropna().copy()

# Replace the two text columns with integer category codes so the
# scikit-learn estimators receive purely numeric features.
df['sex'] = df['sex'].astype('category').cat.codes
df['embarked'] = df['embarked'].astype('category').cat.codes

# Feature matrix (everything except the label) and target vector.
X = df.drop('survived', axis=1)
y = df['survived']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preview the encoded features (rendered as the cell output in a notebook).
X.head()


## Decision Tree — Train & Evaluate

In [None]:

# Build and fit the baseline tree in one step (fit() returns the estimator).
# No depth limit is passed here; random_state pins tie-breaking between splits.
tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Evaluate on the held-out split.
pred_tree = tree.predict(X_test)
tree_accuracy = accuracy_score(y_test, pred_tree)
tree_confusion = confusion_matrix(y_test, pred_tree)

print("Accuracy:", tree_accuracy)
print(tree_confusion)


## Visualize Decision Tree

In [None]:

# Render the fitted (unpruned) tree, limiting the drawing to depth 3 so the
# figure stays readable; deeper nodes are not drawn.
plt.figure(figsize=(12,6))
# Pass the feature names as a plain list: some scikit-learn releases validate
# this argument strictly and reject a pandas Index.
plot_tree(tree, feature_names=list(X.columns), filled=True, max_depth=3)
plt.show()


## Pruned Decision Tree (max_depth=3)

In [None]:

# Same model as the baseline tree but capped at depth 3 (pre-pruning);
# fit() returns the estimator, so construction and training are chained.
tree_pruned = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)

# Score the depth-limited tree on the held-out split.
pred_pruned = tree_pruned.predict(X_test)
pruned_accuracy = accuracy_score(y_test, pred_pruned)

print("Accuracy (pruned):", pruned_accuracy)


## Random Forest Model

In [None]:

# Train a 100-tree random forest on the training split; random_state pins
# the bootstrap/feature sampling so results are repeatable.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the forest on the held-out split.
pred_rf = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, pred_rf)

print("Accuracy:", rf_accuracy)


## Feature Importance (Random Forest)

In [None]:

# Label each of the forest's importance scores with its feature name,
# then order them from most to least important.
importances = pd.Series(rf.feature_importances_, index=X.columns)
feat_imp = importances.sort_values(ascending=False)

# Bar chart of the ranked importances.
feat_imp.plot.bar()
plt.title("Feature Importance")
plt.show()


## Evaluation Report

In [None]:

# Text report of the random forest's predictions against the held-out labels.
rf_report = classification_report(y_test, pred_rf)
print(rf_report)


## Discussion
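Write your observations on: (1) how limiting the tree to `max_depth=3` (pruning) changes test accuracy compared with the unpruned tree, (2) which features the Random Forest ranks as most important, and (3) how the Random Forest's performance compares with that of the single Decision Tree.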