In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Load the digits dataset and pull out the feature matrix and the label vector.
digits = load_digits()
X, y = digits.data, digits.target

In [None]:
# Visualize one image.
# The colormap is passed straight to matshow instead of calling plt.gray()
# first: plt.gray() rewrites the global default colormap for every later image
# plot and, on a fresh kernel, can leave an extra empty figure in the cell
# output because matshow opens its own figure.
plt.matshow(digits.images[0], cmap="gray")
plt.title(f"Target: {digits.target[0]}")
plt.show()

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# A. Baseline model: one decision tree, seeded for repeatability.
# fit() returns the estimator itself, so construction and training are chained.
tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
tree_acc = accuracy_score(y_true=y_test, y_pred=tree.predict(X_test))

In [None]:
# B. Ensemble model: a random forest of 100 trees, seeded for repeatability.
# fit() returns the estimator itself, so construction and training are chained.
forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
forest_acc = accuracy_score(y_true=y_test, y_pred=forest.predict(X_test))

In [None]:
# Report both test-set accuracies as percentages, one model per line.
print(
    f"Single Decision Tree Accuracy: {tree_acc*100:.2f}%",
    f"Random Forest Accuracy:      {forest_acc*100:.2f}%",
    sep="\n",
)

In [None]:
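# OBSERVATION:
# The forest should clearly outperform the single tree: every tree in the forest
# is trained on a different random resample of the data, so their individual
# mistakes are largely uncorrelated and tend to cancel out when the trees vote.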

In [None]:
# The fitted forest exposes one importance score per input pixel; folding the
# flat vector back into the 8x8 image grid shows which pixels help most when
# identifying a digit.
importances = forest.feature_importances_.reshape(8, 8)

In [None]:
# Draw the 8x8 importance grid as a heatmap on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(importances, cmap='viridis', ax=ax)
ax.set_title("Pixel Importance Heatmap (Which pixels matter?)")
plt.show()

In [None]:
# Forest predictions on the held-out set, tabulated against the true labels.
y_pred = forest.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)



In [None]:
# Annotated heatmap of the confusion matrix: rows are labelled as the actual
# digit, columns as the predicted digit, and each cell shows an integer count.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(
    xlabel='Predicted',
    ylabel='Actual',
    title='Confusion Matrix: Random Forest',
)
plt.show()