In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import graphviz
from sklearn import tree

ModuleNotFoundError: No module named 'graphviz'

In [None]:
# Load the bundled Iris dataset and wrap it in pandas objects:
# X is the feature table (one named column per feature), y is the label vector.
data = load_iris()
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data=data["target"])

In [None]:
# Hold out 30% of the samples for evaluation.
# Stratifying on y keeps the class proportions the same in both partitions, so
# the small test set cannot over- or under-represent a class purely by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
# Fit a decision tree with no depth limit on the training split.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)

# Visualize the Decision Tree.
# Draw on an explicit Axes instead of relying on pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    dt_classifier,
    filled=True,
    # data.target_names is a NumPy array; some scikit-learn releases validate
    # these arguments as plain lists, so convert both explicitly.
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    ax=ax,
)
ax.set_title("Decision Tree Visualization")
plt.show()


In [None]:
# Fit a second decision tree whose depth is capped at 3 levels.
dt_classifier_depth = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_classifier_depth.fit(X_train, y_train)

# Visualize the pruned Decision Tree.
# Draw on an explicit Axes instead of relying on pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    dt_classifier_depth,
    filled=True,
    # data.target_names is a NumPy array; some scikit-learn releases validate
    # these arguments as plain lists, so convert both explicitly.
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    ax=ax,
)
ax.set_title("Pruned Decision Tree Visualization (max_depth=3)")
plt.show()

In [None]:
# Predict the test split with the depth-limited tree and report its accuracy.
y_pred_depth = dt_classifier_depth.predict(X=X_test)
print(
    f"Accuracy of pruned Decision Tree: {accuracy_score(y_test, y_pred_depth):.3f}"
)

In [None]:
# Build a 100-tree Random Forest and fit it in one expression
# (fit() returns the fitted estimator itself).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Report the forest's accuracy on the held-out test split.
y_pred_rf = rf_classifier.predict(X=X_test)
print(
    f"Accuracy of Random Forest: {accuracy_score(y_test, y_pred_rf):.3f}"
)

In [None]:
# Collect the fitted forest's per-feature importances into a one-column table,
# indexed by feature name and ordered from most to least important.
importances = rf_classifier.feature_importances_
feature_importances = (
    pd.DataFrame({"Importance": importances}, index=data.feature_names)
    .sort_values(by="Importance", ascending=False)
)
# Heading and table are emitted on consecutive lines.
print("\nFeature Importances:", feature_importances, sep="\n")

In [None]:
# 5-fold cross-validation of the Random Forest over the full dataset;
# summarize the per-fold scores as mean ± standard deviation.
cv_scores = cross_val_score(estimator=rf_classifier, X=X, y=y, cv=5)
print(
    f"\nRandom Forest Cross-Validation Accuracy: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}"
)

In [None]:
# Horizontal bar chart of the Random Forest feature importances.
fig, ax = plt.subplots(figsize=(8, 6))
ax.barh(feature_importances.index, feature_importances['Importance'], color='skyblue')
# barh places the first row at the bottom of the axis; flip it so the bars read
# top-down in the same order as the sorted feature_importances table.
ax.invert_yaxis()
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Feature Importances from Random Forest')
plt.show()