In [None]:

# Feature Importance in Random Forest
#
# This cell is self-contained so it runs on a fresh kernel: it loads the Iris
# data and fits the forest it inspects instead of relying on `rf_clf` / `iris`
# having been created by a cell further down the notebook.
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Same data split and hyperparameters as the
# "Random Forest Classifier on the Iris Dataset" cell.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf_clf.fit(X_train, y_train)

# One importance value per input feature, in the order of iris.feature_names
importances = rf_clf.feature_importances_
for name, importance in zip(iris.feature_names, importances):
    print(f"{name}: {importance:.2f}")

# Visualize feature importance
plt.barh(iris.feature_names, importances)
plt.xlabel('Feature Importance')
plt.title('Feature Importance in Random Forest')
plt.show()


In [None]:
# Impact of Number of Trees in Random Forest

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Iris features and labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit one forest per ensemble size and report its accuracy on the test split
for n_trees in (10, 50, 100, 200, 500):
    forest_clf = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    y_pred = forest_clf.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Number of Trees: {n_trees}, Accuracy: {accuracy:.2f}")


In [None]:
# Random Forest Classifier on the Iris Dataset

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Iris features and labels
iris = load_iris()
X = iris.data
y = iris.target

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 100 trees, each limited to depth 5, fitted on the training split
rf_clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,
)
rf_clf.fit(X_train, y_train)

# Predict the held-out samples and score the predictions
y_pred = rf_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred, target_names=iris.target_names))


In [None]:
# Single Decision Tree vs Random Forest

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Iris features and labels
iris = load_iris()
X = iris.data
y = iris.target

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit both models on the same training split
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train)

forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
forest_clf.fit(X_train, y_train)

# Score each model on the same held-out samples
tree_pred = tree_clf.predict(X_test)
forest_pred = forest_clf.predict(X_test)

tree_accuracy = accuracy_score(y_test, tree_pred)
forest_accuracy = accuracy_score(y_test, forest_pred)

print(f"Decision Tree Accuracy: {tree_accuracy:.2f}")
print(f"Random Forest Accuracy: {forest_accuracy:.2f}")


In [None]:
# Comparing Random Forest with a Single Decision Tree
#
# NOTE: X_train, X_test, y_train, y_test, rf_clf and accuracy_score are not
# defined in this cell; they must already exist in the kernel.

from sklearn.tree import DecisionTreeClassifier

# Single tree with the same depth limit (5) and seed
dt_clf = DecisionTreeClassifier(max_depth=5, random_state=42)
dt_clf.fit(X_train, y_train)

# Decision tree: predict and score
dt_pred = dt_clf.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)

# Random forest: predict and score on the same test samples
rf_pred = rf_clf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)

print(f"Decision Tree Accuracy: {dt_accuracy:.2f}")
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")


In [None]:
# Hyperparameter Tuning for Random Forest
#
# NOTE: RandomForestClassifier and the train/test arrays are not defined in
# this cell; they must already exist in the kernel.

from sklearn.model_selection import GridSearchCV

# Candidate values: 3 x 3 x 2 = 18 parameter combinations
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 10],
    criterion=['gini', 'entropy'],
)

# Search every combination, cross-validating with cv=5 on the training split
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Report the winning combination, then score its estimator on the test split
best_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Accuracy: {best_model.score(X_test, y_test):.2f}")





In [None]:
# Random Forest Visualization
#
# NOTE: `rf_clf` (a fitted forest) and `iris` are not defined in this cell;
# they must already exist in the kernel.

# Imported here so the cell does not depend on another cell having imported plt.
import matplotlib.pyplot as plt
from sklearn.tree import export_text, plot_tree

# Extract one tree from the forest
tree = rf_clf.estimators_[0]

# Visualize the tree as text
tree_rules = export_text(tree, feature_names=list(iris.feature_names))
print(tree_rules)

# Plot one of the trees.
# iris.target_names is a NumPy array; it is converted to a plain list because
# some scikit-learn releases appear to accept only a list for class_names
# (TODO confirm against the installed version).
plt.figure(figsize=(15, 10))
plot_tree(
    tree,
    feature_names=list(iris.feature_names),
    class_names=list(iris.target_names),
    filled=True,
)
plt.title("A Single Tree from the Random Forest")
plt.show()


In [None]:
# Visualizing Decision Boundaries

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Generate synthetic dataset.
# make_classification defaults to n_informative=2 and n_redundant=2, which
# exceeds n_features=2 and raises ValueError; both features are therefore
# declared informative and redundant features are disabled.
# Note: this rebinds X and y, replacing any arrays of the same name created
# by earlier cells.
X, y = make_classification(
    n_samples=300,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=42,
)

# Train Decision Tree (depth limited to 3)
tree_clf = DecisionTreeClassifier(max_depth=3, random_state=42)
tree_clf.fit(X, y)

# Train Random Forest (100 trees)
forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
forest_clf.fit(X, y)

# Plot decision boundaries
def plot_decision_boundary(clf, X, y, title, ax=None, step=0.1):
    """Draw a classifier's predicted regions with the samples plotted on top.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 1 on every side), ``clf.predict`` is evaluated at every grid
    point, and the predictions are shown as filled contours behind a scatter
    plot of the samples. Regions and points share one colormap so that a
    region's colour corresponds to the colour of the points of that class.

    Parameters
    ----------
    clf : object
        Anything exposing ``predict(points)`` where ``points`` is an
        ``(n_points, 2)`` array; it must be ready to predict.
    X : numpy.ndarray
        Sample matrix; only columns 0 and 1 are used.
    y : array-like
        Per-sample values used to colour the scatter points.
    title : str
        Title placed on the axes.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, a new figure is created and shown
        with ``plt.show()``; when given, showing is left to the caller.
    step : float, optional
        Grid spacing along both axes (default 0.1).

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive number")

    # Pad the data range so the outermost samples are not on the plot edge.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict every grid point in one call, then restore the grid shape.
    Z = np.asarray(clf.predict(np.c_[xx.ravel(), yy.ravel()]))
    Z = Z.reshape(xx.shape)

    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots()

    # Same colormap (by registered name) for regions and points.
    shared_cmap = "Paired"
    ax.contourf(xx, yy, Z, alpha=0.3, cmap=shared_cmap)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap=shared_cmap)
    ax.set_title(title)

    if owns_figure:
        plt.show()

# One figure per model, both drawn over the same samples
plot_decision_boundary(clf=tree_clf, X=X, y=y, title="Decision Tree Boundary")
plot_decision_boundary(clf=forest_clf, X=X, y=y, title="Random Forest Boundary")


# **THE END.**