In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the iris dataset object and build a tabular view of it: one column per
# feature plus a 'target' column holding the class label of each row.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)
df.head()

In [None]:
# Feature matrix and label vector taken straight from the dataset object.
X = iris.data
y = iris.target

# Hold out 30% of the samples for evaluation.
# - random_state pins the shuffle so Restart & Run All reproduces the same
#   split (and therefore the same metrics) every time.
# - stratify=y keeps the class proportions the same in the train and test
#   parts, which matters on a small multi-class dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Sanity-check the split: show (features, labels) shapes for train and test.
train_shapes = [X_train.shape, y_train.shape]
test_shapes = [X_test.shape, y_test.shape]
print(train_shapes, test_shapes)

In [None]:
# Decision tree that chooses splits by information gain (entropy).
# random_state is fixed because the estimator randomly permutes the features
# when searching for a split; without a seed, ties between equally good splits
# can be broken differently on each run, giving a different tree.
dtc = DecisionTreeClassifier(criterion='entropy', random_state=42)
dtc.get_params()

In [None]:
# Time training and prediction together.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock, which may have coarse
# resolution or be adjusted while the measurement is running.
start_time = time.perf_counter()

dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)

end_time = time.perf_counter()
execution_time = end_time - start_time  # elapsed seconds for fit + predict
dtc

In [None]:
# Report the elapsed time measured above, with 8 decimal places.
print("Execution Time:", f"{execution_time:.8f} seconds", sep="\n")

In [None]:
# Size of the fitted tree, read from the estimator's underlying tree structure.
total_nodes = dtc.tree_.node_count
print("Total number of nodes in the tree:", total_nodes, sep="\n")

In [None]:
# decision_path() returns a sparse indicator with one row per test sample and
# one column per tree node; a non-zero entry marks a node on that sample's path.
node_indicator = dtc.decision_path(X_test)

# Row sums give the number of nodes each sample passes through. The row-wise
# sum comes back as a 2-D column, so flatten it to a plain 1-D array.
nodes_visited_per_sample = node_indicator.sum(axis=1)
nodes_visited = np.asarray(nodes_visited_per_sample).ravel()

print(f"Average nodes visited: {np.mean(nodes_visited):.2f}")
print(f"Total nodes visited over all samples: {np.sum(nodes_visited)}")

In [None]:
# Confusion matrix of the test labels (first argument) against the
# predictions (second argument).
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

In [None]:
# Accuracy score of the predictions on the held-out test set.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

In [None]:
# Text classification report for the test-set predictions.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Feature importances of the fitted tree, indexed by feature name.
importances = dtc.feature_importances_
features = pd.DataFrame(data=importances, index=iris.feature_names)
features

In [None]:
# Draw the fitted tree; filled=True colours each node.
plt.figure(figsize=(16, 8))
tree.plot_tree(
    dtc,
    feature_names=list(iris.feature_names),
    # target_names is a NumPy array; some scikit-learn releases validate
    # class_names strictly as a list, so convert it explicitly.
    class_names=list(iris.target_names),
    filled=True,
)
plt.show()