In [46]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [47]:
# Load the Iris dataset from the UCI repository. The file is read with the
# column labels supplied below, and the first rows are printed as a sanity check.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
iris_df = pd.read_csv(url, names=names)
preview = iris_df.head()
print(preview)

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [48]:
# Separate the frame into the feature matrix X (every column except the last)
# and the target vector y (the last column).
feature_columns = iris_df.columns[:-1]
target_column = iris_df.columns[-1]
X = iris_df[feature_columns]
y = iris_df[target_column]

In [49]:
# Hold out 25% of the rows for testing and train on the remaining 75%.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [50]:
# Standardize the features. The scaler is fitted on the training split only,
# then the same fitted transform is applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [52]:
# Hyperparameter testing and tuning: fit one entropy-based decision tree per
# candidate max_depth and keep the one with the highest accuracy.
# NOTE(review): candidates are compared on the test split, so the reported
# best accuracy is an optimistic estimate. Consider selecting on a separate
# validation split or with cross-validation on the training data instead.
depths = [None, 3, 5, 10, 15, 20]

best_depth = None
best_accuracy = 0.0
best_clf = None      # fitted model belonging to best_depth
best_y_pred = None   # that model's predictions on X_test

for depth in depths:
    candidate = DecisionTreeClassifier(criterion='entropy', max_depth=depth, random_state=42)
    candidate.fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)
    candidate_accuracy = accuracy_score(y_test, candidate_pred)
    print(f"Depth: {depth}, Accuracy: {candidate_accuracy}")

    # Strict '>' keeps the earliest depth on ties. The `is None` check makes
    # sure a model is still selected if every candidate scores 0.0.
    if best_clf is None or candidate_accuracy > best_accuracy:
        best_depth = depth
        best_accuracy = candidate_accuracy
        best_clf = candidate
        best_y_pred = candidate_pred

# Bind the names that the evaluation cells read to the WINNING model rather
# than to whichever depth happened to be tried last.
clf = best_clf
y_pred = best_y_pred
accuracy = best_accuracy

# Print the best hyperparameter and the corresponding best accuracy.
print(f"Best Depth: {best_depth}, Best Accuracy: {best_accuracy}")

Depth: None, Accuracy: 0.9736842105263158
Depth: 3, Accuracy: 0.9736842105263158
Depth: 5, Accuracy: 0.9736842105263158
Depth: 10, Accuracy: 0.9736842105263158
Depth: 15, Accuracy: 0.9736842105263158
Depth: 20, Accuracy: 0.9736842105263158
Best Depth: None, Best Accuracy: 0.9736842105263158


In [53]:
# Build the confusion matrix for the test-set predictions and print it
# under a heading.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

Confusion Matrix:
[[13  0  0]
 [ 0 15  1]
 [ 0  0  9]]


In [54]:
# Evaluate the test-set predictions. All four metrics are computed here from
# the same y_pred, so they always describe the same set of predictions rather
# than relying on an `accuracy` value left over from the tuning loop.
# Precision, recall and F1 use average='weighted'.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.9736842105263158
Precision: 0.9763157894736842
Recall: 0.9736842105263158
F1-score: 0.9739522830846216
