In [274]:
import pandas as pd
from id3 import DecisionTree
from pre_process import train_test_split, get_target_name, get_classes
from metrics import confusion_matrix, evaluate_metrics, count_nodes_and_leaves, tree_depth

In [275]:
# Choose the dataset to train on: keep exactly one `file_path` line active.
# file_path = 'weather.csv'
# file_path = 'restaurant.csv'
file_path = 'iris.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# NOTE: the "CONNECT4" dataset is handled in a SEPARATE NOTEBOOK.

In [276]:
# Remove the 'ID' column from the dataset when it is present.
# errors='ignore' keeps this cell safe to re-run: `data` is overwritten here, so
# on a second execution the column is already gone and a plain drop would raise
# KeyError. It also lets the cell pass through a CSV that has no 'ID' column.
data = data.drop(columns='ID', errors='ignore')

In [277]:
# Partition the dataset into a training part and a test part.
# Caveat: with small datasets (weather, restaurant, and even iris) the training
# part is smaller still than the original data, so the resulting tree is weaker.
# NOTE(review): the split appears to shuffle rows; if train_test_split accepts a
# seed, pass one so the results are reproducible -- TODO confirm.
split_ratio = 0.8
train, test = train_test_split(data, split_ratio)

In [278]:
# Instantiate the DecisionTree model and fit it on the training split only, so
# the rows in `test` that are used for evaluation in the cells below are not
# seen during fitting (fitting on `data` would leak them into the model).
# max_depth can be set to any limit you want; these datasets are small, so no
# limit is set here.
tree_model = DecisionTree(max_depth=None)
# To fit the whole dataset instead, replace `train` with `data` -- but then the
# metrics computed on `test` are measured on rows the tree was trained on.
tree_model.fit(train, get_target_name(train))
tree_model.print_tree(tree_model.tree)

<petallength>
    <=1.9: Iris-setosa (40)
    >1.9:
        <petalwidth>
            <=1.7:
                <sepalwidth>
                    <=2.8:
                        <sepallength>
                            <=5.8: Iris-versicolor (15)
                            >5.8: Iris-versicolor (8)
                    >2.8: Iris-versicolor (16)
            >1.7:
                <sepallength>
                    <=5.9:
                        <sepalwidth>
                            <=3.1: Iris-virginica (5)
                            >3.1: Iris-versicolor (1)
                    >5.9: Iris-virginica (31)


In [279]:
# Show the training split, a divider line, then the test split
print(
    train,
    "---------------------------------------------------",
    test,
    sep="\n",
)

    sepallength sepalwidth petallength petalwidth            class
0           6.9        3.2         5.7        2.3   Iris-virginica
1           6.3        2.8         5.1        1.5   Iris-virginica
2           4.9        2.4         3.3        1.0  Iris-versicolor
3           6.0        2.7         5.1        1.6  Iris-versicolor
4           5.2        3.4         1.4        0.2      Iris-setosa
..          ...        ...         ...        ...              ...
115         4.8        3.4         1.9        0.2      Iris-setosa
116         7.2        3.6         6.1        2.5   Iris-virginica
117         5.8        4.0         1.2        0.2      Iris-setosa
118         5.0        3.4         1.6        0.4      Iris-setosa
119         5.5        3.5         1.3        0.2      Iris-setosa

[120 rows x 5 columns]
---------------------------------------------------
   sepallength sepalwidth petallength petalwidth            class
0          5.8        2.6         4.0        1.2  Iris

In [280]:
# Predicted labels versus true labels (true labels come from the last column of `test`)
predicted_labels = tree_model.predict(test)
target_column = test.columns[-1]
true_labels = test[target_column].tolist()

# One line per test row: the true label next to the prediction at the same position
for position, actual in enumerate(true_labels):
    print(f"True: {actual!s} => Predicted: {predicted_labels[position]!s}")

True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-virginica => Predicted: Iris-versicolor
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-setosa => Predicted: Iris-setosa
True: Iris-setosa => Predicted: Iris-setosa
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-setosa => Predicted: Iris-setosa
True: Iris-setosa => Predicted: Iris-setosa
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-versicolor => Predicted: Iris-versicolor
True: Iris-virginica => Predicted: Iris-virginica
True: Iris-versicolor => Predicted: Iris-versicolor
True:

In [281]:
# Evaluate metrics for the classes returned by get_classes(data)
class_names = get_classes(data)
evaluate_metrics(true_labels, predicted_labels, class_names)


Metrics for class Iris-setosa:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00

Metrics for class Iris-versicolor:
Accuracy: 0.97
Precision: 0.91
Recall: 1.00
F1 Score: 0.95

Metrics for class Iris-virginica:
Accuracy: 0.97
Precision: 1.00
Recall: 0.90
F1 Score: 0.95



In [282]:
# Confusion matrix of true labels against predicted labels.
# The call stays as the cell's last expression so its return value is displayed.
class_names = get_classes(data)
confusion_matrix(true_labels, predicted_labels, class_names)


Confusion Matrix

10 0 0
0 10 0
0 1 9

Y-axis: True labels
Iris-setosa Iris-versicolor Iris-virginica 

X-axis: Predicted labels
Iris-setosa Iris-versicolor Iris-virginica 



{'t-Iris-setosa-p-Iris-setosa': 10,
 't-Iris-setosa-p-Iris-versicolor': 0,
 't-Iris-setosa-p-Iris-virginica': 0,
 't-Iris-versicolor-p-Iris-setosa': 0,
 't-Iris-versicolor-p-Iris-versicolor': 10,
 't-Iris-versicolor-p-Iris-virginica': 0,
 't-Iris-virginica-p-Iris-setosa': 0,
 't-Iris-virginica-p-Iris-versicolor': 1,
 't-Iris-virginica-p-Iris-virginica': 9}

In [283]:
# Report the size of the fitted tree: its depth, node count and leaf count
depth = tree_depth(tree_model.tree)
nodes, leaves = count_nodes_and_leaves(tree_model.tree)
print(
    f"Tree depth:- {depth!s}",
    f"Node count:- {nodes!s}",
    f"Leaves count:- {leaves!s}",
    sep="\n",
)

Tree depth:- 8
Node count:- 12
Leaves count:- 7
