In [1]:
import pandas as pd
from id3 import DecisionTree
from pre_process import train_test_split, get_target_name, get_classes
from metrics import confusion_matrix, evaluate_metrics, count_nodes_and_leaves, tree_depth

In [2]:
# Choose the dataset to train on: leave exactly one `file_path` assignment
# uncommented. The Connect4 dataset is NOT handled here; it has its own
# separate notebook.
file_path = "weather.csv"
# file_path = "restaurant.csv"
# file_path = "iris.csv"

# Read the selected CSV into a DataFrame used by the cells that follow.
data = pd.read_csv(file_path)

In [3]:
# Remove the 'ID' column from the dataset.
# `errors='ignore'` keeps this cell idempotent: `data` is overwritten in place,
# so on a second run of the cell the column is already gone and a plain drop
# would raise KeyError. It also lets the cell pass for a CSV that has no 'ID'
# column at all.
data = data.drop(columns='ID', errors='ignore')

In [4]:
# Split the dataset into a train part and a test part. 0.8 is the ratio handed
# to train_test_split; in the run recorded below it put 11 of the 14 weather
# rows into `train` and the remaining 3 into `test`.
# Caveat: splitting works poorly on small datasets such as weather and
# restaurant (and also iris), because the train set is even smaller than the
# original dataset and therefore produces a weaker tree.
split_ratio = 0.8
train, test = train_test_split(data, split_ratio)

In [5]:
# Create the DecisionTree model. max_depth is left as None because these
# datasets are small; pass the depth you want if you need to restrict it.
tree_model = DecisionTree(max_depth=None)

# The tree is fitted on the WHOLE dataset (`data`), not on `train`.
# NOTE(review): `test` was split off from `data`, so its rows are part of the
# fitting data and the scores computed on `test` in later cells are not a
# held-out evaluation. Point `fit_frame` at `train` to get one.
fit_frame = data
tree_model.fit(fit_frame, get_target_name(fit_frame))

# Print the fitted tree.
tree_model.print_tree(tree_model.tree)

<Weather>
    sunny:
        <Humidity>
            <=70.0: yes (2)
            >70.0: no (3)
    overcast: yes (4)
    rainy:
        <Windy>
            False: yes (3)
            True: no (2)


In [6]:
# Print the train rows, then a divider line, then the test rows.
print(train, "---------------------------------------------------", test, sep="\n")

     Weather Temp Humidity  Windy Play
0      rainy   65       70   True   no
1      sunny   72       95  False   no
2      rainy   70       96  False  yes
3   overcast   64       65   True  yes
4      sunny   69       70  False  yes
5      rainy   71       91   True   no
6   overcast   81       75  False  yes
7      sunny   80       90   True   no
8      sunny   75       70   True  yes
9   overcast   83       86  False  yes
10     rainy   68       80  False  yes
---------------------------------------------------
    Weather Temp Humidity  Windy Play
0  overcast   72       90   True  yes
1     rainy   75       80  False  yes
2     sunny   85       85  False   no


In [7]:
# Predict a label for every row of the test set.
predicted_labels = tree_model.predict(test)

# The true labels are taken from the last column of the test frame.
target_column = test.columns[-1]
true_labels = test[target_column].tolist()

# Print each true label next to the prediction at the same position.
for idx, actual in enumerate(true_labels):
    print(f"True: {actual!s} => Predicted: {predicted_labels[idx]!s}")

True: yes => Predicted: yes
True: yes => Predicted: yes
True: no => Predicted: no


In [8]:
# Evaluate the predictions against the true labels, using the classes that
# get_classes reports for the full dataset.
class_labels = get_classes(data)
evaluate_metrics(true_labels, predicted_labels, class_labels)


Metrics for class no:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00

Metrics for class yes:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00



In [9]:
# Build the confusion matrix of true labels versus predicted labels.
# The call is kept as the last expression of the cell so that its return value
# is displayed as the cell output.
class_labels = get_classes(data)
confusion_matrix(true_labels, predicted_labels, class_labels)


Confusion Matrix

1 0
0 2

Y-axis: True labels
no yes 

X-axis: Predicted labels
no yes 



{'t-no-p-no': 1, 't-no-p-yes': 0, 't-yes-p-no': 0, 't-yes-p-yes': 2}

In [10]:
# Report the size of the fitted tree: its depth, node count and leaf count.
fitted_tree = tree_model.tree
depth = tree_depth(fitted_tree)
nodes, leaves = count_nodes_and_leaves(fitted_tree)

print(f"Tree depth:- {depth!s}")
print(f"Nodes count:- {nodes!s}")
print(f"Leaves count:- {leaves!s}")

Tree depth:- 4
Nodes count:- 6
Leaves count:- 5
