In [244]:
import pandas as pd
from id3 import DecisionTree
from pre_process import train_test_split, get_target_name, get_classes
from metrics import confusion_matrix, evaluate_metrics, count_nodes_and_leaves, tree_depth

In [245]:
# Load the dataset (Uncomment the dataset that you want to train)
#file_path = 'weather.csv'
file_path = 'restaurant.csv'
#file_path = 'iris.csv'
data = pd.read_csv(file_path)

#NOTE:- There is a SEPARATE NOTEBOOK for "CONNECT4" dataset

In [246]:
# Remove 'ID' column from the dataset
data = data.drop('ID', axis=1)

In [247]:
# Split the dataset into train and test
# (Note:- Splitting is not so good with small datasets like weather and restaurants, also iris)
# Because train set is even smaller than original dataset, thus produce weaker tree
train, test = train_test_split(data, 0.8)

In [248]:
# Instantiate and fit the DecisionTree model
tree_model = DecisionTree(max_depth=None) #You can set any depth you want, since these datasets are small you don't need to
tree_model.fit(train, get_target_name(data))#Replace parameter train by data, if you want to fit whole dataset
tree_model.print_tree(tree_model.tree)

<Pat>
    Some: Yes (4)
    None: No (2)
    Full:
        <Type>
            Italian: No (1)
            Thai: No (1)
            Burger:
                <Alt>
                    Yes: Yes (1)
                    No: No (1)


In [249]:
#Print the test data
print(train)
print("---------------------------------------------------")
print(test)

   Alt  Bar  Fri  Hun   Pat Price Rain  Res     Type    Est Class
0  Yes   No   No  Yes  Some   $$$   No  Yes   French   0-10   Yes
1   No  Yes   No   No  None     $  Yes   No   Burger   0-10    No
2   No   No   No   No  None     $   No   No     Thai   0-10    No
3  Yes  Yes  Yes  Yes  Full   $$$   No  Yes  Italian  10-30    No
4   No   No   No  Yes  Some    $$  Yes  Yes     Thai   0-10   Yes
5   No  Yes   No  Yes  Some    $$  Yes  Yes  Italian   0-10   Yes
6  Yes   No   No  Yes  Full     $   No   No     Thai  30-60    No
7  Yes  Yes  Yes  Yes  Full     $   No   No   Burger  30-60   Yes
8   No  Yes  Yes   No  Full     $  Yes   No   Burger    >60    No
9   No  Yes   No   No  Some     $   No   No   Burger   0-10   Yes
---------------------------------------------------
   Alt Bar  Fri  Hun   Pat Price Rain  Res    Type    Est Class
0  Yes  No  Yes  Yes  Full     $   No   No    Thai  10-30   Yes
1  Yes  No  Yes   No  Full   $$$   No  Yes  French    >60    No


In [250]:
#Predicted levels and true levels
predicted_labels=tree_model.predict(test)
true_labels = test[test.columns[-1]].tolist()

for i in range(len(true_labels)):
    print("True: " + str(true_labels[i]) + " => Predicted: "+ str(predicted_labels[i]))

True: Yes => Predicted: No
True: No => Predicted: None


In [251]:
#Evaluate matrix
evaluate_metrics(true_labels, predicted_labels, get_classes(data))

TypeError: can only concatenate str (not "NoneType") to str

In [252]:
#Confusion matrix
confusion_matrix(true_labels, predicted_labels, get_classes(data))

TypeError: can only concatenate str (not "NoneType") to str

In [253]:
#Tree depth, nodes count and leaves count
depth = tree_depth(tree_model.tree)
nodes, leaves = count_nodes_and_leaves(tree_model.tree)
print("Tree depth:- " + str(depth))
print("Node count:- " + str(nodes))
print("Leaves count:- " + str(leaves))

Tree depth:- 6
Node count:- 6
Leaves count:- 6
