In [27]:
import pandas as pd
from id3 import DecisionTree
from pre_process import train_test_split, get_target_name, get_classes
from metrics import confusion_matrix, evaluate_metrics, count_nodes_and_leaves, tree_depth

In [28]:
# Paths of the three CSV inputs: the complete dataset plus the already
# separated training and test partitions.
full_csv, train_csv, test_csv = "connect4.csv", "connect4train.csv", "connect4test.csv"

# `data` is used by later cells to look up the target column name and the
# class list; `train` is used for fitting and `test` for prediction.
data = pd.read_csv(full_csv)
train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

In [29]:
# Build the decision tree, train it on the training partition and print the
# learned tree.
#
# max_depth may be set to any depth; per the original author's note, a higher
# depth requires more training time. (Presumably None means "no depth limit"
# -- confirm against id3.DecisionTree.)
#
# NOTE: fitting takes a while on this dataset, so expect to wait a bit.
tree_model = DecisionTree(max_depth=None)
target_name = get_target_name(data)
tree_model.fit(train, target_name)
tree_model.print_tree(tree_model.tree)

<c14>
    o:
        <c7>
            b:
                <c26>
                    b:
                        <c1>
                            b:
                                <c25>
                                    b:
                                        <c13>
                                            x: win (38)
                                            o:
                                                <c31>
                                                    b:
                                                        <c37>
                                                            b: loss (2)
                                                            x: win (1)
                                                            o: loss (2)
                                                    x:
                                                        <c38>
                                                            b: win (5)
                                                      

In [30]:
# Print the test data.
# The frame is printed as plain text; pandas abbreviates the middle columns
# with "..." when the frame is too wide to show in full.
print(test)

   c1 c2 c3 c4 c5 c6 c7 c8 c9 c10  ... c34 c35 c36 c37 c38 c39 c40 c41 c42  \
0   o  b  b  b  b  b  o  x  x   b  ...   b   b   b   b   b   b   b   b   b   
1   b  b  b  b  b  b  o  x  x   b  ...   b   b   b   o   b   b   b   b   b   
2   b  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
3   b  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
4   b  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
5   b  b  b  b  b  b  o  x  o   b  ...   b   b   b   b   b   b   b   b   b   
6   b  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
7   o  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
8   b  b  b  b  b  b  o  x  b   b  ...   b   b   b   o   b   b   b   b   b   
9   x  b  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
10  x  o  b  b  b  b  o  x  b   b  ...   b   b   b   b   b   b   b   b   b   
11  x  b  b  b  b  b  o  x  b   b  ...   b   b   b   o   b   b  

In [31]:
# Predicted labels versus true labels.
# The true label is read from the last column of the test frame; the model's
# predictions are looked up by the same row position.
predicted_labels = tree_model.predict(test)
label_column = test.columns[-1]
true_labels = test[label_column].tolist()

# One line per test row: the actual class followed by the predicted class.
for row, actual in enumerate(true_labels):
    print(f"True: {actual!s} => Predicted: {predicted_labels[row]!s}")

True: win => Predicted: win
True: win => Predicted: loss
True: loss => Predicted: loss
True: win => Predicted: win
True: win => Predicted: draw
True: draw => Predicted: loss
True: win => Predicted: loss
True: win => Predicted: win
True: win => Predicted: loss
True: loss => Predicted: loss
True: win => Predicted: draw
True: draw => Predicted: loss
True: loss => Predicted: loss
True: loss => Predicted: win
True: loss => Predicted: loss
True: win => Predicted: loss
True: win => Predicted: win
True: win => Predicted: draw
True: loss => Predicted: draw
True: win => Predicted: draw


In [32]:
# Evaluate metrics: report accuracy, precision, recall and F1 score for each
# class found in the full dataset.
class_names = get_classes(data)
evaluate_metrics(true_labels, predicted_labels, class_names)


Metrics for class win:
Accuracy: 0.55
Precision: 0.80
Recall: 0.33
F1 Score: 0.47

Metrics for class draw:
Accuracy: 0.65
Precision: 0.00
Recall: 0.00
F1 Score: 0.00

Metrics for class loss:
Accuracy: 0.60
Precision: 0.40
Recall: 0.67
F1 Score: 0.50



In [33]:
# Confusion matrix of true labels (rows) against predicted labels (columns).
# The call is kept as the cell's last expression so that its result -- which
# appears to be a dict of 't-<true>-p-<predicted>' counts -- is still shown.
class_names = get_classes(data)
confusion_matrix(true_labels, predicted_labels, class_names)


Confusion Matrix

4 4 4
0 0 2
1 1 4

Y-axis: True labels
win draw loss 

X-axis: Predicted labels
win draw loss 



{'t-win-p-win': 4,
 't-win-p-draw': 4,
 't-win-p-loss': 4,
 't-draw-p-win': 0,
 't-draw-p-draw': 0,
 't-draw-p-loss': 2,
 't-loss-p-win': 1,
 't-loss-p-draw': 1,
 't-loss-p-loss': 4}

In [34]:
# Size statistics of the fitted tree: its depth, node count and leaf count.
root = tree_model.tree
depth = tree_depth(root)
nodes, leaves = count_nodes_and_leaves(root)

print(f"Tree depth:- {depth!s}")
print(f"Nodes count:- {nodes!s}")
print(f"Leaves count:- {leaves!s}")

Tree depth:- 26
Nodes count:- 488
Leaves count:- 360


In [35]:
# Author's note, written as a bare string literal rather than a comment.
# Because it is the cell's only expression, the notebook echoes the string
# (with escaped newlines) as the cell's output.
# NOTE(review): train_test_split is imported at the top of the notebook but is
# not called in any visible cell; the train/test frames are instead read from
# separate CSV files -- confirm how those files were produced.
'''
Since the dataset is too huge, it took a lot of time to split the dataset into train and test.
Hence, we decided to skip the train-test splitting part for connect4 dataset
'''

'\nSince the dataset is too huge, it took a lot of time to split the dataset into train and test.\nHence, we decided to skip the train-test splitting part for connect4 dataset\n'