In [13]:
import pandas as pd
from id3 import DecisionTree
from pre_process import train_test_split, get_target_name, get_classes
from metrics import confusion_matrix, evaluate_metrics, count_nodes_and_leaves, tree_depth

In [14]:
# Load the three Connect-4 CSV files used by the cells below:
#   data  - the full dataset (used for fitting and for class/target lookup)
#   train - the training file (presumably a pre-made split of `data`; verify)
#   test  - the file the fitted tree is evaluated on
data = pd.read_csv("connect4.csv")
# pandas has no `train_csv` function (it raised AttributeError and aborted the
# cell before `test` was assigned); the training file is read with `read_csv`.
train = pd.read_csv("connect4train.csv")
test = pd.read_csv("connect4test.csv")

In [16]:
# Build the ID3 decision tree and print its structure.
# max_depth is tunable: a deeper tree takes longer to fit.
# NOTE(review): the model is fitted on the full `data` frame rather than on
# `train` - confirm this is intended, since `test` is used for evaluation below.
target_column = get_target_name(data)
tree_model = DecisionTree(max_depth=3)
tree_model.fit(data, target_column)
tree_model.print_tree(tree_model.tree)

# NOTE: fitting takes a while on this dataset, so expect to wait a bit.

<c1>
    b:
        <c37>
            b:
                <c21>
                    x: win (1182)
                    o: loss (1235)
                    b: win (7266)
            o:
                <c38>
                    b: win (3440)
                    o: win (1521)
                    x: win (2582)
            x:
                <c38>
                    b: win (3418)
                    o: win (2855)
                    x: loss (1483)
    o:
        <c7>
            b:
                <c19>
                    x: win (2599)
                    o: win (1704)
                    b: win (2510)
            x:
                <c20>
                    o: win (1059)
                    x: win (1061)
                    b: win (6854)
            o:
                <c37>
                    b: win (2357)
                    x: win (1292)
                    o: win (888)
    x:
        <c19>
            x:
                <c21>
                    x: win (421)
                    o: loss 

In [17]:
# Print the test DataFrame as plain text; for a frame this wide pandas elides
# the middle columns with "..." and wraps the remaining ones.
print(test)

   c1 c2 c3 c4 c5 c6 c7 c8 c9 c10  ... c34 c35 c36 c37 c38 c39 c40 c41 c42  \
0   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
1   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
2   b  b  b  b  b  b  o  b  b   b  ...   b   b   b   b   b   b   b   b   b   
3   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
4   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
5   b  b  b  b  b  b  o  b  b   b  ...   b   b   b   b   b   b   b   b   b   
6   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
7   o  b  b  b  b  b  b  b  b   b  ...   b   b   b   b   b   b   b   b   b   
8   b  b  b  b  b  b  b  b  b   b  ...   b   b   b   o   b   b   b   b   b   
9   b  b  b  b  b  b  x  b  b   b  ...   b   b   b   b   b   b   b   b   b   
10  b  b  b  b  b  b  x  b  b   b  ...   b   b   b   b   b   b   b   b   b   
11  b  b  b  b  b  b  x  o  b   b  ...   b   b   b   b   b   b  

In [18]:
# Predicted labels vs. true labels for every row of the test set.
predicted_labels = tree_model.predict(test)

# The true label is read from the last column of the test frame.
target_column = test.columns[-1]
true_labels = test[target_column].tolist()

# One line per test row: ground truth next to the model's prediction.
for row_idx, actual in enumerate(true_labels):
    print(f"True: {actual} => Predicted: {predicted_labels[row_idx]}")

True: win => Predicted: win
True: win => Predicted: win
True: win => Predicted: win
True: loss => Predicted: win
True: draw => Predicted: win
True: win => Predicted: win
True: loss => Predicted: win
True: win => Predicted: win
True: win => Predicted: win
True: loss => Predicted: win
True: win => Predicted: win
True: loss => Predicted: win
True: draw => Predicted: win
True: win => Predicted: win
True: draw => Predicted: win
True: win => Predicted: win


In [19]:
# Evaluate metrics: per-class accuracy, precision, recall and F1 score
# for the test predictions, using the class list taken from the full dataset.
class_names = get_classes(data)
evaluate_metrics(true_labels, predicted_labels, class_names)


Metrics for class win:
Accuracy: 0.56
Precision: 0.56
Recall: 1.00
F1 Score: 0.72

Metrics for class draw:
Accuracy: 0.81
Precision: 0.00
Recall: 0.00
F1 Score: 0.00

Metrics for class loss:
Accuracy: 0.75
Precision: 0.00
Recall: 0.00
F1 Score: 0.00



In [20]:
# Confusion matrix of true vs. predicted labels on the test set.
# The call stays as the cell's last expression so its return value
# (the dict of 't-<true>-p-<predicted>' counts) is still echoed as output.
label_order = get_classes(data)
confusion_matrix(true_labels, predicted_labels, label_order)


Confusion Matrix

9 0 0
3 0 0
4 0 0

Y-axis: True labels
win draw loss 

X-axis: Predicted labels
win draw loss 



{'t-win-p-win': 9,
 't-win-p-draw': 0,
 't-win-p-loss': 0,
 't-draw-p-win': 3,
 't-draw-p-draw': 0,
 't-draw-p-loss': 0,
 't-loss-p-win': 4,
 't-loss-p-draw': 0,
 't-loss-p-loss': 0}

In [21]:
# Size summary of the fitted tree: depth, node count and leaf count.
fitted_tree = tree_model.tree
depth = tree_depth(fitted_tree)
nodes, leaves = count_nodes_and_leaves(fitted_tree)

print(f"Tree depth:- {depth}")
print(f"Nodes count:- {nodes}")
print(f"Leaves count:- {leaves}")

Tree depth:- 6
Nodes count:- 26
Leaves count:- 27


In [22]:
# Explanatory note written as a bare string literal, not a comment: as the
# cell's last expression its repr is echoed as the cell output.
# NOTE(review): consider moving this text to a markdown cell.
'''
Since the dataset is too huge, it took a lot of time to split the dataset into train and test.
Hence, we decided to skip the train-test splitting part for connect4 dataset
'''

'\nSince the dataset is too huge, it took a lot of time to split the dataset into train and test.\nHence, we decided to skip the train-test splitting part for connect4 dataset\n'