In [83]:
import numpy as np
import pandas as pd
import logging
import math


In [7]:
arr_data = np.array([['-', 'short', 'blond', 'brown'],
                 ['-', 'tall', 'dark', 'brown'],
                 ['+', 'tall', 'blond', 'blue'],
                 ['-', 'tall', 'dark', 'blue'],
                 ['-', 'short', 'dark', 'blue'],
                 ['+', 'tall', 'red', 'blue'],
                 ['-', 'tall', 'blond', 'brown'],
                 ['+', 'short', 'blond', 'blue']])


data = pd.DataFrame(arr_data, columns = ['class', 'height', 'hair', 'eyes'])
data

Unnamed: 0,class,height,hair,eyes
0,-,short,blond,brown
1,-,tall,dark,brown
2,+,tall,blond,blue
3,-,tall,dark,blue
4,-,short,dark,blue
5,+,tall,red,blue
6,-,tall,blond,brown
7,+,short,blond,blue


In [130]:
class TreeNode:
    def __init__(self, instance):
        self.instances = instance
        self.isleaf = True
        self.class_labels = instance.iloc[:, 0].mode().iloc[0]
        self.children = {}

class color:
    INFO = '\033[94m'
    WARNING = '\033[91m'
    BOLD = '\033[1m'
    END = '\033[0m'

class IncrementalDTree:
    def __init__(self):
        self.root = None


    def predict(self, instance, current_node):
        if current_node.isleaf == True:
            return current_node

        else:
            split_attribute_val = instance[current_node.instances].iloc[0]
            try:
                return self.predict(instance.drop(current_node.instances, axis = 1), current_node.children[split_attribute_val])
            except Exception as e:
                print(e)
                return 'bad'

    def entropy(self, columns, data, target_attributes):
        frequency = {}
        entropy_data = 0.0
        i = 0
        for entry in columns:
            if (target_attributes == entry):
                break
            i = i + 1
        i = i - 1
        for entry in data:
            if (entry[i] in frequency.keys()):
                frequency[entry[i]] += 1.0
            else:
                frequency[entry[i]]  = 1.0
        for frequency in frequency.values():
            entropy_data += (-frequency/len(data)) * math.log(frequency/len(data), 2)
        return entropy_data

    def informationGain(self, columns, data, attr, target_attributes):
        frequency_of_desired_var = {}
        subset_entropy = 0.0
        i = columns.index(attr)

        for entry in data:
            if (entry[i] in frequency_of_desired_var.keys()):
                frequency_of_desired_var[entry[i]] += 1.0
            else:
                frequency_of_desired_var[entry[i]]  = 1.0

        for val in frequency_of_desired_var.keys():
            valProb        = frequency_of_desired_var[val] / sum(frequency_of_desired_var.values())
            dataSubset     = [entry for entry in data if entry[i] == val]
            subset_entropy += valProb * self.entropy(columns, dataSubset, target_attributes)

        return (self.entropy(columns, data, target_attributes) - subset_entropy)

                    

    def find_split(self, node):
        columns = list(node.columns)
        best = columns[0]
        # And maximum information gain to be 0
        maximim_gain = 0;

        for attr in columns:
            # For each columnn find out the information gain
            new_info_gain = self.informationGain(columns, data, attr, columns[0])
            # If the new information gain happens to be more than the current maximum
            # Then update the current max info_gain with the new gain
            if new_info_gain>maximim_gain:
                maximim_gain = new_info_gain
                best = attr


        return best

    def add_node(self, instance, current_node):
        print(current_node.children)
        if current_node.isleaf == True:
            print('it was a leaf')
            pass

        else:
            print('it was not a leaf')
            split_attribute_val = instance[current_node.instances].iloc[0] 
            try:
                print('Trying to add the nodes to the children:', split_attribute_val, instance)
                self.add_node(instance.drop(current_node.instances, axis = 1), current_node.children[split_attribute_val])
            except Exception as e:
                print(e)
                new_node = TreeNode(instance)
                # print(split_attribute_val, new_node.instances)
                current_node.children[split_attribute_val] = new_node



    def split(self, node):
        print(color.BOLD + color.INFO + f'[Info] Initializing the split of the conflicting node hence extending the tree...' + color.END)
        split_feat = self.find_split(node.instances)
        # print(split_feat)
        # split_feat = node.instances.columns[split_feat]
        if split_feat == None:
            return node

        else:
            print(split_feat, '\n', node.instances)
            for val in node.instances[split_feat]:
                temp_instances = node.instances.drop(split_feat, axis = 1)
                new_node = TreeNode(temp_instances[node.instances[split_feat] == val])
                node.children[val] = new_node
            node.instances = split_feat
            node.isleaf = False
            # print(node.instances, node.children)
            return node

    def fit_instance(self, instance, label):
        print(color.BOLD + color.INFO + f'[Info] Training with new instnce...' + color.END)
        if self.root == None:
            print(color.BOLD + color.INFO + f'[Info] Initiation of the tree, this the very first node...' + color.END)
            new_node = TreeNode(instance)
            self.root = new_node

        elif self.root != None:
            print(color.BOLD + color.INFO + f'[Info] Initiating the updation of the tree...' + color.END)
            pred_node = self.predict(instance, self.root)
            if pred_node != 'bad':
                pred = pred_node.class_labels
                if pred == label:
                    # Could add the the instances to the node but skipping for now
                    print(color.BOLD + color.INFO + f'[Info] This instance was classified correctly...' + color.END)
                    pass
                else:
                    print(color.BOLD + color.INFO + f'[Info] Adding the instance to appropriate node...' + color.END)
                    pred_node.instances = pd.concat([pred_node.instances, instance[pred_node.instances.columns]])
                    pred_node = self.split(pred_node)
                    # print(pred_node.instances, pred_node.children)
                    # print(self.root.instances, self.root.children)
            else:
                print(self.root.children['tall'].children)
                self.add_node(instance, self.root)
                print(self.root.children['tall'].children)

    def print_tree(self, current_node):
        if current_node.isleaf == True:
            print(current_node.class_labels, end = '\t')
        elif current_node.isleaf != True:
            print(current_node.instances, end = '\t')
            for node in current_node.children:
                self.print_tree(current_node.children[node])
                print()

                    


                    

            

In [131]:
tree = IncrementalDTree()

In [132]:
tree.fit_instance(data.iloc[[0]], data.iloc[0, 0])

[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiation of the tree, this the very first node...[0m


In [108]:
tree.print_tree(tree.root)

-	

In [88]:
tree.fit_instance(data.iloc[[1]], data.iloc[1, 0])

<__main__.TreeNode object at 0x11b565640>
- -


In [89]:
tree.predict(data.iloc[[1]], tree.root).class_labels

'-'

In [90]:
tree.print_tree(tree.root)

-	

In [91]:
tree.fit_instance(data.iloc[[2]], data.iloc[2, 0])

<__main__.TreeNode object at 0x11b565640>
- +
height 
   class height   hair   eyes
0     -  short  blond  brown
2     +   tall  blond   blue


In [92]:
tree.print_tree(tree.root)

height	-	
+	


In [64]:
tree.fit_instance(data.iloc[[3]], data.iloc[3, 0])

<__main__.TreeNode object at 0x11b59dbe0>
+ -
hair 
   class   hair  eyes
2     +  blond  blue
3     -   dark  blue


In [65]:
data.iloc[[3]]

Unnamed: 0,class,height,hair,eyes
3,-,tall,dark,blue


In [66]:
tree.print_tree(tree.root)

height	-	
hair	+	
-	



In [67]:
tree.root.instances

'height'

In [68]:
tree.root.children

{'short': <__main__.TreeNode at 0x11b59d9a0>,
 'tall': <__main__.TreeNode at 0x11b59dbe0>}

In [69]:
tree.root.children['short'].instances

Unnamed: 0,class,hair,eyes
0,-,blond,brown


In [70]:
tree.root.children['tall'].instances

'hair'

In [71]:
tree.root.class_labels

'-'

In [72]:
pd.concat([data.iloc[[0]], data.iloc[[-1]]])

Unnamed: 0,class,height,hair,eyes
0,-,short,blond,brown
7,+,short,blond,blue


In [73]:
data.iloc[0].iloc[0]

'-'

In [74]:
temp_instances = data.drop('height', axis = 1)
temp_instances[data['height'] == 'tall']

Unnamed: 0,class,hair,eyes
1,-,dark,brown
2,+,blond,blue
3,-,dark,blue
5,+,red,blue
6,-,blond,brown


In [75]:
tree = IncrementalDTree()

for i in range(data.shape[0]):
    tree.fit_instance(data.iloc[[i]], data.iloc[i, 0])

<__main__.TreeNode object at 0x11b4efac0>
- -
<__main__.TreeNode object at 0x11b4efac0>
- +
height 
   class height   hair   eyes
0     -  short  blond  brown
2     +   tall  blond   blue
<__main__.TreeNode object at 0x11da6cf70>
+ -
hair 
   class   hair  eyes
2     +  blond  blue
3     -   dark  blue
<__main__.TreeNode object at 0x11da6ca00>
- -
'red'
bad
{'blond': <__main__.TreeNode object at 0x11da6c2b0>, 'dark': <__main__.TreeNode object at 0x11da6c1f0>}
{'short': <__main__.TreeNode object at 0x11da6ca00>, 'tall': <__main__.TreeNode object at 0x11da6cf70>}
it was not a leaf
Trying to add the nodes to the children: tall   class height hair  eyes
5     +   tall  red  blue
{'blond': <__main__.TreeNode object at 0x11da6c2b0>, 'dark': <__main__.TreeNode object at 0x11da6c1f0>}
it was not a leaf
Trying to add the nodes to the children: red   class hair  eyes
5     +  red  blue
'red'
{'blond': <__main__.TreeNode object at 0x11da6c2b0>, 'dark': <__main__.TreeNode object at 0x11da6c1f0>, '

In [76]:
tree.print_tree(tree.root)

height	hair	+	

hair	eyes	+	
-	

-	
+	



In [77]:
tree.root.instances

'height'

In [78]:
tree.root.children

{'short': <__main__.TreeNode at 0x11da6ca00>,
 'tall': <__main__.TreeNode at 0x11da6cf70>}

In [79]:
tree.root.children['short'].children

{'blond': <__main__.TreeNode at 0x11da6c400>}

In [80]:
tree.root.children['tall'].children

{'blond': <__main__.TreeNode at 0x11da6c2b0>,
 'dark': <__main__.TreeNode at 0x11da6c1f0>,
 'red': <__main__.TreeNode at 0x11b565550>}

In [82]:
for i in range(data.shape[0]):
    print(data.iloc[i, 0], tree.predict(data.iloc[[i]], tree.root).class_labels)

- +
- -
+ +
- -
'dark'


AttributeError: 'str' object has no attribute 'class_labels'

In [447]:
data['class'].mode().iloc[0]

'-'