In [83]:
import numpy as np
import pandas as pd
import logging
import math


In [7]:
arr_data = np.array([['-', 'short', 'blond', 'brown'],
                 ['-', 'tall', 'dark', 'brown'],
                 ['+', 'tall', 'blond', 'blue'],
                 ['-', 'tall', 'dark', 'blue'],
                 ['-', 'short', 'dark', 'blue'],
                 ['+', 'tall', 'red', 'blue'],
                 ['-', 'tall', 'blond', 'brown'],
                 ['+', 'short', 'blond', 'blue']])


data = pd.DataFrame(arr_data, columns = ['class', 'height', 'hair', 'eyes'])
data

Unnamed: 0,class,height,hair,eyes
0,-,short,blond,brown
1,-,tall,dark,brown
2,+,tall,blond,blue
3,-,tall,dark,blue
4,-,short,dark,blue
5,+,tall,red,blue
6,-,tall,blond,brown
7,+,short,blond,blue


In [182]:
class TreeNode:
    def __init__(self, instance):
        print(color.BOLD + color.INFO + f'Creating a new node...' + color.END)
        self.instances = instance
        self.isleaf = True
        self.class_labels = instance.iloc[:, 0].mode().iloc[0]
        self.children = {}

class color:
    INFO = '\033[94m'
    WARNING = '\033[91m'
    BOLD = '\033[1m'
    END = '\033[0m'

class IncrementalDTree:
    def __init__(self):
        self.root = None


    def predict(self, instance, current_node):
        if current_node.isleaf == True:
            return current_node

        else:
            split_attribute_val = instance[current_node.instances].iloc[0]
            try:
                return self.predict(instance.drop(current_node.instances, axis = 1), current_node.children[split_attribute_val])
            except Exception as e:
                print(color.BOLD + color.WARNING + f'Current tree can not comprehend the instance, its a bad input...' + color.END)
                return 'bad'

    def entropy(self, columns, data, target_attributes):
        frequency = {}
        entropy_data = 0.0
        i = 0
        for entry in columns:
            if (target_attributes == entry):
                break
            i = i + 1
        i = i - 1
        for entry in data:
            if (entry[i] in frequency.keys()):
                frequency[entry[i]] += 1.0
            else:
                frequency[entry[i]]  = 1.0
        for frequency in frequency.values():
            entropy_data += (-frequency/len(data)) * math.log(frequency/len(data), 2)
        return entropy_data

    def informationGain(self, columns, data, attr, target_attributes):
        frequency_of_desired_var = {}
        subset_entropy = 0.0
        i = columns.index(attr)

        for entry in data:
            if (entry[i] in frequency_of_desired_var.keys()):
                frequency_of_desired_var[entry[i]] += 1.0
            else:
                frequency_of_desired_var[entry[i]]  = 1.0

        for val in frequency_of_desired_var.keys():
            valProb        = frequency_of_desired_var[val] / sum(frequency_of_desired_var.values())
            dataSubset     = [entry for entry in data if entry[i] == val]
            subset_entropy += valProb * self.entropy(columns, dataSubset, target_attributes)

        return (self.entropy(columns, data, target_attributes) - subset_entropy)

                    

    def find_split(self, node):
        columns = list(node.columns)
        best = columns[0]
        # And maximum information gain to be 0
        maximim_gain = 0;

        for attr in columns:
            # For each columnn find out the information gain
            new_info_gain = self.informationGain(columns, data, attr, columns[0])
            # If the new information gain happens to be more than the current maximum
            # Then update the current max info_gain with the new gain
            if new_info_gain>maximim_gain:
                maximim_gain = new_info_gain
                best = attr

        return best

    def add_node(self, instance, current_node):
        print(current_node.children)
        if current_node.isleaf == True:
            pass

        else:
            split_attribute_val = instance[current_node.instances].iloc[0] 
            try:
                print(color.BOLD + color.INFO + f'Trying to add the nodes to the children of {split_attribute_val}...' + color.END)
                self.add_node(instance.drop(current_node.instances, axis = 1), current_node.children[split_attribute_val])
            except Exception as e:
                print(color.BOLD + color.INFO + f'Adding the node {split_attribute_val}...' + color.END)
                new_node = TreeNode(instance)
                current_node.children[split_attribute_val] = new_node
        print(color.BOLD + color.INFO + f'Successful addition of the node complete...' + color.END)



    def split(self, node):
        print(color.BOLD + color.INFO + f'[Info] Initializing the split of the conflicting node hence extending the tree...' + color.END)
        split_feat = self.find_split(node.instances)
        print(color.BOLD + color.INFO + f'[Info] Found the splitting feature {split_feat} using information gain...' + color.END)
        if split_feat == None:
            print(color.BOLD + color.WARNING + f'[Info] No Split node found...' + color.END)
            return node

        else:
            for val in node.instances[split_feat]:
                temp_instances = node.instances.drop(split_feat, axis = 1)
                new_node = TreeNode(temp_instances[node.instances[split_feat] == val])
                print(color.BOLD + color.INFO + f'[Info] Extending the tree on the node {val}...' + color.END)
                node.children[val] = new_node
            node.instances = split_feat
            node.isleaf = False
            print(color.BOLD + color.INFO + f'[Info] Successful split complete...' + color.END)
            return node

    def fit_instance(self, instance, label):
        print(color.BOLD + color.INFO + f'[Info] Training with new instnce...' + color.END)
        if self.root == None:
            print(color.BOLD + color.INFO + f'[Info] Initiation of the tree, this the very first node...' + color.END)
            new_node = TreeNode(instance)
            self.root = new_node

        elif self.root != None:
            print(color.BOLD + color.INFO + f'[Info] Initiating the updation of the tree...' + color.END)
            pred_node = self.predict(instance, self.root)
            if pred_node != 'bad':
                pred = pred_node.class_labels
                if pred == label:
                    # Could add the the instances to the node but skipping for now
                    print(color.BOLD + color.INFO + f'[Info] This instance was classified correctly...' + color.END)
                    pass
                else:
                    print(color.BOLD + color.INFO + f'[Info] Adding the instance to appropriate node...' + color.END)
                    pred_node.instances = pd.concat([pred_node.instances, instance[pred_node.instances.columns]])
                    pred_node = self.split(pred_node)
                    # print(pred_node.instances, pred_node.children)
                    # print(self.root.instances, self.root.children)
            else:
                print(self.root.children['tall'].children)
                self.add_node(instance, self.root)
                print(self.root.children['tall'].children)


                    


                    

            

In [183]:
tree = IncrementalDTree()

In [184]:
tree.fit_instance(data.iloc[[0]], data.iloc[0, 0])

[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiation of the tree, this the very first node...[0m
[1m[94mCreating a new node...[0m


In [186]:
tree.fit_instance(data.iloc[[1]], data.iloc[1, 0])

[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiating the updation of the tree...[0m
[1m[94m[Info] This instance was classified correctly...[0m


In [189]:
tree.fit_instance(data.iloc[[2]], data.iloc[2, 0])

[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiating the updation of the tree...[0m
[1m[94m[Info] Adding the instance to appropriate node...[0m
[1m[94m[Info] Initializing the split of the conflicting node hence extending the tree...[0m
[1m[94m[Info] Found the splitting feature height using information gain...[0m
[1m[94mCreating a new node...[0m
[1m[94m[Info] Extending the tree on the node short...[0m
[1m[94mCreating a new node...[0m
[1m[94m[Info] Extending the tree on the node tall...[0m
[1m[94m[Info] Successful split complete...[0m


In [202]:
tree = IncrementalDTree()

for i in range(data.shape[0]):
    tree.fit_instance(data.iloc[[i]], data.iloc[i, 0])

[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiation of the tree, this the very first node...[0m
[1m[94mCreating a new node...[0m
[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiating the updation of the tree...[0m
[1m[94m[Info] This instance was classified correctly...[0m
[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiating the updation of the tree...[0m
[1m[94m[Info] Adding the instance to appropriate node...[0m
[1m[94m[Info] Initializing the split of the conflicting node hence extending the tree...[0m
[1m[94m[Info] Found the splitting feature height using information gain...[0m
[1m[94mCreating a new node...[0m
[1m[94m[Info] Extending the tree on the node short...[0m
[1m[94mCreating a new node...[0m
[1m[94m[Info] Extending the tree on the node tall...[0m
[1m[94m[Info] Successful split complete...[0m
[1m[94m[Info] Training with new instnce...[0m
[1m[94m[Info] Initiating the updati

In [209]:
for i in range(data.shape[0]):
    try:
        print(data.iloc[i, 0], tree.predict(data.iloc[[i]], tree.root).class_labels)
    except:
        print(data.iloc[i, 0], tree.predict(data.iloc[[i]], tree.root))

- +
- -
+ +
- -
[1m[91mCurrent tree can not comprehend the instance, its a bad input...[0m
[1m[91mCurrent tree can not comprehend the instance, its a bad input...[0m
- bad
+ +
- -
+ +
