In [3]:
import time

import numpy as np
import pandas as pd
import pydotplus
from IPython.display import Image, display
from numpy import log2 as log
from sklearn import tree

In [9]:
# Machine epsilon of 64-bit floats (the gap between 1.0 and the next
# representable value); kept as a tiny guard constant for divisions and logs.
eps = np.finfo(np.float64).eps

In [10]:
# Toy food-preference data: ten samples described by three categorical
# features, plus the binary target column `Eat`.
dataset = {
    'Taste': [
        'Salty', 'Spicy', 'Spicy', 'Spicy', 'Spicy',
        'Sweet', 'Salty', 'Sweet', 'Spicy', 'Salty',
    ],
    'Temperature': [
        'Hot', 'Hot', 'Hot', 'Cold', 'Hot',
        'Cold', 'Cold', 'Hot', 'Cold', 'Hot',
    ],
    'Texture': [
        'Soft', 'Soft', 'Hard', 'Hard', 'Hard',
        'Soft', 'Soft', 'Soft', 'Soft', 'Hard',
    ],
    'Eat': [
        'No', 'No', 'Yes', 'No', 'Yes',
        'Yes', 'No', 'Yes', 'Yes', 'Yes',
    ],
}

In [11]:
# Build the working frame from `dataset`, fixing the column order so the
# three features come first and the target `Eat` last, then echo the table.
df = pd.DataFrame(
    data=dataset,
    columns=['Taste', 'Temperature', 'Texture', 'Eat'],
)
print(df)

   Taste Temperature Texture  Eat
0  Salty         Hot    Soft   No
1  Spicy         Hot    Soft   No
2  Spicy         Hot    Hard  Yes
3  Spicy        Cold    Hard   No
4  Spicy         Hot    Hard  Yes
5  Sweet        Cold    Soft  Yes
6  Salty        Cold    Soft   No
7  Sweet         Hot    Soft  Yes
8  Spicy        Cold    Soft  Yes
9  Salty         Hot    Hard  Yes


In [12]:
# Shannon entropy (in bits) of the target column `Eat` before any split:
#     H = -sum_c p_c * log2(p_c)
# `value_counts(normalize=True)` gives the relative frequency of every class
# present in this string column in a single pass, instead of recounting the
# whole column once per class inside a loop. Only classes that actually occur
# are listed here, so log2 never receives 0.
class_probabilities = df.Eat.value_counts(normalize=True)
entropy_node = -(class_probabilities * np.log2(class_probabilities)).sum()

print('Parent Entropy is: ', entropy_node)

Parent Entropy is:  0.9709505944546686


In [14]:
def _entropy_bits(labels):
    """Return the Shannon entropy, in bits, of a pandas Series of class labels."""
    probabilities = labels.value_counts(normalize=True)
    # A categorical Series lists unobserved classes with frequency 0; drop
    # them, since 0 * log2(0) is taken as 0 in the entropy sum.
    probabilities = probabilities[probabilities > 0]
    # `0.0 - x` instead of `-x` so that a pure group reports 0.0, not -0.0.
    return 0.0 - float((probabilities * np.log2(probabilities)).sum())


def index(attribute, data=None, target='Eat', parent_entropy=None):
    """Print the conditional entropy and information gain of one attribute.

    The rows are grouped by the value of ``attribute``; the entropy of
    ``target`` inside each group is weighted by the group's share of the rows
    and summed. The information gain is the parent entropy minus that sum.
    Both numbers are printed rounded to three decimals; nothing is returned.

    Parameters
    ----------
    attribute : str
        Name of the column to evaluate as a split.
    data : pandas.DataFrame, optional
        Frame holding ``attribute`` and ``target``. Defaults to the
        notebook-level ``df``.
    target : str, optional
        Name of the class-label column. Defaults to ``'Eat'``.
    parent_entropy : float, optional
        Entropy of ``target`` before the split. When omitted it is computed
        from ``data[target]``, so the function does not depend on another
        cell having been run first.
    """
    frame = df if data is None else data
    if parent_entropy is None:
        parent_entropy = _entropy_bits(frame[target])

    total_rows = len(frame)
    conditional_entropy = 0.0
    # sort=False keeps the groups in order of first appearance.
    for _, group_labels in frame.groupby(attribute, sort=False)[target]:
        weight = len(group_labels) / total_rows
        conditional_entropy += weight * _entropy_bits(group_labels)

    information_gain = parent_entropy - conditional_entropy
    print("Entropy of ", attribute, "is: ", round(conditional_entropy, 3))
    print("IG ", "due to", attribute, "is: ", round(information_gain, 3))
    
# Report entropy and information gain for each candidate split feature.
for feature_name in ('Taste', 'Temperature', 'Texture'):
    index(feature_name)

Entropy of  Taste is:  0.761
IG  due to Taste is:  0.21
Entropy of  Temperature is:  0.951
IG  due to Temperature is:  0.02
Entropy of  Texture is:  0.925
IG  due to Texture is:  0.046


In [22]:
# One-hot encode the three categorical features so scikit-learn can use them.
x_columns = pd.get_dummies(df[['Taste', 'Temperature', 'Texture']])

# Fit one tree per random seed. With max_features=3 the classifier looks at a
# random subset of the encoded columns at each split, so different seeds can
# produce different trees.
for rand in range(10):
    dtree = tree.DecisionTreeClassifier(
        max_features=3,
        random_state=rand,
        max_depth=5,
        criterion='gini',
        splitter='best',
    )
    model = dtree.fit(x_columns, df['Eat'])

    # out_file=None makes export_graphviz return the DOT source as a string
    # rather than relying on the version-dependent default.
    dot_source = tree.export_graphviz(
        model,
        out_file=None,
        feature_names=list(x_columns.columns.values),
        # Take the labels from the fitted model so their order always agrees
        # with the order the classifier uses internally.
        class_names=[str(label) for label in model.classes_],
        filled=True,
        rounded=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_source)

    # Render once and reuse the bytes for both the inline view and the file.
    png_bytes = graph.create_png()
    # A bare `Image(...)` inside a loop body is discarded; it has to be passed
    # to display() for the tree to actually appear in the notebook.
    display(Image(png_bytes))
    with open('tree.png', 'wb') as png_file:
        png_file.write(png_bytes)

    print(rand)
    # NOTE(review): presumably this pause lets an external viewer watching
    # tree.png show each tree before it is overwritten - confirm, and drop the
    # sleep if the inline display is sufficient.
    time.sleep(5)
    
    

0
1
2
3
4
5
6
7
8
9
