In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from HelperMethods import *

In [2]:
this = %pwd

In [3]:
PATH = (f'{this}/').replace('NoteBook/','')+'Data/'

In [4]:
column_names = ["sex", "length", "diameter", "height", "whole weight", 
                "shucked weight", "viscera weight", "shell weight", "rings"]
df = pd.read_csv(PATH + "abalone.data", names=column_names)

In [5]:
df.shape

(4177, 9)

In [6]:
df.head(50)

Unnamed: 0,sex,length,diameter,height,whole weight,shucked weight,viscera weight,shell weight,rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
5,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8
6,F,0.53,0.415,0.15,0.7775,0.237,0.1415,0.33,20
7,F,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16
8,M,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9
9,F,0.55,0.44,0.15,0.8945,0.3145,0.151,0.32,19


In [7]:
X = df.drop('rings', axis='columns')
y = df['rings']

In [8]:
d = {'M': 1, 'F': 2, 'I': 3}
X['sex'].replace(d,inplace = True)

X.head(10)

Unnamed: 0,sex,length,diameter,height,whole weight,shucked weight,viscera weight,shell weight
0,1,0.455,0.365,0.095,0.514,0.2245,0.101,0.15
1,1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07
2,2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21
3,1,0.44,0.365,0.125,0.516,0.2155,0.114,0.155
4,3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055
5,3,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12
6,2,0.53,0.415,0.15,0.7775,0.237,0.1415,0.33
7,2,0.545,0.425,0.125,0.768,0.294,0.1495,0.26
8,1,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165
9,2,0.55,0.44,0.15,0.8945,0.3145,0.151,0.32


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(X_test.values[:, 0])

[1. 1. 2. ... 1. 1. 2.]


In [10]:
def value_count(x, threshold):
    result = {}
    result[0] = 0
    result[1] = 0
    for value in x:
        if value <= threshold:
            result[0] += 1
        else:
            result[1]+=1
    return result

def target_value_count(y):
    result = {}
    for value in y:
        if value not in result:
            result[value] = 1
        else:
            result[value]+=1
    return result
    
    
def entropy(y, val_type, threshold = None):
    if val_type == "target":
        result = target_value_count(y)
        entropy = 0
        for value in result.values():
            p = value/len(y)
            entropy -= p*log2(p)
        return entropy
    else:
        result= value_count(y, threshold)
        entropy = 0
        for idx in range(2):
            p = result[idx]/len(y)
            entropy -= p*log2(p)
        return entropy
    

In [11]:
ja = X_train.index
print(ja.values)

[3823 3956 3623 ... 3092 3772  860]


# text

In [93]:
def information_gain(attribute, target, method):
    target_entropy = entropy(target, "target")
    #print(target_entropy)
    info_gains = []
    for attr in attribute:
        threshold = np.mean(attribute[attr])
        le_idx = np.where(attribute[attr] <= threshold)
        g_idx = np.where(attribute[attr] > threshold)
        y_le = target.values[le_idx]
        y_g = target.values[g_idx]
        # count hvor mange av hver ring som er i <= og >
        # ta count/len * log2 len/count
        le_entropy = entropy(y_le, "target")
        g_entropy = entropy(y_g, "target")
        
        
        attr_entropy = (len(y_le)/len(attribute[attr]))*le_entropy + (len(y_g)/len(attribute[attr]))*g_entropy
        information_gain = target_entropy - attr_entropy
        print(information_gain)
        info_gains.append(information_gain)
    return np.argmax(info_gains)    

In [94]:
def learn(X, y, n, impurity_measure):
    if len(X) == 0:
        return
    else:
        x_copy = X.copy()
        y_copy = y.copy()
        
        top_ig = information_gain(x_copy, y_copy, "entropy")
        threshold = np.mean(x_copy[x_copy.columns[top_ig]])
        le_idx = np.where(x_copy[x_copy.columns[top_ig]] <= threshold)
        g_idx = np.where(x_copy[x_copy.columns[top_ig]] > threshold)
    
        n.category = top_ig
        n.data = threshold
    
        left_child = mnode()
        right_child = mnode()
        
        left_child.data = le_idx
        right_child.data = g_idx
        
        #node.children[0] = left_child
        #node.children[1] = right_child
        
        n.add_child(1, le_idx, left_child)
        n.add_child(2, g_idx, right_child)
        for child in n.children:
            X_copy = pd.DataFrame(x_copy.values[child.data])
            Y_copy = pd.Series(y_copy.values[child.data])
            
            if(len(X_copy) == 1):
                child.isLeaf = True
                child.data = Y_copy[X_copy.index]
            elif len(np.unique(Y_copy.values)) == 1:
                child.isLeaf = True
                child.data = Y_copy.sample(n = 1)
            elif len(target_value_count(X_copy)) == 1:
                child.isleaf = True
                child.data = Y_copy.value_counts().argmax()
            else:
                learn(X_copy, Y_copy, child, "entropy") 

In [95]:
class mnode(object):
    
    def __init__(self):
        self.data = None
        self.parent = None
        self.children = []
        self.category = None
        self.isLeaf = False
        self.category = None
    
    def add_child(self, name, threshold, child):
        child.data = threshold
        self.children.append(child)
    

In [96]:
n = mnode()
learn(X_train, y_train, n, "entropy")
print(n.data)

0.04653328354200337
0.2789416696971627
0.28453260567552885
0.26979720612860447
0.25207583933117217
0.19942915059634547
0.23539430385491178
0.29413488658247555
0.15052274369602037
0.2733399632528597
0.2809545738548631
0.26108709712177713
0.2376007378098346
0.17924654398340412
0.23962725116089878
0.2900844448528854
0.1518729745307601
0.2033300877884967
0.22085948356716667
0.19855604585829667
0.18880006579988518
0.15801266963469152
0.1837438584362867
0.21373358158657618
0.08258400039974889
0.2901326420959123
0.34913587336176954
0.20711259464017706
0.28944333374780173
0.26266508676101585
0.2929317944475005
0.2955870646819738
0.026655486656954164
0.24696492794031943
0.264342596281383
0.204604228415032
0.2688088810377889
0.22481701468213222
0.2594241997324187
0.3186810564748721
0.11110041488973232
0.11109297211403746
0.07720800180775544
0.11978360167856117
0.13569821385206682
0.1418163630666256
0.12405864582514448
0.12962012676835744
0.16207907029836655
0.1797417550813858
0.19745755962632217

0.12255624891826566
0.12255624891826566
0.12255624891826566
0.31127812445913283
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.0
0.9709505944546686
0.0
0.01997309402197489
0.01997309402197489
0.4199730940219749
0.01997309402197489
0.3219280948873623
0.29989639116789113
0.19813134764391438
0.19432734946514918
0.24269725634146555
0.18314979888204763
0.18314979888204763
0.29989639116789113
0.1497614407675978
0.0
0.31127812445913294
0.2657121273840981
0.31127812445913294
0.1992035054291632
0.1992035054291632
0.1992035054291632
0.12255624891826589
0.0
0.5
0.5
0.5
0.31127812445913294
1.0
0.31127812445913294
0.5
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.5
0.5
0.5
0.8112781244591328
0.8112781244591328
0.8112781244591328
0.8112781244591328
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.12513349202374746
0.05667986096548061
0.06194345664850953
0.032496698040655136
0.0354943163529744
0.05452073875953367
0.03588947175667778

1.0
1.0
1.0
0.0
0.06764328912522188
0.08171772971358249
0.19988154538918934
0.02660648914316921
0.09903131913013752
0.08563311406560947
0.1646357654020254
0.0
0.06955478165230056
0.0448381015706647
0.09256076973114902
0.03320621934649526
0.03320621934649526
0.25935455038465394
0.09256076973114902
0.0
0.09127744624168022
0.0016177510177042276
0.11774369689072062
0.0058021490143458365
0.034851554559677256
0.0058021490143458365
0.09127744624168022
0.0
0.04879494069539858
0.015712127384097885
0.09235938389499487
0.04879494069539858
0.04879494069539858
0.0032289436203635224
0.0032289436203635224
0.0
0.02024420715375619
0.005977711423774124
0.0
0.12808527889139454
0.02024420715375619
0.02024420715375619
0.12808527889139454
0.0
0.2516291673878229
0.2516291673878229
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
1.0
0.0
0.0
1.0
1.0
1.0
1.0
0.0
0.12255624891826566
0.12255624891826566
0.0
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.311278124

1.0
1.0
1.0
1.0
1.0
0.0
0.0
0.0
0.0
0.31127812445913283
0.31127812445913283
0.0
0.0
0.0
0.2516291673878229
0.2516291673878229
0.0
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.0
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.0
0.32227456730312065
0.3222745673031211
0.47612072114927484
0.38034283670031055
0.3458519893382608
0.3458519893382608
0.5341889905464643
0.0
0.5916727785823275
0.5916727785823275
0.5916727785823275
0.5916727785823275
0.5916727785823275
0.5916727785823275
0.5916727785823275
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.31127812445913294
0.31127812445913294
0.0
0.5
0.5
0.8112781244591328
0.31127812445913294
0.0
0.2516291673878229
0.2516291673878229
0.0
0.9182958340544896
0.2516291673

0.0
0.2817040167966778
0.22317781278913928
0.28871004951737733
0.13670993773544282
0.1206498407272405
0.12789199041759014
0.3472362535249167
0.0
0.20000000000000018
0.40000000000000036
0.44643934467101576
0.275488750216347
0.275488750216347
0.3709505944546687
0.47548875021634696
0.0
0.41997309402197514
0.419973094021975
0.7219280948873624
0.41997309402197514
0.41997309402197514
0.419973094021975
0.0
0.0
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.0
0.5709505944546687
0.9709505944546686
0.5709505944546687
0.5709505944546687
0.5709505944546687
0.3219280948873622
0.7219280948873621
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
0.0
0.612197222702993
0.23588367293490142
0.6121972227029933
0.6849774484867257
0.38997500048077094
0.38997500048077094
0.7642045065086203
0.0
0.5

0.19087450462110933
0.19087450462110933
0.19087450462110933
0.19087450462110933
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.0
0.20443400292496516
0.20443400292496516
0.20443400292496516
0.0
0.0
0.20443400292496516
0.20443400292496516
0.0
0.17095059445466854
0.17095059445466854
0.5709505944546684
0.5709505944546684
0.5709505944546684
0.17095059445466854
0.17095059445466854
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.0
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.0
0.0005847384492674301
0.003793106467631091
0.0005847384492674301
0.031550000913642684
0.04544661495314117
0.0005847384492674301
0.013991599885873285
0.0
0.00358507859030599
0.1212321374138

0.2858798771737143
0.25
0.27439747034769923
0.0
0.01997309402197489
0.17095059445466876
0.0
0.01997309402197489
0.0
0.2490224995673065
0.40000000000000013
0.0
0.01997309402197489
0.4199730940219749
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.9709505944546686
0.4199730940219749
0.0
0.3219280948873623
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.0
0.0
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.12255624891826566
0.0
0.0
1.0
0.0
1.0
1.0
1.0
1.0
0.0
0.5849625007211563
0.25162916738782304
0.5849625007211563
0.9182958340544896
1.0
0.3333333333333335
0.25162916738782304
0.0
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.251

0.3236640028667539
0.19247615356725056
0.1775234437767188
0.24830070798136195
0.10799377578998737
0.1999532097468424
0.28769835802146115
0.5471223079526166
0.28769835802146115
0.5053760525087285
0.3527771392771224
0.37465487976059153
0.6178100464140881
0.4616114014997219
0.7688538376160006
0.3244093931715559
0.3244093931715559
0.9910760598382227
0.5466316153937782
0.5466316153937782
0.5466316153937782
0.6960736118322679
0.5709505944546684
0.17095059445466854
0.17095059445466854
0.5709505944546684
0.17095059445466854
0.17095059445466854
0.32192809488736196
0.5709505944546684
0.0
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.31127812445913294
0.5
0.31127812445913294
1.0
1.0
1.0
0.8112781244591328
0.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.35274578087187014
0.45066074636539266
0.2988252450030511
0.2785810378492952
0.26380917388354685
0.35274578087187036
0.12095203102640384
0

0.0
0.2078943480126667
0.12472493404031226
0.16246930914848612
0.36246930914848585
0.1624693091484859
0.2833830982290133
0.2406454496153465
0.0
0.4082857823008299
0.18606356007860736
0.40828578230082946
0.10218717094933338
0.3244093931715555
0.18606356007860736
0.18606356007860736
0.0
0.9709505944546686
0.5709505944546687
0.9709505944546686
0.5709505944546687
0.5709505944546687
0.5709505944546687
0.5709505944546687
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
0.12255624891826566
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.19432734946514918
0.19813134764391416
0.19813134764391416
0.24269725634146555
0.19813134764391416
0.19813134764391416
0.19813134764391438
0.0
0.37744375108173434
0.4591479170272448
0.2516291673878228
0.37744375108173434
0.33333333

0.08201610046687735
0.10623427588160927
0.1595597021874191
0.05324221123472439
0.054611410832265506
0.09813287463099041
0.07715002627329914
0.04421237813163881
0.13464999549833534
0.1410669268961553
0.15391092115708993
0.19377179570751224
0.08538899598582983
0.07044832664923106
0.12850134325546225
0.09471163941243965
0.3158890705194817
0.06425533366039216
0.16737205304970448
0.09982152813941525
0.08980925721197774
0.24181499644540771
0.10775573918167458
0.18182981325574876
0.11007306515453141
0.057918886645990764
0.15058006239993116
0.17497753274763062
0.057918886645990764
0.2710382325345573
0.15563906222956647
0.11007306515453141
0.19813134764391416
0.19813134764391416
0.19432734946514874
0.44477166784364597
0.19813134764391416
0.19813134764391416
0.24269725634146555
0.1497614407675978
0.3059584928680419
0.3059584928680419
0.4695652111147073
0.30595849286804233
0.1838509254004217
0.3059584928680419
0.5916727785823277
0.3059584928680419
0.2516291673878229
0.2516291673878229
0.0
0.25162

0.34436093777043353
0.5487949406953985
0.34436093777043353
0.29879494069539847
0.34436093777043353
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.4199730940219749
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.0
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
0.0
1.0
1.0
0.0
1.0
1.0
1.0
0.0
0.19555334819207637
0.7493221814795321
0.26420629790593075
0.7838130288415819
0.6299668749954281
0.5674354140487416
0.3835616043900347
0.41805245175208494
0.19811742113040376
0.9852281360342517
0.3059584928680419
0.1838509254004217
0.9852281360342517
0.4695652111147073
0.3059584928680419
0.3059584928680419
0.0
1.0
0.5
0.31127812445913294
0.5
0.5
0.5
0.31127812445913294
0.0
1.0
1.0
0.0
1.0
1.0
1.0
0.0
0.31668908831502063
0.65002

0.31127812445913283
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.5
0.5
1.0
0.5
0.31127812445913294
1.0
0.5
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.11235877274683936
0.17234735777250876
0.2075968679849347
0.24829939047414173
0.1530612952360464
0.1530612952360464
0.3256464505760148
0.0
0.08362813147086334
0.12448021444361879
0.29248125036057804
0.5207208396239078
0.5207208396239078
0.5932846232303122
0.29114688111028575
0.0
0.04879494069539858
0.0032289436203635224
0.46691718668869947
0.34758988139079716
0.20443400292496505
0.0032289436203635224
0.04879494069539858
0.0
0.19087450462110933
0.10917033867559889
0.10917033867559889
0.19087450462110933
0.10917033867559889
0.10917033867559889
0.3166890883150208
0.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.0
0.5
0.31127812445913294
0.8112781244591328
0.8112781244591328
0.8112781244591328
0.8112781244591328
0.31127812445913294
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.0

0.10218717094933316
0.5709505944546684
0.5709505944546684
0.9709505944546685
0.9709505944546685
0.9709505944546685
0.9709505944546685
0.17095059445466854
0.17095059445466854
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.109170338675599
0.0
0.9182958340544896
0.2516291673878229
0.0
0.044110417748401076
0.2516291673878229
0.4591479170272448
0.11196601106639914
0.2426871838145359
0.16341330822429168
0.11856748936786876
0.14422359657582273
0.12202746127974606
0.1397029845727995
0.14422359657582273
0.18905266854301606
0.15356543894636276
0.6840384356390417
0.07205662510638433
0.3022447138008828
0.07205662510638433
0.3022447138008828
0.007234486724834621
0.007214618474517209
0.1566566149130071
0.14091112046008591
0.07278022578373289
0.36778267378968765
0.07278022578373289
0.36778267378968765
0.007214618474517209
0.17095059445466865
0.17095059445466865
0.3219280948873623
0.3219280948873

0.5469585496803915
0.36947569914300704
0.4180245676273584
0.24366730306073414
0.43721025060632046
0.3127614097326217
0.1589493833535336
0.45443400292496516
0.29879494069539847
0.40563906222956647
0.31127812445913294
0.29879494069539847
0.5612781244591329
0.5487949406953985
0.29879494069539847
0.33333333333333326
0.4591479170272448
0.10917033867559889
0.2516291673878228
0.37744375108173434
0.33333333333333326
0.4591479170272448
0.33333333333333326
0.9182958340544894
0.0
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
0.0
0.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.14976144076759734
0.5617677112802779
0.37994952946209626
0.4130884364257579
0.400205759146095
0.14976144076759734
0.5617677112802779
0.3315796225857799
0.25162916738782304
0.0
0.3333333333333335
0.5849625007211563
0.3333333333333335
0.3333333333333335
1.0
0.5849625007211563
0.2516291673878229
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516

0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.12255624891826566
0.12255624891826566
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
0.43552050428266686
0.27137503526137663
0.5900048960119098
0.27924778248783766
0.22943684069673975
0.22222222222222232
0.2881717226496152
0.2024395456667467
0.5612781244591329
0.29879494069539847
0.29879494069539847
0.25
0.45443400292496516
0.5
0.5
0.20443400292496494
0.0
0.10917033867559889
0.37744375108173434
0.33333333333333326
0.2516291673878228
0.33333333333333326
0.33333333333333326
0.2516291673878228
0.0
0.12255624891826566
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.17095059445466865
0.2364527976600279
0.3219280948873622
0.3219280948873622
0.19999999999999996
0.20740339211469672
0.236452

0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.419973094021975
0.419973094021975
0.419973094021975
0.419973094021975
0.41997309402197514
0.41997309402197514
0.17095059445466854
0.41997309402197514
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
0.16301505333797994
0.2701625944992907
0.22639794349028453
0.23734139875305305
0.32411928545045887
0.2390281198143418
0.27801059599867894
0.24997157507711032
0.4085114273808972
0.4416823878601943
0.3195748203925737
0.2988252450030511
0.5935178892225355
0.4564542518259431
0.5453908588142662
0.4416823878601943
0.7219280948873624
0.419973094021975
0.7219280948873624
0.419973094021975
0.419973094021975
0.419973094021975
0.9709505944546687
0.419973094021975
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.38997500048077094
0.6849774484867257
0.6121972

0.699513850319966
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.8112781244591329
1.0
1.0
1.0
0.8112781244591329
1.0
0.8112781244591329
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.31668908831502085
0.5408520829727552
0.12581458369391152
0.4591479170272448
0.12581458369391152
1.0
0.4591479170272448
0.20751874963942196
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.07265052900800262
0.28456495225134626
0.28456495225134626
0.40392025873545023
0.3803428367003101
0.4384111060975
0.3803428367003101
0.2264966828541568
0.30595849286804144
0.18385092540042125
0.6995138503199656
0.9852281360342513
0.30595849286804144
0.5916727785823273
0.30595849286804144
0.5916727785823273
0.5
0.31127812445913294
0.5
1.0
0.5
1.0
0.5
0.8112781244591328
1.0
0.0

0.0923405259821024
0.17270758383651552
0.18677347311103443
0.05030645830252478
0.11143306662301855
0.035907013097200124
0.20208541211317987
0.09463762990369506
0.1036450430422664
0.14973450180972114
0.24324132902070295
0.11218936953209213
0.06138599666235778
0.061458590819218895
0.0765746623540362
0.15990799568736946
0.18787138895285493
0.2565243386667093
0.3726608774610891
0.21881472361493537
0.21881472361493537
0.21881472361493515
0.16074645421774525
0.2565243386667093
0.2916919971380594
0.6995138503199654
0.0
0.6995138503199654
0.6995138503199654
0.12808527889139398
0.6995138503199654
0.8631205685666308
0.32192809488736196
0.32192809488736196
0.0
0.5709505944546684
0.32192809488736196
0.17095059445466854
0.17095059445466854
0.17095059445466854
0.2516291673878229
0.9182958340544896
0.0
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.0
1.0
0.0
0.0
1.0
1.0
1.0
1.0
0.19087450462110944
0.0
0.4591479170272448
1.0
0.08170416594551044
0.08170

0.0
0.40563906222956647
0.75
0.29356444319959674
0.6556390622295665
0.6556390622295665
0.15563906222956647
0.6100730651545316
0.0
0.31127812445913294
0.31127812445913294
0.31127812445913294
1.0
1.0
0.5
1.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.12255624891826566
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.032902793300143784
0.15493901967543833
0.07963021125120928
0.08960068188917525
0.07008950197730002
0.04288715048844294
0.09953038108792267
0.14356089992632004
0.10994105590724867
0.4963544570463365
0.26993124265759993
0.20120940047662472
0.5604010269485467
0.22455344788962828
0.2736360083617351
0.40482112751237675
0.09209350450389087
0.33817003009903956
0.16074645421774525
0.2188147236149356
0.4339479145480034
0.16074645421774525
0.2188147236149356
0.37266087746108933
0.07290559532005603
0.17095059445466865
0.17095059445466865
0.3219280948873623
0.17095059445466865
0.3219280948

0.01997309402197489
0.4199730940219749
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
1.0
0.0
1.0
1.0
1.0
1.0
1.0
0.6849774484867257
0.38997500048077094
0.38997500048077094
0.4627552262645036
0.16775277825854884
0.4627552262645036
0.6849774484867257
0.4627552262645036
0.0
0.0
0.31127812445913283
0.0
0.0
0.0
0.31127812445913283
1.0
0.7219280948873621
0.9709505944546687
0.5709505944546687
0.9709505944546687
0.5709505944546687
0.5709505944546687
0.9709505944546686
0.5709505944546687
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
0.0
0.0
1.0
1.0
1.0
1.0
0.0
0.05990091585967994
0.05205572004659187
0.04437419995422287
0.09889596963502356
0.10662352063736558
0.041619733235557455
0

1.0
1.0
1.0
1.0
1.0
1.0
0.1209520310264034
0.21287749372661402
0.27094242174853744
0.24305959849402403
0.24305959849402403
0.24305959849402403
0.3948950998563656
0.531762739920197
0.09235938389499498
0.2935644431995964
0.13792538097003
0.19920350542916282
0.13792538097003
0.13792538097003
0.13792538097003
0.13792538097003
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.1908745046211091
0.45914791702724456
0.9182958340544893
0.45914791702724456
0.45914791702724456
0.45914791702724456
0.6500224216483539
0.9182958340544893
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.12255624891826566
0.8112781244591328
0.8112781244591328
0.17124922156807054
0.13276511682997283
0.15240145576798403
0.18372135776427756
0.16426637118651355
0.17385381729014204
0.17396090168607836
0.10418675187300197
0.23747753274763017
0.1393988504091137
0.15138729830502173
0.26852607588879973
0.24283772741246334
0.2776949226789722
0.27950809424595535


0.30595849286804144
0.30595849286804144
0.9852281360342513
0.30595849286804144
0.30595849286804144
0.5916727785823273
0.30595849286804144
0.46956521111470684
0.31127812445913294
0.31127812445913294
0.5
0.8112781244591328
0.5
0.5
0.5
0.8112781244591328
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.20340156491092998
0.1137162687307236
0.18397612148223663
0.11747125320500018
0.24204350825199805
0.13694469841603274
0.16354676509937294
0.19734274996713053
0.24964572464644386
0.1326105357175562
0.19813241605859488
0.10657993219467743
0.3411982085103613
0.24451096667743633
0.31382131154614257
0.22226882768222467
0.5133978044039615
0.3799495294620967
0.4130884364257579
0.37994952946209626
0.5617677112802784
0.74358589309846
0.5949066182439395
0.6952159862221436
0.9182958340544893
0.6666666666666665
0.6666666666666665
0.6666666666666665
0.9999999999999998
0.6666666666666665
0.918295834054

0.0975738379169857
0.08241151111398093
0.05521903055015054
0.07445837567275859
0.1295379962424139
0.07780867536204727
0.1005578259780524
0.09322431309967705
0.0765414483837743
0.11846916551797948
0.07075255547519133
0.11499244333691339
0.14970088050531993
0.16247160590105603
0.12262866651057402
0.16299037033323804
0.023749182048527828
0.14818821115892722
0.07749763780570129
0.07077736549524438
0.12326327116704339
0.05752804128786426
0.07342968785929349
0.021968202190609354
0.11435134322143092
0.11435134322143092
0.12952159992020285
0.019113247983335357
0.4407681514249233
0.2909944834379399
0.07840431930888458
0.07697859764590564
0.1295052110835988
0.3113233929017807
0.4485756660224114
0.2629534860254643
0.08493930238604763
0.2629534860254643
0.3113233929017807
0.12715688845060713
0.3219280948873623
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.01997309402197489
0.3219280948873623
0.8112781244591328
0.31127812445913283
0.3112781244

0.5916727785823275
0.31127812445913283
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.12255624891826566
0.12255624891826566
0.31127812445913283
0.31127812445913283
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.23777146126924098
0.3598790287368612
0.16310135001089954
0.3207303568337907
0.5714285714285718
0.5714285714285716
0.3149368513732407
0.43454913999520284
0.3059584928680419
0.699513850319966
0.1838509254004217
0.3059584928680419
0.9852281360342517
0.5916727785823277
0.9852281360342517
0.1838509254004217
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812

0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.31127812445913283
0.8112781244591328
0.31127812445913283
0.19704399805002337
0.31320208998448584
0.19704399805002337
0.29114688111028575
0.29114688111028575
0.29114688111028575
0.19704399805002337
0.3540541729572413
0.46956521111470684
0.577406282852345
0.46956521111470684
0.5916727785823273
0.30595849286804144
0.5916727785823273
0.5916727785823273
0.18385092540042125
0.0
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.5
0.8112781244591328
0.31127812445913294
0.5
0.5
0.5
0.5
0.31127812445913294
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.9182958340544896
0.2516291673878229
0.3219280948873623
0.17095059445466865
0.3219280948873623
0.17095059445466865
0.17095059445466865
0.17095059445466865
0.0729055

0.15604519526230476
0.19179633831433618
0.2596837011791151
0.14394264024183157
0.2973637142241694
0.1192184621870358
0.12887916903138885
0.1161646995479062
0.18559345844177488
0.22142785744264315
0.35036183949567623
0.25697039155792156
0.4180245676273586
0.17095059445466876
0.12451124978365335
0.12451124978365335
0.2954618442383219
0.3709505944546685
0.5567796494470394
0.3999999999999999
0.6954618442383218
0.044110417748401076
0.0
0.0
0.0
0.044110417748401076
0.2516291673878229
0.4591479170272448
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.2516291673878229
0.9182958340544896
0.5
0.8112781244591328
0.31127812445913294
0.8112781244591328
0.8112781244591328
1.0
1.0
0.8112781244591328
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.06991005367674302
0.14269027946047563
0.10218717094933333
0.14269027946047563
0.10218717094933333
0.10218717094933333
0.06991005367674302
0.06991005367674302
0.12255624891826566
0.31127

1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.2516291673878229
0.2516291673878229
0.2516291673878229
0.9182958340544896
0.9182958340544896
0.9182958340544896
0.9182958340544896
0.0
0.5
0.5
0.5
0.31127812445913294
0.31127812445913294
1.0
0.5
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.9182958340544896
1.0
0.9182958340544896
1.0
0.9182958340544896
1.0
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.0
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.9182958340544894
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.6887218755408671
0.29114688111028575
0.29114688111028575
0.2704260414863775
0.29114688111028575
0.4591479170272448
0.12448021444361879
0.44541483066220056
0.0
0.2657121273840979
0.2657121273840979
0.2500000000000001
0.20443400292496494
0.20443400292496494
0.2657121273840979
0.4669171866886994
0.0
0.9182958340544894
0.9182958

In [16]:
def printer(n):
    print(n.data)
    for child in n.children:
        if child.children != None:
            print(child.data)
            print(child.category)
            printer(child)        

In [17]:
printer(n)

0.24035089399744528
0.13220343137254903
7
0.13220343137254903
0.2765394242803504
2
0.2765394242803504
0.22159663865546197
2
0.22159663865546197
0.024089403973509907
7
0.024089403973509907
0.018020833333333323
5
0.018020833333333323
0.008675675675675681
7
0.008675675675675681
0.007176470588235295
5
0.007176470588235295
0.10222222222222223
2
0.10222222222222223
0.11833333333333335
1
0.11833333333333335
0    1
dtype: int64
None
0    1
dtype: int64
0.14
1
0.14
0    3
dtype: int64
None
0    3
dtype: int64
0    2
dtype: int64
None
0    2
dtype: int64
0.005916666666666666
5
0.005916666666666666
1    4
dtype: int64
None
1    4
dtype: int64
2.3333333333333335
0
2.3333333333333335
0    3
dtype: int64
None
0    3
dtype: int64
0.11499999999999999
2
0.11499999999999999
0    4
dtype: int64
None
0    4
dtype: int64
0    3
dtype: int64
None
0    3
dtype: int64
0.0065625
7
0.0065625
0    4
dtype: int64
None
0    4
dtype: int64
0.12375
2
0.12375
0    5
dtype: int64
None
0    5
dtype: int64
0.04500000000

0.3125
0    7
dtype: int64
None
0    7
dtype: int64
0    13
dtype: int64
None
0    13
dtype: int64
0    6
dtype: int64
None
0    6
dtype: int64
0.08131578947368422
3
0.08131578947368422
0.04195
6
0.04195
0.07549999999999998
3
0.07549999999999998
0.074375
3
0.074375
0    7
dtype: int64
None
0    7
dtype: int64
0.17614285714285713
4
0.17614285714285713
0.3333333333333333
1
0.3333333333333333
0.325
1
0.325
0    7
dtype: int64
None
0    7
dtype: int64
0    6
dtype: int64
None
0    6
dtype: int64
0    6
dtype: int64
None
0    6
dtype: int64
0.18175
4
0.18175
0.2525
2
0.2525
0    7
dtype: int64
None
0    7
dtype: int64
0    6
dtype: int64
None
0    6
dtype: int64
1    7
dtype: int64
None
1    7
dtype: int64
0    7
dtype: int64
None
0    7
dtype: int64
0.33
1
0.33
0.315
1
0.315
0    5
dtype: int64
None
0    5
dtype: int64
0.245
2
0.245
0    6
dtype: int64
None
0    6
dtype: int64
0    5
dtype: int64
None
0    5
dtype: int64
0.25166666666666665
2
0.25166666666666665
0.33999999999999997
1
0.339

0.42
0    9
dtype: int64
None
0    9
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.42333333333333334
1
0.42333333333333334
0    12
dtype: int64
None
0    12
dtype: int64
0.4325
1
0.4325
0    11
dtype: int64
None
0    11
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.07491666666666667
6
0.07491666666666667
1    9
dtype: int64
None
1    9
dtype: int64
0.4275
1
0.4275
0.4125
1
0.4125
0    10
dtype: int64
None
0    10
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
0.4425
1
0.4425
0    9
dtype: int64
None
0    9
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
0.11988709677419357
7
0.11988709677419357
0.4052083333333332
4
0.4052083333333332
0.10833333333333334
3
0.10833333333333334
0.4
1
0.4
0    16
dtype: int64
None
0    16
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0.1125
3
0.1125
1    8
dtype: int64
None
1    8
dtype: int64
1    11
dtype: int64
None
1    11
dtype: int64
0.19441666666666668
5
0.19441666666666668
0.44
1
0.

0.24483597285067873
2.1875
0
2.1875
0.12511538461538466
3
0.12511538461538466
0.4613924050632914
1
0.4613924050632914
1.5227272727272727
0
1.5227272727272727
0.1504285714285714
7
0.1504285714285714
0.09650000000000002
6
0.09650000000000002
0.4144
4
0.4144
0.435
1
0.435
0.4275
1
0.4275
0    9
dtype: int64
None
0    9
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.45166666666666666
4
0.45166666666666666
0.13975
7
0.13975
0.4525
1
0.4525
0    7
dtype: int64
None
0    7
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
1    11
dtype: int64
None
1    11
dtype: int64
0.4525
1
0.4525
0    8
dtype: int64
None
0    8
dtype: int64
0    15
dtype: int64
None
0    15
dtype: int64
0.16150000000000003
7
0.16150000000000003
0.4516666666666667
1
0.4516666666666667
2    10
dtype: int64
None
2    10
dtype: int64
0.4583333333333333
1
0.4583333333333333
0    11
dtype: int64
None
0    11
dtype:

dtype: int64
None
1    9
dtype: int64
0.24020000000000002
5
0.24020000000000002
2    8
dtype: int64
None
2    8
dtype: int64
0.5225
1
0.5225
0    9
dtype: int64
None
0    9
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.1646881188118812
7
0.1646881188118812
2.0454545454545454
0
2.0454545454545454
0.149625
7
0.149625
0.4877083333333334
1
0.4877083333333334
0.31675000000000003
5
0.31675000000000003
1.2
0
1.2
0.135875
6
0.135875
2    7
dtype: int64
None
2    7
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
3    8
dtype: int64
None
3    8
dtype: int64
0.12285714285714287
3
0.12285714285714287
0.5984999999999999
4
0.5984999999999999
0.3833333333333333
2
0.3833333333333333
2    7
dtype: int64
None
2    7
dtype: int64
0.5033333333333333
1
0.5033333333333333
0    9
dtype: int64
None
0    9
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0.14049999999999999
6
0.14049999999

dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.6948333333333333
4
0.6948333333333333
0    12
dtype: int64
None
0    12
dtype: int64
0    14
dtype: int64
None
0    14
dtype: int64
0.416923076923077
2
0.416923076923077
0.53125
1
0.53125
0.13125
3
0.13125
0    10
dtype: int64
None
0    10
dtype: int64
0.515
1
0.515
0    15
dtype: int64
None
0    15
dtype: int64
0    11
dtype: int64
None
0    11
dtype: int64
0.684625
4
0.684625
0.12
3
0.12
0    9
dtype: int64
None
0    9
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.54
1
0.54
0    9
dtype: int64
None
0    9
dtype: int64
0    11
dtype: int64
None
0    11
dtype: int64
0.518
1
0.518
1    9
dtype: int64
None
1    9
dtype: int64
1.5
0
1.5
0    12
dtype: int64
None
0    12
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.15447727272727274
6
0.15447727272727274
0.13959090909090907
6
0.13959090909090907
0.1284
6
0.1284
0    12
dtype: int64
None
0    12
dtype: int64
0.49833333333333335
1
0.49833333333333335

0    10
dtype: int64
0.4533333333333333
2
0.4533333333333333
1    11
dtype: int64
None
1    11
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
2.125
0
2.125
1.0135714285714286
4
1.0135714285714286
0.5708333333333333
1
0.5708333333333333
0.54
1
0.54
0    8
dtype: int64
None
0    8
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.12625
3
0.12625
0    12
dtype: int64
None
0    12
dtype: int64
1    8
dtype: int64
None
1    8
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.4443500000000001
5
0.4443500000000001
0.569655172413793
1
0.569655172413793
0.15269230769230768
3
0.15269230769230768
0.55
1
0.55
0.44875
2
0.44875
0.535
1
0.535
0    12
dtype: int64
None
0    12
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
1    8
dtype: int64
None
1    8
dtype: int64
0.8375
4
0.8375
2.0
0
2.0
0    8
dtype: int64
None
0    8
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0    8
dtype: int64
None

6
0.19569999999999999
1    11
dtype: int64
None
1    11
dtype: int64
0.5733333333333333
1
0.5733333333333333
0    11
dtype: int64
None
0    11
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0    15
dtype: int64
None
0    15
dtype: int64
0.168125
3
0.168125
0.305
7
0.305
0.56875
1
0.56875
0    13
dtype: int64
None
0    13
dtype: int64
0.5825
1
0.5825
0    17
dtype: int64
None
0    17
dtype: int64
0    13
dtype: int64
None
0    13
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.575
1
0.575
0.57
1
0.57
0    20
dtype: int64
None
0    20
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.2867352941176471
7
0.2867352941176471
0.19283333333333333
6
0.19283333333333333
0.571
1
0.571
0.8178333333333333
4
0.8178333333333333
0    12
dtype: int64
None
0    12
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0.5874999999999999
1
0.5874999999999999
0    12
dtype: int64
None
0    12
dtype: int64
0    11
dtype:

dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.615
1
0.615
0    10
dtype: int64
None
0    10
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
2.5
0
2.5
0    11
dtype: int64
None
0    11
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0.14765306122448982
3
0.14765306122448982
0.5596428571428571
5
0.5596428571428571
0.48181818181818187
2
0.48181818181818187
1.2
0
1.2
0.5428750000000001
5
0.5428750000000001
0    8
dtype: int64
None
0    8
dtype: int64
1    9
dtype: int64
None
1    9
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.6316666666666667
1
0.6316666666666667
1.75
0
1.75
0    10
dtype: int64
None
0    10
dtype: int64
0    9
dtype: int64
None
0    9
dtype: int64
0.6475
1
0.6475
0    11
dtype: int64
None
0    11
dtype: int64
0    12
dtype: int64
None
0    12
dtype: int64
0.20883333333333334
6
0.20883333333333334
1    8
dtype: int64
None
1    8
dtype: int64
0    14
dtype: int64
None
0    14
dtype: int64
0.2476428571428571
6
0.247642857142

0.4709999999999999
2
0.4709999999999999
0.6016666666666667
1
0.6016666666666667
0    11
dtype: int64
None
0    11
dtype: int64
0.6125
1
0.6125
0    10
dtype: int64
None
0    10
dtype: int64
0    11
dtype: int64
None
0    11
dtype: int64
1    10
dtype: int64
None
1    10
dtype: int64
0.6425
1
0.6425
0    14
dtype: int64
None
0    14
dtype: int64
0.6549999999999999
1
0.6549999999999999
0    9
dtype: int64
None
0    9
dtype: int64
0    11
dtype: int64
None
0    11
dtype: int64
2.1666666666666665
0
2.1666666666666665
0.31823333333333337
7
0.31823333333333337
0.6178571428571429
1
0.6178571428571429
0.5775
1
0.5775
0    9
dtype: int64
None
0    9
dtype: int64
0    13
dtype: int64
None
0    13
dtype: int64
4    9
dtype: int64
None
4    9
dtype: int64
1.2086875
4
1.2086875
0.6266666666666666
1
0.6266666666666666
0    10
dtype: int64
None
0    10
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
0.16299999999999998
3
0.16299999999999998
0.605
1
0.605
0.4825
2
0.4825
0    8
dtype: int64


dtype: int64
0.5116666666666667
2
0.5116666666666667
0.5025
2
0.5025
0    13
dtype: int64
None
0    13
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0    8
dtype: int64
None
0    8
dtype: int64
1    12
dtype: int64
None
1    12
dtype: int64
0.6117450980392156
5
0.6117450980392156
0.33934782608695657
6
0.33934782608695657
0.31300000000000006
6
0.31300000000000006
1.4415714285714285
4
1.4415714285714285
0.39449999999999996
7
0.39449999999999996
0    15
dtype: int64
None
0    15
dtype: int64
1    12
dtype: int64
None
1    12
dtype: int64
0.6383333333333333
1
0.6383333333333333
0.5025
2
0.5025
0    15
dtype: int64
None
0    15
dtype: int64
0    20
dtype: int64
None
0    20
dtype: int64
0    18
dtype: int64
None
0    18
dtype: int64
0.4170714285714286
7
0.4170714285714286
1.6666666666666667
0
1.6666666666666667
0    19
dtype: int64
None
0    19
dtype: int64
0.6525000000000001
1
0.6525000000000001
0    12
dtype: int64
None
0    12
dtype: int64
0    10
dtype: int64
None
0    10


0.46453125
0.7942142857142857
5
0.7942142857142857
0    11
dtype: int64
None
0    11
dtype: int64
1.3333333333333333
0
1.3333333333333333
0.72
1
0.72
0    9
dtype: int64
None
0    9
dtype: int64
0    10
dtype: int64
None
0    10
dtype: int64
0    11
dtype: int64
None
0    11
dtype: int64
0.5772222222222221
2
0.5772222222222221
1.9576250000000002
4
1.9576250000000002
0    17
dtype: int64
None
0    17
dtype: int64
2    12
dtype: int64
None
2    12
dtype: int64
0.19999999999999998
3
0.19999999999999998
1.6666666666666667
0
1.6666666666666667
0    12
dtype: int64
None
0    12
dtype: int64
0.695
1
0.695
0    10
dtype: int64
None
0    10
dtype: int64
0    29
dtype: int64
None
0    29
dtype: int64
1    11
dtype: int64
None
1    11
dtype: int64
0.569557142857143
7
0.569557142857143
0.42217187500000003
6
0.42217187500000003
0.5107916666666668
7
0.5107916666666668
0.698913043478261
1
0.698913043478261
1.77975
4
1.77975
0.5435714285714286
2
0.5435714285714286
0.6816666666666666
1
0.68166666666666

In [34]:
def predict_row(x,node):
    while len(x) > 0:
        while node.isLeaf == False: 
            attr_var = x[node.category] 
            if attr_var <= node.data:
                child_node = node.children[0]
            elif attr_var > node.data:
                child_node = node.children[1]
            if child_node.isLeaf: 
                return child_node.data.values.item()
            node = child_node

In [35]:
def predict(X, node):
    counter = 0
    copy = X.copy()
    result = {}
    for i in X.values: 
        result[counter] = (predict_row(i,node))
        counter += 1
    return result

In [36]:
pred = predict(X_test, n)

In [39]:
def accuracy(y_true, y_pred): 
        result = 0
        for idx, y_ in enumerate(y_pred.values()): 
            if y_ == y_true.values[idx]: result+=1
        return (result/len(y_true))


In [40]:
accuracy(y_test, pred)

0.19808612440191387

In [53]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()

In [54]:
clf.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [55]:
y_ = clf.predict(X_test)

In [56]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_)

0.20287081339712917