In [1]:
import pandas as pd
import math
import numpy as np

# Load the training table. Every column except the "answer" label column is
# treated as a candidate attribute for splitting.
data = pd.read_csv("dataset.csv")
features = list(data.columns)
features.remove("answer")

In [2]:
class Node:
  """A single vertex of the decision tree.

  Every node starts out as an empty, non-leaf node; the tree builder fills
  in the fields afterwards.
  """

  def __init__(self):
    # Label shown for this node (an attribute name or an attribute value).
    self.value = ""
    # Prediction carried by the node; only meaningful when isLeaf is True.
    self.pred = ""
    # True once the node has been marked as a terminal (prediction) node.
    self.isLeaf = False
    # Child nodes; a fresh list per instance so nodes never share children.
    self.children = []

In [3]:
def entropy(example):
  """Shannon entropy (base 2) of the "answer" column of ``example``.

  The labels are counted in a single vectorised pass and every distinct
  label is treated as its own class, so the result does not depend on the
  positive class being spelled "yes".  For a two-class yes/no column the
  value is the same as the classic -(p*log2(p) + n*log2(n)).

  Args:
    example: pandas DataFrame with an "answer" column of class labels.

  Returns:
    A Python float: 0.0 when the frame has no rows or only one distinct
    label, otherwise -sum(p_i * log2(p_i)) over the distinct labels.
  """
  if len(example) == 0:
    return 0.0
  # dropna=False keeps missing labels as a class of their own instead of
  # silently discarding those rows.  The "> 0" filter drops unobserved
  # categories, which value_counts reports with a zero count for
  # categorical columns.
  label_counts = example["answer"].value_counts(dropna=False).tolist()
  counts = [c for c in label_counts if c > 0]
  if len(counts) < 2:
    # A pure (single-label) set carries no uncertainty.
    return 0.0
  total = float(sum(counts))
  return -sum((c / total) * math.log(c / total, 2) for c in counts)


In [4]:
def info_gain(example, attr):
    """Information gain obtained by splitting ``example`` on column ``attr``.

    Starts from the entropy of the whole frame and subtracts, for each
    distinct value of ``attr`` (in the sorted order returned by np.unique),
    the entropy of the matching rows weighted by their share of all rows.

    Args:
        example: pandas DataFrame containing ``attr`` and the label column
            read by ``entropy``.
        attr: name of the column to evaluate as a split.

    Returns:
        The remaining value after all weighted subset entropies have been
        subtracted from the entropy of ``example``.
    """
    distinct_values = np.unique(example[attr])
    remaining = entropy(example)
    for value in distinct_values:
        subset = example[example[attr] == value]
        weight = float(len(subset)) / float(len(example))
        remaining -= weight * entropy(subset)
    return remaining

In [5]:
def _majority_label(example):
    """Return the most frequent "answer" label as an array of at most one element."""
    # mode() can report several tied labels; keep only the first one listed.
    # An empty frame yields an empty array instead of raising.
    return example["answer"].mode().to_numpy()[:1]


def ID3(example, attrs):
    """Build a decision tree for ``example`` using the ID3 procedure.

    Tree layout: an attribute node has ``value`` set to the chosen attribute
    name and one child per distinct value of that attribute.  Each of those
    children has ``value`` set to the attribute value and is either a leaf
    (``isLeaf`` True, ``pred`` holding the label) or a pass-through node
    whose single child is the attribute node of the subtree.

    Args:
        example: pandas DataFrame with the attribute columns and an
            "answer" label column.
        attrs: list of attribute column names still available for
            splitting; it is not modified.

    Returns:
        The root Node of the (sub)tree.  When ``attrs`` is empty the root is
        itself a leaf predicting the majority label of ``example``.
    """
    root = Node()

    if len(attrs) == 0:
        # Nothing to split on: fall back to a single majority-label leaf
        # rather than indexing the frame with an empty column name.
        root.isLeaf = True
        root.pred = _majority_label(example)
        return root

    # Choose the first attribute with the highest gain.  Starting from -inf
    # (instead of 0) guarantees that an attribute is selected even when every
    # gain is zero, e.g. XOR-like data where no single attribute helps alone.
    max_gain = float("-inf")
    max_feat = attrs[0]
    for feature in attrs:
        gain = info_gain(example, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat

    # Attributes left for the subtrees; identical for every branch.
    new_attrs = attrs.copy()
    new_attrs.remove(max_feat)

    for u in np.unique(example[max_feat]):
        subdata = example[example[max_feat] == u]
        branch = Node()
        branch.value = u
        if entropy(subdata) == 0.0:
            # Pure subset: predict its label(s) directly.
            branch.isLeaf = True
            branch.pred = np.unique(subdata["answer"])
        elif len(new_attrs) == 0:
            # Impure subset but no attributes remain (contradictory rows):
            # predict the majority label instead of recursing.
            branch.isLeaf = True
            branch.pred = _majority_label(subdata)
        else:
            branch.children.append(ID3(subdata, new_attrs))
        root.children.append(branch)

    return root

In [6]:
def printTree(root: Node, depth=0):
    """Print the tree rooted at ``root``, one node per line, depth-first.

    Each node is indented with ``depth`` tab characters.  A leaf is followed
    by " -> " and its prediction; the newline printed after every node means
    a leaf line is followed by one blank line.

    Args:
        root: node to print together with all of its descendants.
        depth: indentation level of ``root``; children use ``depth + 1``.
    """
    indent = "\t" * depth
    print(f"{indent}{root.value!s}", end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    next_depth = depth + 1
    for node in root.children:
        printTree(node, next_depth)



In [7]:

def classify(root: Node, new):
    """Predict the label of one example by walking the decision tree.

    At an attribute node, the example's value for that attribute selects the
    child whose ``value`` matches.  A matching leaf ends the walk; a matching
    non-leaf child forwards to the attribute node stored as its first child.

    Args:
        root: attribute node to start from (``root.value`` names the
            attribute to test), or a leaf node.
        new: mapping from attribute name to the example's value.

    Returns:
        The ``pred`` stored on the leaf that was reached, or None when no
        child matches the example's value at some attribute node.

    Raises:
        KeyError: if ``new`` lacks the attribute named by a visited node.
    """
    if root.isLeaf:
        # Guard for a tree that consists of a single leaf: there is no
        # attribute to test, so report its prediction directly.
        print ("Predicted Label for new example", new," is:", root.pred)
        return root.pred

    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print ("Predicted Label for new example", new," is:", child.pred)
                # Stop here and hand the prediction back to the caller; the
                # former bare `exit` expression did neither.
                return child.pred
            return classify(child.children[0], new)

    # The example's value was not seen for this attribute during training.
    return None

In [8]:
# Train on the whole table and show the learned tree.
root = ID3(data, features)
print("Decision Tree is:")
printTree(root)
print("------------------")

# Classify one hand-written example with the trained tree.
new = {
    "outlook": "sunny",
    "temperature": "hot",
    "humidity": "normal",
    "wind": "strong",
}
classify(root, new)

Decision Tree is:
outlook
	overcast ->  ['yes']

	rain
		wind
			strong ->  ['no']

			weak ->  ['yes']

	sunny
		humidity
			high ->  ['no']

			normal ->  ['yes']

------------------
Predicted Label for new example {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'normal', 'wind': 'strong'}  is: ['yes']
