<a href="https://colab.research.google.com/github/sahil301290/MachineLearning/blob/main/ID3_Decision_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implementation of ID3 Decision Tree Classifier:

1. Entropy of a Set

2. Average (weighted) information entropy of the class after splitting a set on an attribute

3. Information Gain (IG) of each attribute, and selection of the attribute with the maximum IG.

In [1]:
# Find Entropy Function

import numpy as np
def find_entropy(df):
  """Return the Shannon entropy, in bits, of the class column of ``df``.

  The class column is taken to be the last column of the table.

  Args:
    df: pandas DataFrame whose last column holds the class labels.

  Returns:
    A Python ``float``; ``0.0`` for an empty table or a single-class table.
  """
  Class = df.keys()[-1]
  # Count every label once (NaN kept as its own label) instead of
  # recomputing value_counts() for each distinct value.
  counts = df[Class].value_counts(dropna = False)
  total = len(df[Class])
  entropy = 0.0
  # Zero counts can appear for unused categories of a categorical column;
  # skip them because 0 * log2(0) would evaluate to NaN.
  for count in counts[counts > 0]:
    prob = count / total
    entropy -= prob * np.log2(prob)
  # The builtin float replaces np.float, which was removed in NumPy 1.24.
  return float(entropy)

In [2]:
# Weighted average entropy of the class after splitting on an attribute
def find_entropy_attribute(df, attribute):
  """Return the weighted average class entropy after splitting on ``attribute``.

  For every distinct value of ``attribute`` the entropy (in bits) of the class
  column, taken to be the last column of ``df``, is computed over the matching
  rows and weighted by the fraction of rows that carry that value.

  Args:
    df: pandas DataFrame whose last column holds the class labels.
    attribute: name of the column to split on.

  Returns:
    A Python ``float``; ``0.0`` when every partition is pure or ``df`` is empty.
  """
  Class = df.keys()[-1]
  total = len(df)
  avg_entropy = 0.0
  for value in df[attribute].unique():
    # One aligned mask selects the class labels of this partition.
    labels = df.loc[df[attribute] == value, Class]
    den = len(labels)
    if den == 0:
      # NaN never compares equal to itself, so it selects no rows.
      continue
    counts = labels.value_counts(dropna = False)
    # Dropping zero counts gives the exact 0 * log2(0) = 0 convention, so no
    # epsilon is needed inside the logarithm and the result is unbiased.
    probs = counts[counts > 0] / den
    entropy = -(probs * np.log2(probs)).sum()
    avg_entropy += (den / total) * entropy
  # The builtin float replaces np.float, which was removed in NumPy 1.24.
  return float(avg_entropy)

In [3]:
# Find Winner
def find_winner(df):
  """Return the name of the attribute with the highest information gain.

  Every column except the last (the class column) is a candidate. The gain of
  an attribute is the entropy of the whole table minus the weighted average
  entropy after splitting on that attribute. Ties go to the earliest column.

  Args:
    df: pandas DataFrame whose last column holds the class labels.

  Returns:
    The column label of the winning attribute.

  Raises:
    ValueError: if ``df`` has no attribute columns to choose from.
  """
  attributes = df.keys()[:-1]
  if len(attributes) == 0:
    # np.argmax would raise ValueError on an empty list anyway; say why.
    raise ValueError("find_winner needs at least one attribute column besides the class column")
  # The entropy of the full table is the same for every attribute: compute once.
  total_entropy = find_entropy(df)
  IG = [total_entropy - find_entropy_attribute(df, key) for key in attributes]
  return attributes[np.argmax(IG)]

In [4]:
def get_subtable(df, attribute, value):
  """Return the rows of ``df`` where ``attribute`` equals ``value``.

  The result is a new table with a fresh 0..n-1 index; ``df`` is not modified.
  """
  matches = df[attribute] == value
  selected = df[matches]
  return selected.reset_index(drop = True)

In [5]:
def buildtree(df, tree = None):
  """Recursively build an ID3 decision tree as nested dictionaries.

  The tree has the shape ``{attribute: {value: subtree_or_class_label}}``.
  The attribute at each level is chosen by ``find_winner``; the class column
  is the last column of ``df``.

  Args:
    df: pandas DataFrame whose last column holds the class labels.
    tree: optional dictionary to add the root node to; a new one is created
      when omitted.

  Returns:
    The dictionary holding the (sub)tree rooted at the winning attribute.
  """
  node = find_winner(df)
  attvalue = np.unique(df[node])
  Class = df.keys()[-1]
  if tree is None:
    tree = {}
  # setdefault also covers a caller-supplied dict that lacks this node,
  # which previously raised KeyError on the first assignment below.
  tree.setdefault(node, {})
  for value in attvalue:
    subtable = get_subtable(df,node,value)
    Clvalue, counts = np.unique(subtable[Class], return_counts = True)
    if len(counts) == 1:
      # Pure partition: store the single class label as a leaf.
      tree[node][value] = Clvalue[0]
      continue
    # The split attribute is constant inside this partition, so drop it.
    # Leaving it in lets it be chosen again, which recursed forever on rows
    # with identical attributes but different labels.
    remaining = subtable.drop(columns = node)
    if remaining.shape[1] <= 1:
      # Only the class column is left: fall back to the majority class
      # (np.argmax takes the first of the sorted labels on a tie).
      tree[node][value] = Clvalue[np.argmax(counts)]
    else:
      tree[node][value] = buildtree(remaining)
  return tree

In [6]:
import pandas as pd
# Load the training table; the functions above treat its last column as the class label.
df = pd.read_csv('weather.csv')

In [7]:
# Fit the decision tree on the training table; the result is a nested dict.
tree = buildtree(df)

In [8]:
import pprint
# Pretty-print the nested dict so the attribute -> value -> subtree structure is readable.
pprint.pprint(tree)

{'Outlook': {'overcast': 1,
             'rainy': {'Humidity': {'high': 0, 'normal': 1}},
             'sunny': {'Windy': {0: 1, 1: 0}}}}


In [9]:
def predict(inst, tree):
  """Classify one instance by walking the decision tree.

  Args:
    inst: mapping from attribute name to value (for example a DataFrame row
      or a plain dict).
    tree: nested dictionary of the form
      ``{attribute: {value: subtree_or_class_label}}``.

  Returns:
    The class label stored at the leaf that ``inst`` reaches.

  Raises:
    ValueError: if an empty dictionary is reached, so there is no attribute
      to test.
    KeyError: if ``inst`` lacks the tested attribute, or carries a value that
      has no branch in the tree.
  """
  subtree = tree
  # Descend iteratively. The previous version rebound `tree` while looping
  # over its keys and left `prediction` unbound for an empty tree.
  while isinstance(subtree, dict):
    if not subtree:
      raise ValueError("cannot predict from an empty decision tree")
    # Each internal node tests one attribute, stored as the first key.
    node = next(iter(subtree))
    subtree = subtree[node][inst[node]]
  return subtree

In [10]:
# Load the test table; its last column is used as the true label in the report below.
df1 = pd.read_csv('weather_test.csv')

In [11]:
# Classify every row of the test table with the fitted tree and show the labels.
Y_label = [predict(df1.iloc[row, :], tree) for row in range(len(df1))]
print(Y_label)

[0, 1]


In [12]:
from sklearn import metrics
# Compare the predictions with the true labels (last column of df1): per-class precision, recall and F1.
print(metrics.classification_report(df1.iloc[:,-1], Y_label))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

