In [0]:
import numpy as np
import math
import csv

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""
        
    def __str__(self):
        return self.attribute

def read_data(filename):

    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile)
        metadata = next(datareader)
        traindata = []
        for row in datareader:
            traindata.append(row)
    
    return (metadata, traindata)

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col]) # get unique values in particular column
    
    count = np.zeros((items.shape[0], 1), dtype = np.int32)   #number of row = number of values 
    
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    #count has the data of number of times each value is present in
                
    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
         
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1     
        
        if delete:
           dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict    
        
def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    
    for x in range(items.shape[0]):
   
        counts[x] = sum(S == items[x]) / (S.size)
        


    for count in counts:
        sums += -1 * count * math.log(count, 2)
    
    return sums
    
def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False) 
    #item is the unique value and dict is the data corresponding to it
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
      
    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0]/(total_size)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        
        
    total_entropy = entropy(data[:, -1])
   
    
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
        
    return total_entropy

def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = hinp.unique(data[:, -1])
        return node
     
    gains = np.zeros((data.shape[1] - 1, 1))
    #size of gains= number of attribute to calculate gain
    
    
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
        
    split = np.argmax(gains)
  
    
    node = Node(metadata[split])    
    metadata = np.delete(metadata, split, 0)
                          
    
    items, dict = subtables(data, split, delete=True)
    
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    
    return node        
    
def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
        
    print(empty(level), node.attribute)
    
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
        

metadata, traindata = read_data("/content/drive/My Drive/Tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

 outlook
    overcast
       [b'yes']
    rainy
       windy
          b'Strong'
             [b'no']
          b'Weak'
             [b'yes']
    sunny
       humidity
          b'high'
             [b'no']
          b'normal'
             [b'yes']


In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive
