In [1]:
import numpy as np
import sklearn
from sklearn.datasets import load_wine
import math
from sklearn.model_selection import train_test_split

In [2]:
data = load_wine()
x = data.data
y = data.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [3]:
class Node():
  def __init__(self, root, attribute, mid):
    self.root = root
    self.attribute = attribute
    self.mid = mid
    self.children = []
    self.classification = None

  def define_children(self, child):
    self.children.append(child)

In [4]:
class Decision_tree():

  #def __init__(self)

  def calculate_entropy(self, x, y):
    ocurrences = []
    entropy = 0
    for i in range(3):
      ocurrences = np.count_nonzero(y == i)
      if ocurrences != 0:
        entropy += -((ocurrences/len(y)*math.log2(ocurrences/len(y))))
    return entropy
  
  def calculate_aie(self, x, y, attribute):
    aie = 0
    mid = x[:, attribute].mean()
    for i in range(3):
      indices = np.where(y == i)
      actual_array = x[indices, attribute]
      total_values = len(indices[0])
      positives = np.count_nonzero(actual_array <= mid)
      
      if positives != 0:
        entropy = -((positives/total_values*math.log2(positives/total_values)))
        aie += ((total_values/len(y))* entropy)
    return aie
  
  def fit(self, x, y):
    node_root = self.calculate_gain(x, y)
    indices = np.where(x[:, node_root] <= x[:, node_root].mean())
    #x = np.delete(x, node_root, axis=1)
    xr = x[indices].copy()
    yr = y[indices].copy()
    xl = np.delete(x, indices, axis=0)
    yl = np.delete(y, indices)

    
    self.root = Node(0, node_root, x[:, node_root].mean())

    self.root.define_children(self.fit_recursive(xr, yr, self.root))
    self.root.define_children(self.fit_recursive(xl, yl, self.root))
  def fit_recursive(self, x, y, root):
    if len(np.unique(y)) != 1:
      node_root = self.calculate_gain(x, y)
      indices = np.where(x[:, node_root] <= x[:, node_root].mean())
      #x = np.delete(x, node_root, axis=1)
      xr = x[indices].copy()
      yr = y[indices].copy()
      xl = np.delete(x, indices, axis=0)
      yl = np.delete(y, indices)
      actual_root = Node(root, node_root, x[:, node_root].mean())
      

      actual_root.define_children(self.fit_recursive(xr, yr, actual_root))
      actual_root.define_children(self.fit_recursive(xl, yl, actual_root))

      return actual_root
    else:
      leaf = Node(root, 0, 0)
      leaf.classification = y[0]
      return leaf


  def calculate_gain(self, x, y):
    entropy = self.calculate_entropy(x, y)
    values = []
    for i in range(x.shape[1]):
      aie = self.calculate_aie(x, y, i)
      values.append(entropy - aie)
    max_value = max(values)
    index = values.index(max_value)
    return index
  
  def predict(self, x):
    return self.predict_recursive(x, self.root)
  
  def predict_recursive(self, x, root):
    if root.classification == None:
      attribute = root.attribute
      #print(attribute)
      mid = root.mid
      if x[attribute] <= mid:
        return self.predict_recursive(x, root.children[0])
      else:
        return self.predict_recursive(x, root.children[1])
    else:
      return root.classification

  def test(self, x, y):
    total_rights = 0
    for actual_x, actual_y in zip(x, y):
      prediction = self.predict(actual_x)
      if prediction == actual_y:
        total_rights += 1
    
    return total_rights/ len(y)

In [8]:
dt = Decision_tree()
dt.fit(x_train, y_train)
dt.test(x_test, y_test)

0.7777777777777778