# Decision Tree Error Functions

This notebook demonstrates three common error functions used for decision trees:
1. misclassification rate,
2. entropy, and
3. Gini index.

In [33]:
import numpy as np

def cprob(data, k):
    """Return the fraction of entries of ``data`` that equal ``k``.

    The count of matching entries is divided by the length of the first
    axis of ``data``.

    Parameters
    ----------
    data : array-like
        Observed labels. Lists and tuples are accepted; the input is
        converted with ``np.asarray`` before use.
    k : scalar or array-like
        Value compared element-wise (``data == k``) against ``data``.

    Returns
    -------
    float
        ``np.sum(data == k) / data.shape[0]``, or ``nan`` when the first
        axis of ``data`` is empty.
    """
    data = np.asarray(data)
    n_samples = data.shape[0]
    if n_samples == 0:
        # The proportion of an empty sample is undefined; return nan
        # explicitly instead of triggering a 0 / 0 RuntimeWarning.
        return np.nan
    return np.sum(data == k) / n_samples

def misclassification_rate(preds, target):
    """Return one minus the fraction of ``preds`` entries equal to ``target``.

    The matching fraction is obtained from ``cprob(preds, target)``; the
    value returned is its complement.
    """
    return 1 - cprob(preds, target)

def entropy(targets, n_classes):
    """Return the entropy of the label distribution in ``targets``.

    Computes ``-sum_c p_c * log(p_c)`` over the class labels
    ``0, 1, ..., n_classes - 1``, where ``p_c = cprob(targets, c)``.
    The natural logarithm (``np.log``) is used, so the result is in nats.

    Parameters
    ----------
    targets : array
        Observed class labels.
    n_classes : int
        Number of classes; labels ``0`` to ``n_classes - 1`` are evaluated.

    Returns
    -------
    float
        The entropy. Classes with zero proportion contribute nothing, so a
        sample containing a single class yields ``0.0`` rather than ``nan``.
    """
    result = 0.0
    for label in range(n_classes):
        pi_c = cprob(targets, label)
        if pi_c == 0:
            # 0 * log(0) evaluates to 0 * -inf = nan in floating point;
            # by convention that term is 0, so skip it. The test is `== 0`
            # (not `> 0`) so a nan proportion still propagates to the result.
            continue
        result -= pi_c * np.log(pi_c)

    return result

def gini_index(targets, n_classes):
    """Return the Gini index of the label distribution in ``targets``.

    Computes ``1 - sum_c p_c ** 2`` over the class labels
    ``0, 1, ..., n_classes - 1``, where ``p_c = cprob(targets, c)``.
    """
    # Accumulate under a descriptive name so the builtin ``sum`` is not shadowed.
    squared_total = 0
    for label in range(n_classes):
        squared_total += cprob(targets, label) ** 2

    return 1 - squared_total

In [34]:
# Seed the generator so the sample, and every number printed below, is
# reproducible under Restart Kernel -> Run All.
rng = np.random.default_rng(42)
y = rng.integers(2, size=10)  # ten labels drawn from {0, 1}
print(y)
# Fraction of entries in y equal to 1.
print(f'Class Conditional Probability for 1 = {cprob(y, 1)}')
# Fraction of entries in y that are not 1.
print(f'Misclassification Rate for 1 = {misclassification_rate(y, 1)}')
print(f'Entropy for y = {entropy(y, 2)}')
print(f'Gini index = {gini_index(y, 2)}')

[0 0 1 1 0 0 1 0 1 1]
Class Conditional Probability for 1 = 0.5
Misclassification Rate for 1 = 0.5
Entropy for y = 0.6931471805599453
Gini index = 0.5
