In [104]:
import math
import numpy as np
from copy import copy
import graphviz
from sklearn import tree
from google.colab import drive
test_labels = np.load ('/content/drive/MyDrive/hw3_material (1)/dt/test_labels.npy')
test_set = np.load ('/content/drive/MyDrive/hw3_material (1)/dt/test_set.npy')
train_labels = np.load ('/content/drive/MyDrive/hw3_material (1)/dt/train_labels.npy')
train_set = np.load ('/content/drive/MyDrive/hw3_material (1)/dt/train_set.npy')

In [105]:
def entropy(bucket):
    """
    Calculates the entropy.
    :param bucket: A list of size num_classes. bucket[i] is the number of
    examples that belong to class i.
    :return: A float. Calculated entropy.
    """

    """
    Entropy(S) = - (p1 * log(p1)) - (p2 * log(p2)) 
    p1 is the proportion of examples in class 1
    p2 is the proportion of examples in class 2
    p2 = 1 - p1 is the proportion of examples in class 2
    """

    total_no_instances = sum(bucket)

    if total_no_instances == 0:
        return 0

    calculated_entropy = 0

    # loop through the different classes and calculate their proportions
    for i in range (len(bucket)):
        p = (bucket[i] / total_no_instances)

        if p != 0:
            calculated_entropy += (-p * math.log2(p))

    return calculated_entropy

In [106]:
def info_gain(parent_bucket, left_bucket, right_bucket):
    """
    Calculates the information gain. A bucket is a list of size num_classes.
    bucket[i] is the number of examples that belong to class i.
    :param parent_bucket: Bucket belonging to the parent node. It contains the
    number of examples that belong to each class before the split.
    :param left_bucket: Bucket belonging to the left child after the split.
    :param right_bucket: Bucket belonging to the right child after the split.
    :return: A float. Calculated information gain.
    """

    """
     When an attribute A splits the set S into subsets Si
     we compute the average entropy
     and compare the sum to the entropy of the original set S
    """

    entropy_original_set = entropy(parent_bucket)

    information_gain = entropy_original_set

    information_gain -= ((sum(left_bucket) / sum(parent_bucket)) * entropy(left_bucket))
    information_gain -= ((sum(right_bucket) / sum(parent_bucket)) * entropy(right_bucket))

    return information_gain


In [107]:
def gini(bucket):
    """
    Calculates the gini index.
    :param bucket: A list of size num_classes. bucket[i] is the number of
    examples that belong to class i.
    :return: A float. Calculated gini index.
    """

    total_no_instances = sum(bucket)
    if total_no_instances == 0:
        return 1

    gini_index = 1

    for i in range(len(bucket)):
        p = (bucket[i] / total_no_instances)
        gini_index -= (p*p)

    return gini_index

In [108]:
def avg_gini_index(left_bucket, right_bucket):
    """
    Calculates the average gini index. A bucket is a list of size num_classes.
    bucket[i] is the number of examples that belong to class i.
    :param left_bucket: Bucket belonging to the left child after the split.
    :param right_bucket: Bucket belonging to the right child after the split.
    :return: A float. Calculated average gini index.
    """
    total_no_instances = sum(left_bucket) + sum(right_bucket)

    avg_gini = (sum(left_bucket) / total_no_instances) * gini(left_bucket)
    avg_gini += (sum(right_bucket) / total_no_instances) * gini(right_bucket)

    return avg_gini

In [109]:
def selectionSort(data, labels, attr_index):
    for i in range(data.shape[0]):
        minimum = 10000
        minimum_index = i

        for j in range(i, data.shape[0]):
            if data[j][attr_index] < minimum:
                minimum = data[j][attr_index]
                minimum_index = j


        # sort the data
        temp = copy(data[i])
        data[i] = copy(data[minimum_index])
        data[minimum_index] = temp

        # sort the label
        temp = copy(labels[i])
        labels[i] = copy(labels[minimum_index])
        labels[minimum_index] = temp

    return data, labels

In [110]:
def calculate_split_values(data, labels, num_classes, attr_index, heuristic_name):
    """
    For every possible values to split the data for the attribute indexed by
    attribute_index, it divides the data into buckets and calculates the values
    returned by the heuristic function named heuristic_name. The split values
    should be the average of the closest 2 values. For example, if the data has
    2.1 and 2.2 in it consecutively for the values of attribute index by attr_index,
    then one of the split values should be 2.15.
    :param data: An (N, M) shaped numpy array. N is the number of examples in the
    current node. M is the dimensionality of the data. It contains the values for
    every attribute for every example.
    :param labels: An (N, ) shaped numpy array. It contains the class values in
    it. For every value, 0 <= value < num_classes.
    :param num_classes: An integer. The number of classes in the dataset.
    :param attr_index: An integer. The index of the attribute that is going to
    be used for the splitting operation. This integer indexs the second dimension
    of the data numpy array.
    :param heuristic_name: The name of the heuristic function. It should either be
    'info_gain' of 'avg_gini_index' for this homework.
    :return: An (L, 2) shaped numpy array. L is the number of split values. The
    first column is the split values and the second column contains the calculated
    heuristic values for their splits.
    """

    data, labels = selectionSort(data, labels, attr_index)

    split_value_of_heuristic = np.zeros((data.shape[0] - 1, 2))
    left_bucket = np.zeros(max(labels) + 1)
    right_bucket = np.zeros(max(labels) + 1)
    if heuristic_name == 'avg_gini_index':
        for i in range(data.shape[0] - 1):
            split_value = (data[i][attr_index] + data[i+1][attr_index]) / 2
            for j in range(data.shape[0]):
                if data[j][attr_index] <= split_value:
                    left_bucket[labels[j]] = left_bucket[labels[j]] + 1
                else: 
                    right_bucket[labels[j]] = right_bucket[labels[j]] + 1
               
            split_value_of_heuristic[i][0] = split_value
            split_value_of_heuristic[i][1] = avg_gini_index(left_bucket.tolist(), right_bucket.tolist())

            left_bucket = np.zeros(max(labels) + 1)
            right_bucket = np.zeros(max(labels) + 1)

    elif heuristic_name == 'info_gain':
        for i in range(data.shape[0] - 1):
            split_value = (data[i][attr_index] + data[i+1][attr_index]) / 2
            for j in range(data.shape[0]):
                if data[j][attr_index] <= split_value:
                    left_bucket[labels[j]] = left_bucket[labels[j]] + 1
                else: 
                    right_bucket[labels[j]] = right_bucket[labels[j]] + 1

            parent_bucket = []
            for k in range(len(left_bucket)):
                parent_bucket.append(left_bucket[k] + right_bucket[k])
            split_value_of_heuristic[i][0] = split_value
            split_value_of_heuristic[i][1] = info_gain(parent_bucket, left_bucket.tolist(), right_bucket.tolist())
            
            left_bucket = np.zeros(max(labels) + 1)
            right_bucket = np.zeros(max(labels) + 1)

    else:
        print("Invalid Heuristic function")

    return split_value_of_heuristic

In [111]:
def chi_squared_test(left_bucket, right_bucket):
    """
    Calculates chi squared value and degree of freedom between the selected attribute
    and the class attribute. A bucket is a list of size num_classes. bucket[i] is the
    number of examples that belong to class i.
    :param left_bucket: Bucket belonging to the left child after the split.
    :param right_bucket: Bucket belonging to the right child after the split.
    :return: A float and and integer. Chi squared value and degree of freedom.
    """

    """
        Chi squared value = (((Actual - Expected) ** 2) / Expected) 
        Degree of freedom =  (number of classes - 1) * (number of nodes - 1)
        We calculate the Chi squared for the split as the sum of Chi squared for each node.
        Higher the chi-square value more will be the purity of the nodes after a split.
        Expected => worst case where the split didn't cause any kind of inclination towards purity. 
        In other words, the distribution of classes in the child node didn't change with respect to the parent class. 
    """

    parent_bucket = []
    total_class_no = 0

    for i in range(len(left_bucket)):
        amount_of_instance = left_bucket[i] + right_bucket[i]
        parent_bucket.append(amount_of_instance)
        if amount_of_instance != 0:
            total_class_no += 1


    # number of nodes is 2 because we have only left and right child (it is binary)
    # len(left_bucket) gives the number of classes
    degree_of_freedom = (total_class_no - 1) * (2 - 1) 


    # calculation of chi square
    chi_squared_value = 0
    for i in range(len(left_bucket)):
        if parent_bucket[i] != 0:
            chi_squared_value += (((left_bucket[i] - ((parent_bucket[i] / sum(parent_bucket)) * sum(left_bucket))) ** 2) / ((parent_bucket[i] / sum(parent_bucket)) * sum(left_bucket))) 
            chi_squared_value += (((right_bucket[i] - ((parent_bucket[i] / sum(parent_bucket)) * sum(right_bucket))) ** 2) / ((parent_bucket[i] / sum(parent_bucket)) * sum(right_bucket))) 

    return chi_squared_value, degree_of_freedom

In [112]:
parent_bucket = [3, 2, 3]
left_bucket = [3, 0, 2]
right_bucket = [0, 2, 1]

print('entropy:', abs(entropy(parent_bucket) - 1.5612781244591325) < 10 ** -5)

print('info_gain:', abs(info_gain(parent_bucket, left_bucket, right_bucket) - 0.610073065154531) < 10 ** -5)

print('gini:', abs(gini(parent_bucket) - 0.65625) < 10 ** -5)

print('avg_gini_index:', abs(avg_gini_index(left_bucket, right_bucket) - 0.4666666666666667) < 10 ** -5)

data = np.asarray([
    [0.96712763, 0.27892349, 0.69429896, 0.04024055],
    [0.0576444, 0.33726678, 0.57879485, 0.81960005],
    [0.70768221, 0.30983012, 0.80722421, 0.13924751],
    [0.42084337, 0.7296714, 0.00308904, 0.24135345],
    [0.65534721, 0.44364458, 0.63468942, 0.27418721],
    [0.98472834, 0.6466202, 0.18471949, 0.9535479],
    [0.63624549, 0.30568322, 0.41870169, 0.85743963],
    [0.17610217, 0.20381821, 0.68492418, 0.57177705],
    [0.21855323, 0.97823166, 0.38690695, 0.79345037],
    [0.53118909, 0.74468352, 0.88166667, 0.50417511]
])

values_info_gain_gt = np.asarray([
    [0.24137085, 0.07898214],
    [0.29230335, 0.17095059],
    [0.30775667, 0.2812909],
    [0.32354845, 0.41997309],
    [0.39045568, 0.12451125],
    [0.54513239, 0.25642589],
    [0.6881458, 0.09127745],
    [0.73717746, 0.00740339],
    [0.86145759, 0.07898214]
])

values_avg_gini_index_gt = np.asarray([
    [0.24137085, 0.44444444],
    [0.29230335, 0.4],
    [0.30775667, 0.34285714],
    [0.32354845, 0.26666667],
    [0.39045568, 0.4],
    [0.54513239, 0.31666667],
    [0.6881458, 0.41904762],
    [0.73717746, 0.475],
    [0.86145759, 0.44444444]
])

labels = np.asarray([0, 1, 0, 1, 0, 1, 0, 0, 0, 1])


values_info_gain = calculate_split_values(data, labels, 2, 1, 'info_gain')

values_avg_gini_index = calculate_split_values(data, labels, 2, 1, 'avg_gini_index')

print('calculate_split_values (info_gain): ', np.all(np.abs(values_info_gain - values_info_gain_gt) < 10 ** -5))
print('calculate_split_values (avg_gini_index): ',
      np.all(np.abs(values_avg_gini_index - values_avg_gini_index_gt) < 10 ** -5))


chi_squared, df = chi_squared_test(left_bucket, right_bucket)
print('chi_squared:', abs(chi_squared - 5.155555555555557) < 10 ** -5)
print('degree_of_freedom:', df == 2)

left_bucket2 = [0 , 3, 2]
right_bucket2 = [0, 1, 4]
chi_squared2, df2 = chi_squared_test(left_bucket2, right_bucket2)
print('chi_squared:', abs(chi_squared2 - 1.6666666666666665) < 10 ** -5)
print('degree_of_freedom:', df2 == 1)


entropy: True
info_gain: True
gini: True
avg_gini_index: True
calculate_split_values (info_gain):  True
calculate_split_values (avg_gini_index):  True
chi_squared: True
degree_of_freedom: True
chi_squared: True
degree_of_freedom: True


In [113]:
# ID3 Algorithm:
# Is top-down meaning that it starts building the tree from the top
# and it is greedy meaning that at each iteration we select the best
# feature at the present moment to create a node

print(train_set.shape)
print(train_labels)
print(train_labels.shape)

(120, 4)
[0 0 0 1 2 1 0 1 0 1 2 0 2 2 0 1 0 2 2 1 0 0 0 1 0 2 0 1 1 0 0 1 1 0 1 0 2
 1 2 1 2 0 0 1 2 2 0 0 0 1 0 0 2 2 1 2 2 0 1 2 1 1 2 1 1 2 1 1 1 1 1 1 1 2
 2 0 0 2 0 2 0 0 2 1 0 1 2 2 2 1 1 2 1 2 2 2 0 2 1 1 0 2 1 1 1 1 1 0 0 0 0
 1 2 2 0 2 0 1 2 0]
(120,)


In [114]:
def find_buckets_and_subtrees(data, data_label, split_value, attr_index):
    left_bucket = np.zeros(max(data_label) + 1)
    right_bucket = np.zeros(max(data_label) + 1)
    left_subtree = []
    left_subtree_label = []
    right_subtree = []
    right_subtree_label = []
    for i in range(data.shape[0]):
        if data[i][attr_index] <= split_value:
            left_bucket[data_label[i]] = left_bucket[data_label[i]] + 1
            left_subtree.append(data[i])
            left_subtree_label.append(data_label[i])
        else: 
            right_bucket[data_label[i]] = right_bucket[data_label[i]] + 1
            right_subtree.append(data[i])
            right_subtree_label.append(data_label[i])

    left_tree = np.zeros((len(left_subtree), data.shape[1]))
    for i in range(len(left_subtree)):
        left_tree[i] = left_subtree[i]

    right_tree = np.zeros((len(right_subtree), data.shape[1]))
    for i in range(len(right_subtree)):
        right_tree[i] = right_subtree[i]

    return left_bucket, right_bucket, left_tree, right_tree, left_subtree_label, right_subtree_label

In [115]:
class Tree:
    def __init__(self, classes, separating_feature, splitting_value):
        self.separating_feature = separating_feature
        self.splitting_value = splitting_value
        self.children = []
        self.classes = classes

In [116]:
def calculate_parent_bucket(left_bucket, right_bucket):
    parent_bucket = []
    for i in range(max(len(left_bucket), len(right_bucket))):
        if (i <= (len(left_bucket) - 1)) & (i <= (len(right_bucket) - 1)):
            parent_bucket.append(left_bucket[i] + right_bucket[i])
        elif (i > (len(left_bucket) - 1)) & (i <= (len(right_bucket) - 1)):
            parent_bucket.append(right_bucket[i])
        else:
            parent_bucket.append(left_bucket[i])
    
    return parent_bucket

In [117]:
# This function is for visualization
def print_tree(root):
  thislevel = [root]

  while thislevel:
    nextlevel = []
    for n in thislevel:
      print("classes: ", n.classes, "   separating feature index: ", n.separating_feature, "   splitting value: ", n.splitting_value, end=' ')
      if n.children[0] is not None: 
          nextlevel.append(n.children[0])
          nextlevel.append(n.children[1])
          print(" children[0] = ", n.children[0].classes, "   children[1] = ", n.children[1].classes)
      else:
          print(" children = NULL")

    thislevel = nextlevel


In [118]:
def ID3_algorithm(data, data_label, heuristic_function):
    heuristic_of_attributes = np.zeros((train_set.shape[1], 2))
    for attribute in range(data.shape[1]):
        split_value_of_feature = calculate_split_values(data, data_label, 3, attribute, heuristic_function)
        if heuristic_function == 'avg_gini_index':
            max_info_gain_feature = np.amin(split_value_of_feature, axis=0)
        else:
            max_info_gain_feature = np.amax(split_value_of_feature, axis=0)
        
        result = np.nonzero(split_value_of_feature == max_info_gain_feature[1])
        heuristic_of_attributes[attribute] = split_value_of_feature[result[0][0]]

    # result[0][0] holds the index of the feature that has the maximum information gain
    # heuristic_of_attributes[result[0][0]][0] holds the split value of the attribute that gives the most information 
    # and result[0][1] gives the value of information gain for that attribute
    max_info_gain_feature = np.amax(heuristic_of_attributes, axis=0)
    result = np.nonzero(heuristic_of_attributes == max_info_gain_feature[1])

    left_bucket, right_bucket, left_subtree, right_subtree, left_subtree_label, right_subtree_label = find_buckets_and_subtrees(data, data_label, heuristic_of_attributes[result[0][0]][0], result[0][0])
    chi, freedom = chi_squared_test(left_bucket, right_bucket)
    # The degree of freedom is fixed for a given dataset
    # In this homework, you will use 90% confidence.
    # There are 3 classes and 2 subtrees. So the freedom is (3-1) * (2-1) = 2
    # 0.211 = 21.1%
    root = Tree(calculate_parent_bucket(left_bucket, right_bucket), result[0][0], heuristic_of_attributes[result[0][0]][0])
    # You are going to stop growing the tree when there is no association between the attribute selected
    # by the heuristic function, i.e., information gain and average gain index, and the class variable
    if (chi / 100) > 0.211:
        left = ID3_algorithm(left_subtree, left_subtree_label, heuristic_function)
        right = ID3_algorithm(right_subtree, right_subtree_label, heuristic_function)
        root.children = [left, right]

    else:
        root.children = [None, None]
    
    return root


In [119]:
# this function is used to check if the node is pure or not. 
def check_purity_of_node(tree):
    amount_of_empty_classes = 0
    for i in range(len(tree.classes)):
        if tree.classes[i] == 0:
            amount_of_empty_classes += 1
    
    if (amount_of_empty_classes != len(tree.classes) - 1):
        return False
    else:
        return True

In [120]:
def ID3_algorithm_without_prepruning(data, data_label, heuristic_function):
    if len(data) > 1:
        heuristic_of_attributes = np.zeros((train_set.shape[1], 2))
        for attribute in range(data.shape[1]):
            split_value_of_feature = calculate_split_values(data, data_label, 3, attribute, heuristic_function)
            if heuristic_function == 'avg_gini_index':
                max_info_gain_feature = np.amin(split_value_of_feature, axis=0)
            else:
                max_info_gain_feature = np.amax(split_value_of_feature, axis=0)
            result = np.nonzero(split_value_of_feature == max_info_gain_feature[1])
            heuristic_of_attributes[attribute] = split_value_of_feature[result[0][0]]

        # result[0][0] holds the index of the feature that has the maximum information gain
        # heuristic_of_attributes[result[0][0]][0] holds the split value of the attribute that gives the most information 
        # and result[0][1] gives the value of information gain for that attribute
        max_info_gain_feature = np.amax(heuristic_of_attributes, axis=0)
        result = np.nonzero(heuristic_of_attributes == max_info_gain_feature[1])

        left_bucket, right_bucket, left_subtree, right_subtree, left_subtree_label, right_subtree_label = find_buckets_and_subtrees(data, data_label, heuristic_of_attributes[result[0][0]][0], result[0][0])
    
        # The degree of freedom is fixed for a given dataset
        # In this homework, you will use 90% confidence.
        # There are 3 classes and 2 subtrees. So the freedom is (3-1) * (2-1) = 2
        # 0.211 = 21.1%
        root = Tree(calculate_parent_bucket(left_bucket, right_bucket), result[0][0], heuristic_of_attributes[result[0][0]][0])
        # we are going to stop growing a branch of the tree if its pure
        if check_purity_of_node(root) == False:
            left = ID3_algorithm_without_prepruning(left_subtree, left_subtree_label, heuristic_function)
            right = ID3_algorithm_without_prepruning(right_subtree, right_subtree_label, heuristic_function)
            root.children = [left, right]

        else:
            root.children = [None, None]
    else:
        root = Tree([], None, None)
        root.children = [None, None]
    return root


In [121]:
print("Pre Pruning\n\n")
root_based_on_info_gain = ID3_algorithm(train_set, train_labels, "info_gain")
print("Visualization of Decision Tree based on Info gain with prepruning")
print_tree(root_based_on_info_gain)

root_based_on_gini_index = ID3_algorithm(train_set, train_labels, "avg_gini_index")
print("\n\nVisualization of Decision Tree based on Gini Index with prepruning")
print_tree(root_based_on_gini_index)

print("\n\nWithout Pre Pruning\n")
root_based_on_info_gain_without_prepruning = ID3_algorithm_without_prepruning(train_set, train_labels, "info_gain")
print("\nVisualization of Decision Tree based on Info gain without prepruning")
print_tree(root_based_on_info_gain_without_prepruning)

root_based_on_gini_index_without_prepruning = ID3_algorithm_without_prepruning(train_set, train_labels, "avg_gini_index")
print("\n\nVisualization of Decision Tree based on Gini Index without prepruning")
print_tree(root_based_on_gini_index_without_prepruning)

Pre Pruning


Visualization of Decision Tree based on Info gain with prepruning
classes:  [40.0, 43.0, 37.0]    separating feature index:  2    splitting value:  1.9  children[0] =  [40.0]    children[1] =  [0.0, 43.0, 37.0]
classes:  [40.0]    separating feature index:  0    splitting value:  4.35  children = NULL
classes:  [0.0, 43.0, 37.0]    separating feature index:  3    splitting value:  1.7  children[0] =  [0.0, 42.0, 4.0]    children[1] =  [0.0, 1.0, 33.0]
classes:  [0.0, 42.0, 4.0]    separating feature index:  2    splitting value:  4.9  children = NULL
classes:  [0.0, 1.0, 33.0]    separating feature index:  2    splitting value:  4.8  children = NULL


Visualization of Decision Tree based on Gini Index with prepruning
classes:  [40.0, 43.0, 37.0]    separating feature index:  1    splitting value:  3.3  children[0] =  [15.0, 42.0, 33.0]    children[1] =  [25.0, 1.0, 4.0]
classes:  [15.0, 42.0, 33.0]    separating feature index:  1    splitting value:  2.9  children = NULL


RecursionError: ignored

In [122]:
def predict_label(tree, data):
    while (tree.children[0] is not None):
        if tree.splitting_value > data[tree.separating_feature]:
            tree = tree.children[0]
        else:
            tree = tree.children[1]

    max_value = max(tree.classes) 
    most_common_class = tree.classes.index(max_value) 
    return most_common_class

In [123]:
def find_accuracy(predictions, actual_label):
    error = 0
    for i in range(len(predictions)):
        if predictions[i] != actual_label[i]:
            error += 1

    # return the accuracy
    return (1 - (error / len(predictions)))

In [124]:
predictions = []
for i in range(len(test_labels)):
    predictions.append(predict_label(root_based_on_info_gain, test_set[i]))

print(predictions)

print("Accuracy based on info gain with preprunning = ", find_accuracy(predictions, test_labels))

[1, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 0, 1, 1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 1, 0, 1, 2]
Accuracy based on info gain with preprunning =  0.9666666666666667


In [125]:
predictions = []
for i in range(len(test_labels)):
    predictions.append(predict_label(root_based_on_gini_index, test_set[i]))

print(predictions)

print("Accuracy based on gini index with preprunning = ", find_accuracy(predictions, test_labels))

[1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]
Accuracy based on gini index with preprunning =  0.43333333333333335


In [126]:
predictions = []
for i in range(len(test_labels)):
    predictions.append(predict_label(root_based_on_info_gain_without_prepruning, test_set[i]))

print(predictions)

print("Accuracy based on info gain without preprunning = ", find_accuracy(predictions, test_labels))

[1, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 0, 1, 1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 1, 0, 1, 2]
Accuracy based on info gain without preprunning =  0.9666666666666667


In [127]:
root_based_on_gini_index = ID3_algorithm(train_set, train_labels, "avg_gini_index")
print("\n\nVisualization of Decision Tree based on Gini Index with prepruning")
print_tree(root_based_on_gini_index)
#root_based_on_gini_index = ID3_algorithm(train_set, train_labels, "info_gain")
#print("\n\nVisualization of Decision Tree based on Gini Index with prepruning")
#print_tree(root_based_on_gini_index)



Visualization of Decision Tree based on Gini Index with prepruning
classes:  [40.0, 43.0, 37.0]    separating feature index:  1    splitting value:  3.3  children[0] =  [15.0, 42.0, 33.0]    children[1] =  [25.0, 1.0, 4.0]
classes:  [15.0, 42.0, 33.0]    separating feature index:  1    splitting value:  2.9  children = NULL
classes:  [25.0, 1.0, 4.0]    separating feature index:  1    splitting value:  3.4  children = NULL
