# Importing necessary libraries:

In [None]:
import random
import numpy as np
import pandas as pd

## Decision Tree class (without scikit-learn):

In [None]:
class DecisionTree:
    """ID3-style decision tree for categorical features (no scikit-learn).

    The fitted tree is stored in ``self.tree`` as nested dictionaries of the
    form ``{attribute: {value: subtree_or_label}}``. Any node that is not a
    dict is a leaf holding the predicted class label.

    Args:
        verbose: When True, print the intermediate entropies, gains and the
            final tree while fitting. Defaults to False (silent).
    """

    def __init__(self, verbose=False):
        self.tree = None
        # Most frequent training label; used when prediction meets an
        # attribute value that the matching branch never saw during fit().
        self.default_class = None
        self.verbose = verbose

    def _log(self, *args):
        """Print ``args`` only when the instance was created with verbose=True."""
        if self.verbose:
            print(*args)

    @staticmethod
    def _majority_class(target):
        """Return the most frequent label in ``target``.

        Ties go to the smallest label, because ``np.unique`` returns sorted
        values and ``np.argmax`` picks the first maximum. Unlike
        ``np.bincount`` this also works for non-integer labels.
        """
        values, counts = np.unique(target, return_counts=True)
        return values[np.argmax(counts)]

    def entropy(self, target):
        """Return the Shannon entropy (base 2) of the labels in ``target``.

        An empty ``target`` yields 0.0.
        """
        _, counts = np.unique(target, return_counts=True)
        probabilities = counts / len(target)
        # np.unique only reports labels that actually occur, so every
        # probability is > 0 and log2 needs no epsilon. (An epsilon skews the
        # result and makes pure subsets come out slightly negative.)
        # Adding 0.0 turns the IEEE "-0.0" of a pure subset into plain 0.0.
        return float(-np.sum(probabilities * np.log2(probabilities))) + 0.0

    def information_gain(self, data, target, attribute):
        """Return the entropy reduction obtained by splitting on ``attribute``.

        Args:
            data: DataFrame of features.
            target: Series of labels sharing ``data``'s index.
            attribute: Name of the column in ``data`` to split on.
        """
        column = data[attribute]
        values, counts = np.unique(column, return_counts=True)
        self._log(values, counts, attribute)

        total = counts.sum()
        # Entropy of each partition, weighted by the partition's share of rows.
        weighted_entropy = sum(
            (count / total) * self.entropy(target[column == value])
            for value, count in zip(values, counts)
        )
        self._log("wei:", weighted_entropy)

        return self.entropy(target) - weighted_entropy

    def find_best_attribute(self, data, target, attributes):
        """Return the attribute with the highest information gain.

        Returns None when ``attributes`` is empty. On ties the attribute that
        appears first in ``attributes`` wins.
        """
        if len(attributes) == 0:
            return None  # no attribute left to split on

        gains = [self.information_gain(data, target, attr) for attr in attributes]
        self._log("gains: ", gains)
        self._log("attributes :", attributes)
        self._log("max: ", max(gains))

        best_attribute = attributes[np.argmax(gains)]
        self._log("best attribute: ", best_attribute)
        return best_attribute

    def build_tree(self, data, target, attributes):
        """Recursively build the tree for ``data``/``target``.

        Args:
            data: DataFrame of features.
            target: Series of labels sharing ``data``'s index.
            attributes: Column names still available for splitting.

        Returns:
            A class label (leaf) or a ``{attribute: {value: subtree}}`` dict.
        """
        # Pure node: every remaining row has the same label.
        if len(np.unique(target)) == 1:
            return target.iloc[0]

        # Nothing left to split on: fall back to the majority label.
        if len(attributes) == 0:
            return self._majority_class(target)

        best_attribute = self.find_best_attribute(data, target, attributes)
        # Each attribute is used at most once along any root-to-leaf path.
        remaining_attributes = [attr for attr in attributes if attr != best_attribute]

        branches = {}
        for value in np.unique(data[best_attribute]):
            mask = data[best_attribute] == value
            branches[value] = self.build_tree(data[mask], target[mask], remaining_attributes)

        return {best_attribute: branches}

    def fit(self, data, target):
        """Build the tree from a feature DataFrame and its labels.

        Args:
            data: DataFrame whose columns are the candidate attributes.
            target: Labels, one per row of ``data``. A pandas Series is used
                as-is; any other sequence is wrapped in a Series that shares
                ``data``'s index.
        """
        if not isinstance(target, pd.Series):
            # build_tree relies on .iloc and index-aligned boolean masks.
            target = pd.Series(np.asarray(target), index=data.index)

        attributes = data.columns.tolist()
        self.default_class = self._majority_class(target) if len(target) else None
        self.tree = self.build_tree(data, target, attributes)
        self._log(self.tree)

    def predict_instance(self, instance, tree):
        """Predict the label of one row by walking ``tree`` from its root.

        Args:
            instance: Mapping/Series giving the row's value for each attribute.
            tree: A (sub)tree as produced by ``build_tree``.

        Returns:
            The label stored at the reached leaf. If the row carries a value
            that the current node has no branch for, the most frequent
            training label (``self.default_class``) is returned instead of
            raising ``KeyError``.
        """
        if not isinstance(tree, dict):
            return tree

        attribute = next(iter(tree))
        branches = tree[attribute]
        value = instance[attribute]
        if value not in branches:
            return self.default_class
        return self.predict_instance(instance, branches[value])

    def predict(self, data):
        """Return a list with one predicted label per row of ``data``."""
        return [self.predict_instance(instance, self.tree) for _, instance in data.iterrows()]

## Getting Predictions:

In [None]:
# Toy "play tennis" dataset, written one observation per row.
# NOTE(review): 'String' in the Windy column is presumably a typo for
# 'Strong'; it is kept as-is because it is runtime data.
_columns = ['Outlook', 'Temperature', 'Humidity', 'Windy', 'PlayTennis']
_rows = [
    ('Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('Sunny', 'Hot', 'High', 'String', 'No'),
    ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'String', 'No'),
    ('Overcast', 'Cool', 'Normal', 'String', 'Yes'),
    ('Sunny', 'Mild', 'High', 'Weak', 'No'),
    ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
    ('Sunny', 'Mild', 'Normal', 'String', 'Yes'),
    ('Overcast', 'Mild', 'High', 'String', 'Yes'),
    ('Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'String', 'No'),
]

# Transpose the rows into the column-oriented dict of lists.
data = {name: list(column) for name, column in zip(_columns, zip(*_rows))}

df = pd.DataFrame(data)

print(df)

     Outlook Temperature Humidity   Windy PlayTennis
0      Sunny         Hot     High    Weak         No
1      Sunny         Hot     High  String         No
2   Overcast         Hot     High    Weak        Yes
3       Rain        Mild     High    Weak        Yes
4       Rain        Cool   Normal    Weak        Yes
5       Rain        Cool   Normal  String         No
6   Overcast        Cool   Normal  String        Yes
7      Sunny        Mild     High    Weak         No
8      Sunny        Cool   Normal    Weak        Yes
9       Rain        Mild   Normal    Weak        Yes
10     Sunny        Mild   Normal  String        Yes
11  Overcast        Mild     High  String        Yes
12  Overcast         Hot   Normal    Weak        Yes
13      Rain        Mild     High  String         No


In [None]:
# Encode the class label as an integer: 'No' -> 0, 'Yes' -> 1.
df['PlayTennis'] = df['PlayTennis'].map({'No': 0, 'Yes': 1})

# Separate the label column from the feature columns.
y_tennis = df['PlayTennis']
X_tennis = df.drop(columns='PlayTennis')

In [None]:
# Train the tree on the full tennis dataset.
tennis_dt_model = DecisionTree()
tennis_dt_model.fit(data=X_tennis, target=y_tennis)

# Predict on the same rows the model was trained on.
predictions = tennis_dt_model.predict(data=X_tennis)

['Overcast' 'Rain' 'Sunny'] [4 5 5] Outlook
wei: 0.6935361386488728
['Cool' 'Hot' 'Mild'] [4 4 6] Temperature
wei: 0.9110633927231372
['High' 'Normal'] [7 7] Humidity
wei: 0.7884504570197504
['String' 'Weak'] [6 8] Windy
wei: 0.8921589279738225
gains:  [0.2467498197332193, 0.02922256565895487, 0.1518355013623417, 0.0481270304082696]
attributes : ['Outlook', 'Temperature', 'Humidity', 'Windy']
max:  0.2467498197332193
best attribute:  Outlook
['Cool' 'Mild'] [2 3] Temperature
wei: 0.9509775001441545
['High' 'Normal'] [2 3] Humidity
wei: 0.9509775001441545
['String' 'Weak'] [2 3] Windy
wei: -1.4426951601859516e-10
gains:  [0.019973094021975113, 0.019973094021975113, 0.9709505943103991]
attributes : ['Temperature', 'Humidity', 'Windy']
max:  0.9709505943103991
best attribute:  Windy
['Cool' 'Hot' 'Mild'] [1 2 2] Temperature
wei: 0.3999999997980227
['High' 'Normal'] [3 2] Humidity
wei: -1.4426951601859516e-10
['String' 'Weak'] [2 3] Windy
wei: 0.9509775001441545
gains:  [0.5709505943681069

In [None]:
# Show the labels the fitted tree predicted for the training rows.
print("Predictions:", predictions)

Predictions: [0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
