# In [1]:
import numpy as np
import pandas as pd

# In [2]:
# Toy crop-readiness dataset, written one record per plant.
# Field order within each record follows `_columns`; the last field is the label.
_columns = (
    'plant_age_days',
    'height_cm',
    'leaf_color',
    'rainfall',
    'soil_moisture',
    'ready',
)
_records = [
    (90, 105, 'yellow', 'high', 'medium', 'yes'),
    (60, 70, 'green', 'medium', 'low', 'no'),
    (80, 95, 'dark green', 'medium', 'medium', 'yes'),
    (50, 60, 'green', 'low', 'low', 'no'),
]

# Transpose the records into a column-name -> list-of-values mapping.
sample_data = {
    name: list(values) for name, values in zip(_columns, zip(*_records))
}

df = pd.DataFrame(data=sample_data)

# In [None]:
class Node:
    """One node of a binary decision tree.

    An internal node carries a split (``feature`` index plus ``threshold``)
    and two children; a leaf carries only the predicted ``value``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature      # column index tested at this node
        self.threshold = threshold  # numeric cut-off or category compared against
        self.left = left            # child followed when the test is true
        self.right = right          # child followed when the test is false
        self.value = value          # predicted label; non-None only on leaves

    def is_leaf(self):
        """Return True when this node stores a prediction instead of a split."""
        return self.value is not None


class MyDecisionTree:
    """Entropy-based binary decision tree for mixed numeric/categorical data.

    Numeric thresholds split rows with ``value <= threshold``; any other
    threshold (e.g. a string category) splits rows with ``value == threshold``.

    Args:
        min_samples_split: A node holding fewer samples than this becomes a leaf.
        max_depth: Nodes at this depth (root is depth 0) become leaves.
    """

    def __init__(self, min_samples_split=2, max_depth=100):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.root = None
        # Optional column labels, used only when printing the tree.
        self.feature_names = None

    def fit(self, X, y, feature_names=None):
        """Build the tree from training data.

        Args:
            X: 2-D array-like (or DataFrame) of shape (n_samples, n_features).
            y: 1-D array-like of n_samples labels.
            feature_names: Optional labels for the columns of ``X``. When
                omitted and ``X`` exposes ``.columns``, those are used.

        Raises:
            ValueError: If the dataset is empty, or ``feature_names`` does
                not have one entry per feature.
        """
        if feature_names is None and hasattr(X, "columns"):
            feature_names = X.columns

        # Stack as object arrays so every cell keeps its own Python type.
        # Stacking raw arrays lets NumPy pick one common dtype, which turns
        # numeric features into strings as soon as the labels are strings.
        features = np.asarray(X, dtype=object)
        labels = np.asarray(y, dtype=object)
        dataset = np.column_stack((features, labels))

        if dataset.shape[0] == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")

        if feature_names is not None:
            feature_names = [str(name) for name in feature_names]
            if len(feature_names) != dataset.shape[1] - 1:
                raise ValueError(
                    f"feature_names has {len(feature_names)} entries but X has "
                    f"{dataset.shape[1] - 1} features"
                )

        self.feature_names = feature_names
        self.root = self._grow_tree(dataset)

    @staticmethod
    def _is_numeric(value):
        """Return True when ``value`` should be split with ``<=`` rather than ``==``."""
        return isinstance(value, (int, float, np.number))

    def _left_mask(self, column, threshold):
        """Return a boolean mask of the rows in ``column`` that go to the left child.

        The right child is always the complement of this mask, so every row
        lands in exactly one child.
        """
        if self._is_numeric(threshold):
            return np.asarray(column <= threshold, dtype=bool)
        return np.asarray(column == threshold, dtype=bool)

    @staticmethod
    def _make_leaf(y):
        """Return a leaf predicting the most frequent label in ``y``.

        Ties go to the label that sorts first, because ``np.unique`` returns
        sorted labels and ``np.argmax`` picks the first maximum.
        """
        labels, counts = np.unique(y, return_counts=True)
        return Node(value=labels[np.argmax(counts)])

    def _calculate_entropy(self, y):
        """Return the Shannon entropy (base 2) of the label array ``y``."""
        total = len(y)
        if total == 0:
            return 0.0
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / total
        # "+ 0.0" normalises the -0.0 produced by a single-class array.
        return float(-np.sum(probabilities * np.log2(probabilities))) + 0.0

    def _information_gain(self, y, y_left, y_right, parent_entropy=None):
        """Return the entropy reduction from splitting ``y`` into two parts.

        ``parent_entropy`` may be passed in to avoid recomputing the entropy
        of ``y`` when many candidate splits of the same node are scored.
        """
        if parent_entropy is None:
            parent_entropy = self._calculate_entropy(y)
        weight_left = len(y_left) / len(y)
        weight_right = len(y_right) / len(y)
        children_entropy = (
            weight_left * self._calculate_entropy(y_left)
            + weight_right * self._calculate_entropy(y_right)
        )
        return parent_entropy - children_entropy

    def _best_split(self, dataset, num_features):
        """Find the (feature index, threshold) pair with the highest gain.

        Every distinct value of every feature is tried as a threshold.
        Returns ``(None, None)`` when no threshold separates the rows into
        two non-empty groups.
        """
        y = dataset[:, -1]
        # Both are the same for every candidate, so compute them once.
        n_rows = len(y)
        parent_entropy = self._calculate_entropy(y)

        best_gain = -1
        split_idx, split_thresh = None, None

        for feat_idx in range(num_features):
            column = dataset[:, feat_idx]
            for threshold in np.unique(column):
                goes_left = self._left_mask(column, threshold)
                n_left = np.count_nonzero(goes_left)
                # A split that leaves one side empty separates nothing.
                if n_left == 0 or n_left == n_rows:
                    continue

                gain = self._information_gain(
                    y, y[goes_left], y[~goes_left], parent_entropy
                )
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = threshold

        return split_idx, split_thresh

    def _grow_tree(self, dataset, depth=0):
        """Recursively build the subtree for ``dataset`` (last column = label)."""
        X, y = dataset[:, :-1], dataset[:, -1]
        n_samples, n_features = X.shape

        is_pure = len(np.unique(y)) == 1
        if is_pure or depth >= self.max_depth or n_samples < self.min_samples_split:
            return self._make_leaf(y)

        best_idx, best_thresh = self._best_split(dataset, n_features)
        if best_idx is None:
            # No feature can separate the remaining rows.
            return self._make_leaf(y)

        goes_left = self._left_mask(X[:, best_idx], best_thresh)
        left = self._grow_tree(dataset[goes_left], depth + 1)
        right = self._grow_tree(dataset[~goes_left], depth + 1)
        return Node(best_idx, best_thresh, left, right)

    def predict_one(self, x, node=None):
        """Return the predicted label for a single sample ``x``.

        Args:
            x: Indexable sample; ``x[i]`` is the value of feature ``i``.
            node: Node to start from; defaults to the root of the tree.
        """
        if node is None:
            node = self.root

        while not node.is_leaf():
            feature_val = x[node.feature]
            if self._is_numeric(node.threshold):
                go_left = feature_val <= node.threshold
            else:
                go_left = feature_val == node.threshold
            node = node.left if go_left else node.right

        return node.value

    def predict(self, X):
        """Return a list with one predicted label per row of ``X``."""
        # Iterating a DataFrame yields column labels, not rows, so convert first.
        if hasattr(X, "columns") and hasattr(X, "to_numpy"):
            X = X.to_numpy(dtype=object)
        return [self.predict_one(row, self.root) for row in X]

    def _feature_label(self, feat_idx):
        """Return the display name used by ``print_tree`` for a feature index."""
        names = getattr(self, "feature_names", None)
        if names is None:
            # Legacy fallback: earlier versions read column names from a
            # module-level DataFrame called `df`; honour it when it exists.
            names = getattr(globals().get("df"), "columns", None)
        if names is not None and feat_idx < len(names):
            return f"{names[feat_idx]}"
        return f"feature_{feat_idx}"

    def print_tree(self, node=None, indent=""):
        """Print the tree as nested questions, starting from ``node`` (default root)."""
        if node is None:
            node = self.root

        if node.is_leaf():
            # The caller already printed the branch prefix without a newline.
            print(node.value)
            return

        label = self._feature_label(node.feature)
        operator = "<=" if self._is_numeric(node.threshold) else "=="
        print(f"{indent}{label} {operator} {node.threshold} ?")

        print(f"{indent} --> True: ", end="")
        self.print_tree(node.left, indent + "  ")
        print(f"{indent} --> False: ", end="")
        self.print_tree(node.right, indent + "  ")


# The final column of `df` is the label; every column before it is a feature.
X_train, y_train = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Train a shallow tree on the whole toy dataset.
model = MyDecisionTree(max_depth=3)
model.fit(X_train, y_train)

print("--- Struktur Decision Tree ---")
model.print_tree()

# Predict the same rows the tree was trained on (there is no hold-out set).
print("\n--- Prediksi Data Training ---")
predictions = model.predict(X_train)
print("Asli   :", y_train)
print("Prediksi:", predictions)

# Accuracy = number of matching labels divided by the number of rows.
n_correct = np.sum(y_train == predictions)
accuracy = n_correct / len(y_train)
print(f"Akurasi: {accuracy * 100}%")