In [29]:
import numpy as np
from sklearn.datasets import load_iris, load_wine
from sklearn.model_selection import train_test_split

## Numpy way

In [13]:
class SimpleDicisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.left = None
        self.right = None
        self.feature_index = None
        self.threshold = None
        self.value = None
        
    def fit(self, X,y, depth=0):
        if depth == self.max_depth or len(np.unique(y))==1:
            self.value = np.argmax(np.bincount(y.astype(int)))
            return
        
        n_samples, n_features = X.shape
        best_gini = 1.0
        for feature in range(n_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_indices = X[:, feature] <= threshold
                right_indices = X[:, feature] > threshold
            if sum(left_indices) == 0 or sum(right_indices)==0:
                continue
            gini = self._calculate_gini(y[left_indices], y[right_indices])
            if gini < best_gini:
                best_gini = gini
                self.feature_index = feature
                self.threshold = threshold
        if self.feature_index is not None:
            left_indices = X[:, self.feature_index] <= self.threhold
            right_indices = X[:, self.feature_index] > self.threhold
            self.left = SimpleDicisionTree(max_depth=self.max_depth)
            self.left.fit(X[left_indices], y[left_indices], deph+1)
            self.right = SimpleDicisionTree(max_depth=self.max_depth)
            self.right.fit(X[right_indices], y[right_indices], deph+1)
        else:
            self.value = np.argmax(np.bincount(y.astype(int)))
            
    def _calulate_gini(self, left_labels, right_labels):
        total = len(left_labels) + len(right_labels)
        gini_left = 1.0 - sum([(left_labels == c).mean()**2 for c in np.unique(left_labels)])
        gini_right = 1.0 - sum([(right_labels == c).mean()**2 for c in np.unique(right_labels)])
        return (len(left_labels) * gini_left + len(right_labels) * gini_right)/total
        
    def predict(self, X):
        if self.feature_index is None:
            return np.array([self.value] * len(X))
        else:
            left_indices = X[:, self.feature_index] <= self.threshold
            right_indices = X[:, self.feature_index] > self.threshold
            predictions = np.zeros(len(X), dtype=int)
            predictions[left_indices] = self.left.predict(X[left_indices])
            predictions[right_indices] = self.right.predict(X[right_indices])
            return predictions
            
        
class SimpleXGBoost:
    def __init__(self, n_estimators=100, max_depth=3, learning_rate=0.1):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.estimators = []
        
        
    def fit(self, X, y):
        y = np.asarray(y, dtype=int)
        y_pred = np.zeros(len(y))
        
        for _ in range(self.n_estimators):
            residual = y - y_pred
            tree = SimpleDicisionTree(max_depth=self.max_depth)
            tree.fit(X, residual)
            y_pred += self.learning_rate * tree.predict(X)
            self.estimators.append(tree)
            
    def predict(self, X):
        predictions = np.zeros((len(X), len(self.estimators)))
        for i, tree in enumerate(self.estimators):
            predictions[:, i] = tree.predict(X)
        return np.array([np.bincount(row.astype(int)).argmax() for row in predictions])
        

In [4]:
#LOad the dataset
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

In [15]:
xgb = SimpleXGBoost(n_estimators=100, max_depth=3, learning_rate=0.1)

In [16]:
xgb.fit(X_train, y_train)
predictions = xgb.predict(X_test)

In [17]:
#Evaluating the model
accuracy = np.mean(predictions == y_test)
print(f"Accuracy: {accuracy}")

Accuracy: 0.3333333333333333


## Sklearn

In [26]:
!pip install xgboost



In [27]:
from xgboost import XGBClassifier

In [30]:
#LOad the dataset
wine = load_wine()
X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.2, random_state=42)

In [31]:
xgb = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)

In [32]:
xgb.fit(X_train, y_train)
predictions = xgb.predict(X_test)

In [33]:
#Evaluating the model
accuracy = np.mean(predictions == y_test)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9722222222222222
