In [2]:
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import random as rd

In [27]:
class SimpleMultiClassBoosting(BaseEstimator, ClassifierMixin):
    def __init__(self, base_estimator=None, n_estimators=50):
        self.base_estimator = base_estimator if base_estimator is not None else DecisionTreeClassifier(max_depth=1)
        self.n_estimators = n_estimators
        self.learners = []
        self.learner_weights = []
        self.label_encoder = LabelEncoder()

    def fit(self, X, y):
        # Convert labels to [0, n_classes-1]
        Y = self.label_encoder.fit_transform(y)
        n_classes = len(np.unique(Y))
        # Initialize weights uniformly
        n_samples = X.shape[0]
        self.weights = (1/n_samples) * np.ones(n_samples, dtype=float)
        for _ in range(self.n_estimators):
            learner = DecisionTreeClassifier(max_depth=2)
            learner.fit(X, Y, sample_weight=self.weights)
            pred = learner.predict(X)
            missClassified = pred != y
            learner_error = np.sum(self.weights[missClassified]) / np.sum(self.weights)
            # Compute weighted error rate (misclassification rate)
            learner_weight = np.log((1-learner_error) / learner_error) + np.log(n_classes - 1)
            # Compute learner weight using SAMME algorithm
            print("learning error : ", learner_error)
            if learner_error >= 1 - (1 / n_classes):
                continue

            # Increase the weights of misclassified samples
            for idx in range(n_samples):
                if missClassified[idx] == True:
                    self.weights[idx] *= np.exp(learner_weight)    
            self.weights /= np.sum(self.weights)    
            # Save the current learner
            self.learners.append(learner)
            self.learner_weights.append(learner_weight)

    def predict(self, X):
        # Collect predictions from each learner

        # Weighted vote for each sample's prediction across all learners

        # Final prediction is the one with the highest weighted vote

        # Convert back to original class labels
        predictionsOfLearners = []
        for learner in self.learners:
            predictionsOfLearners.append(learner.predict(X))
        predictionsOfLearners = np.array(predictionsOfLearners)
        prediction = np.empty(X.shape[0])        
        for i in range(X.shape[0]):
            labels = np.unique(predictionsOfLearners[:, i])
            votes = {label : 0 for label in labels}
            for j in range(len(predictionsOfLearners[:, i])):
                for label in labels:
                    if predictionsOfLearners[j, i] == label:
                        votes[label] += self.learner_weights[j]
            finalPrediction = max(votes, key=votes.get)
            prediction[i] = self.label_encoder.inverse_transform(np.array([finalPrediction]))
        return prediction


In [4]:
df = pd.read_csv('a.csv')
df

Unnamed: 0,x,y,z
0,-0.5,-0.5,1
1,0.5,-0.5,0
2,-0.5,0.5,0
3,0.5,0.5,1


In [6]:
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

In [28]:
for i in range(1, 2):    
    m = SimpleMultiClassBoosting(n_estimators=i)
    m.fit(X, Y)

learning error :  0.0


  learner_weight = np.log((1-learner_error) / learner_error) + np.log(n_classes - 1)
