In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import scipy

In [2]:
from abc import ABC, abstractmethod

In [3]:
class BinaryClassifier(ABC):
    """Abstract interface every two-class estimator has to implement.

    Concrete subclasses must override both ``fit`` and ``predict``;
    the class itself cannot be instantiated.
    """

    @abstractmethod
    def fit(self, X, y):
        """Train the estimator on samples ``X`` with targets ``y``."""

    @abstractmethod
    def predict(self, X):
        """Return the estimator's output for the samples ``X``."""

In [4]:
def filter_data(key, data, target_name=None):
    """Binarize the target column of ``data`` for a one-vs-all split.

    Args:
        key: Class label that becomes the positive class.
        data: ``pandas.DataFrame`` holding the features and the target
            column.
        target_name: Label of the target column.  When ``None`` the last
            column of ``data`` is used.

    Returns:
        A new ``DataFrame`` equal to ``data`` except that the target column
        holds ``1`` where it was equal to ``key`` and ``0`` elsewhere.
        ``data`` itself is not modified.
    """
    # The target is a *column*; ``iloc[-1]`` would select the last row.
    column = data.columns[-1] if target_name is None else target_name

    relabelled = data.copy()
    # ``Series.where`` keeps/replaces values and is not an if/else selector,
    # so build the 0/1 target directly from the comparison.
    relabelled[column] = (data[column] == key).astype(int)
    return relabelled

def take_subsample(classes, data, target_name=None):
    """Extract the rows of two classes and relabel them as ``0`` / ``1``.

    Args:
        classes: Sequence whose first two items are the class labels to
            keep.  ``classes[0]`` becomes target ``0`` and ``classes[1]``
            becomes target ``1``.
        data: ``pandas.DataFrame`` holding the features and the target
            column.
        target_name: Label of the target column.  When ``None`` the last
            column of ``data`` is used.

    Returns:
        A new ``DataFrame`` with only the rows whose target is one of the
        two classes (original index preserved) and with the target column
        replaced by the 0/1 encoding.  ``data`` itself is not modified.
    """
    # The target is a *column*; ``iloc[-1]`` would select the last row.
    column = data.columns[-1] if target_name is None else target_name
    first, second = classes[0], classes[1]

    target = data[column]
    # Element-wise ``|`` is required here: Python's ``or`` cannot combine
    # two boolean Series and raises ``ValueError``.
    keep = (target == first) | (target == second)

    subsample = data[keep].copy()
    # Only the target column is re-encoded; feature columns stay untouched.
    subsample[column] = (subsample[column] != first).astype(int)
    return subsample

In [5]:
class MulticlassClassifier:
    """Multiclass wrapper that combines several binary classifiers.

    Two reduction strategies are supported (see ``strategies``):

    * ``"one-vs-all"``: one binary classifier per class, trained to separate
      that class (target ``1``) from all other classes (target ``0``).
      Prediction picks the class whose classifier returns the highest score.
    * ``"all-vs-all"``: one binary classifier per unordered pair of classes,
      trained only on the samples of those two classes.  Prediction is a
      vote over all pairwise decisions.

    With any other ``mode`` (including the default ``None``) ``fit`` only
    records the class labels and ``predict`` returns a list of ``None``.

    The wrapped binary classifier must provide ``fit(X=..., y=...)`` and a
    ``predict(X=...)`` that returns one numeric score per sample.

    Attributes:
        bin_clf: The binary classifier (instance or class) given at
            construction time; used as the template for every sub-model.
        mode: Selected strategy name.
        classifiers: Fitted binary classifiers, one per class
            (one-vs-all) or one per class pair (all-vs-all).
        classes: Sorted unique labels seen by ``fit``.
        subsamples: For all-vs-all, the ``(i, j)`` positions in ``classes``
            handled by the classifier at the same list position.
    """

    strategies = ["one-vs-all", "all-vs-all"]

    def __init__(self, binary_classifier=BinaryClassifier, mode=None):
        """Store the template classifier and the multiclass strategy.

        Args:
            binary_classifier: Binary classifier instance, or a class that
                can be instantiated without arguments.
            mode: One of ``strategies``.
        """
        self.bin_clf = binary_classifier
        self.mode = mode
        self.classifiers = [self.bin_clf]
        self.classes = None
        self.subsamples = None

    def fit(self, X, y):
        """Train the binary classifiers required by the selected strategy.

        Args:
            X: Feature matrix (``DataFrame`` or array-like), one row per
                sample, without the target column.
            y: Class labels, one per row of ``X``.

        Raises:
            ValueError: If a supported mode is selected and ``y`` is empty.
        """
        labels = np.asarray(y).ravel()
        self.classes = np.unique(labels)

        if self.mode not in self.strategies:
            # Unsupported mode: nothing to train.
            return
        if self.classes.size == 0:
            raise ValueError("y must contain at least one label")

        # Rebuild the list on every call so refitting never accumulates
        # stale models, and give every task its own estimator object.
        self.classifiers = []
        self.subsamples = None

        if self.mode == self.strategies[0]:
            for label in self.classes:
                model = self._new_classifier()
                model.fit(X=X, y=(labels == label).astype(int))
                self.classifiers.append(model)
        else:
            self.subsamples = []
            n_classes = len(self.classes)
            # n * (n - 1) / 2 unordered pairs of classes.
            for i in range(n_classes):
                for j in range(i + 1, n_classes):
                    in_pair = (labels == self.classes[i]) | (labels == self.classes[j])
                    # Second class of the pair is the positive (1) target,
                    # matching the threshold rule used in ``predict``.
                    pair_target = (labels[in_pair] == self.classes[j]).astype(int)
                    model = self._new_classifier()
                    model.fit(X=self._select_rows(X, in_pair), y=pair_target)
                    self.classifiers.append(model)
                    self.subsamples.append((i, j))

    def predict(self, X, threshold=0.5):
        """Predict a class label for every row of ``X``.

        Args:
            X: Feature matrix with the same columns used in ``fit``.
            threshold: All-vs-all only.  A pairwise score below this value
                votes for the first class of the pair, otherwise for the
                second one.

        Returns:
            A list with one predicted class label per row of ``X``; a list
            of ``None`` when ``mode`` is not a supported strategy.

        Raises:
            RuntimeError: If a supported mode is selected but ``fit`` has
                not been called.
        """
        n_samples = len(X)
        y_pred = [None] * n_samples

        if self.mode not in self.strategies:
            return y_pred
        if self.classes is None:
            raise RuntimeError("fit must be called before predict")

        n_models = len(self.classifiers)
        if self.mode == self.strategies[0]:
            scores = np.empty((n_samples, n_models), dtype=float)
            for k, model in enumerate(self.classifiers):
                scores[:, k] = self._predict_scores(model, X)
            self._most_likely_class(scores, y_pred)
        else:
            votes = np.empty((n_samples, n_models), dtype=int)
            for k, (model, (first, second)) in enumerate(
                zip(self.classifiers, self.subsamples)
            ):
                pair_scores = self._predict_scores(model, X)
                votes[:, k] = np.where(pair_scores < threshold, first, second)
            self._voting_of_classifiers(votes, y_pred)

        # The helpers work with positions in ``self.classes``; translate
        # them back to the original labels.
        return self.classes[np.asarray(y_pred, dtype=int)].tolist()

    def _new_classifier(self):
        """Return an independent, unfitted copy of the template classifier."""
        import copy

        if isinstance(self.bin_clf, type):
            return self.bin_clf()
        return copy.deepcopy(self.bin_clf)

    @staticmethod
    def _select_rows(X, mask):
        """Return the rows of ``X`` selected by the boolean array ``mask``."""
        if isinstance(X, (pd.DataFrame, pd.Series)):
            return X.loc[mask]
        return np.asarray(X)[mask]

    @staticmethod
    def _predict_scores(classifier, X):
        """Return ``classifier.predict(X)`` as a flat float array."""
        return np.asarray(classifier.predict(X=X), dtype=float).ravel()

    def _voting_of_classifiers(self, predictions, y_pred):
        """Fill ``y_pred`` with the winning class position of every row.

        Args:
            predictions: 2-D integer array, one row per sample and one
                column per pairwise classifier, holding the position (in
                ``self.classes``) each classifier voted for.
            y_pred: List updated in place with the winning position.
                The winner is the first class to reach the highest running
                vote count; with no votes at all, position ``0`` wins.
        """
        for row_idx, row_votes in enumerate(np.asarray(predictions, dtype=int)):
            counts = [0] * len(self.classes)
            lead_cls = 0
            for vote in row_votes:
                counts[vote] += 1
                if counts[vote] > counts[lead_cls]:
                    lead_cls = int(vote)
            y_pred[row_idx] = lead_cls

    @staticmethod
    def _most_likely_class(y_proba, y_pred):
        """Fill ``y_pred`` with the position of the highest score per row.

        Args:
            y_proba: 2-D array of scores, one row per sample and one column
                per class (column ``k`` belongs to class position ``k``).
            y_pred: List updated in place.  Ties resolve to the lowest
                class position.
        """
        scores = np.asarray(y_proba, dtype=float)
        for row_idx, best in enumerate(scores.argmax(axis=1)):
            y_pred[row_idx] = int(best)
