In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

data = fetch_openml('mnist_784', version=1, parser='auto')  # data from https://www.openml.org/d/554

# Assemble the working frame column-wise instead of via np.c_: concatenating the
# numeric pixel matrix with the string labels upcasts every one of the pixel
# columns to object dtype, which is far slower and heavier than keeping them
# numeric. The label column is stored as plain Python strings (object dtype) so
# that string comparisons against digit labels keep working downstream.
dfData = pd.DataFrame(data["data"], columns=data["feature_names"]).assign(
    target=np.asarray(data["target"], dtype=object)
)
def numberOfLoops(num):
    """Return the number of closed loops assigned to a digit label.

    Parameters
    ----------
    num : str or int
        Digit label, either as text (e.g. ``"8"``) or as an integer (e.g. ``8``).

    Returns
    -------
    int
        2 for the digit 8, 1 for the digits 0, 6 and 9, and 0 for anything else.
    """
    loops_by_digit = {"0": 1, "6": 1, "9": 1, "8": 2}
    # Normalise through str() so integer labels are recognised too; previously
    # an int such as 8 matched none of the string comparisons and returned 0.
    return loops_by_digit.get(str(num), 0)
# Derive two auxiliary labels from the digit label:
#   numLoops - loop count returned by numberOfLoops for each target value
#   hasLoop  - boolean flag, True wherever numLoops is positive
dfData["numLoops"] = dfData["target"].map(numberOfLoops)
dfData["hasLoop"] = dfData["numLoops"].gt(0)

In [17]:
# Separate the three label columns (y) from the pixel features (X).
# NOTE: dfData is rebound to the label-free frame here, so running this cell a
# second time fails because the label columns are no longer present.
y = dfData[["target", "numLoops", "hasLoop"]]
dfData = dfData.drop(columns=["target", "numLoops", "hasLoop"])
X = dfData.copy()

# Rescale every feature to [0, 1].
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed in the next cell.
img_pipeline = Pipeline(steps=[("mm_scaler", MinMaxScaler())])
X_transf = img_pipeline.fit_transform(X)

In [18]:
# One stratified shuffle split holding out 20% of the rows for testing.
stratSplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

# n_splits=1, so the splitter yields exactly one (train, test) index pair.
train_index, test_index = next(stratSplit.split(X_transf, y))

X_train, X_test = X_transf[train_index], X_transf[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [None]:
# Baseline: a plain (single-output) logistic regression on the digit label.
# LogisticRegression.fit requires a 1-D target, so train on the "target" column
# only; passing the whole three-column label array raises a ValueError.
log_reg = LogisticRegression(C=1e5, max_iter=100)
log_reg.fit(X_train, y_train["target"])
log_reg.predict(X_test)[0]

In [None]:
from sklearn.multioutput import MultiOutputClassifier

# The multi-output wrapper is fed a single homogeneous array below, so every
# label column (digit strings, loop count, boolean flag) is cast to int64.
# The cast is written to a new frame instead of assigning back into y_train:
# y_train is an .iloc slice of y (column assignment on it can trigger pandas'
# SettingWithCopyWarning), and overwriting it would silently change the labels
# seen by every later cell that uses y_train.
y_train_int = y_train.astype("int64")

log_reg = LogisticRegression(C=1e5)
log_reg_multi_label = MultiOutputClassifier(log_reg)
log_reg_multi_label.fit(X_train, y_train_int.values)
log_reg_multi_label.predict(X_test)[0]

In [24]:
from sklearn.base import clone

class customMultilabel():
    """Multi-output wrapper that fits one independent clone of a classifier per target column.

    Parameters
    ----------
    clf : estimator
        Template classifier. It is never fitted itself; ``fit`` passes it to
        ``clone`` once per output, and each clone must provide ``fit(X, y)``
        and ``predict(X)``.

    Attributes
    ----------
    clfs : list
        The fitted per-output classifiers (empty until ``fit`` is called).
    numOutputs : int
        Number of target columns seen by the last ``fit`` call.
    """

    def __init__(self, clf):
        self.clf = clf
        self.clfs = []

    def fit(self, X, y):
        """Fit one clone of the template classifier for each target column.

        Parameters
        ----------
        X : array-like
            Training features, forwarded unchanged to every clone.
        y : pandas DataFrame/Series, numpy array or sequence
            Targets. A 2-D input is treated as one output per column; a 1-D
            input is treated as a single output.

        Returns
        -------
        customMultilabel
            ``self``, so calls can be chained.

        Raises
        ------
        RuntimeError
            If ``y`` is neither 1-D nor 2-D.
        """
        if not hasattr(y, "shape"):
            # Plain Python sequences carry no shape information.
            y = np.asarray(y)

        if len(y.shape) == 2:
            numOutputs = y.shape[-1]
        elif len(y.shape) == 1:
            numOutputs = 1
        else:
            raise RuntimeError("Unexpected target shape")

        # Build a fresh list on every call so that refitting replaces the
        # previous models instead of appending to them.
        fitted = []
        for i in range(numOutputs):
            model = clone(self.clf)
            model.fit(X, self._target_column(y, i))
            fitted.append(model)

        self.numOutputs = numOutputs
        self.clfs = fitted
        return self

    @staticmethod
    def _target_column(y, i):
        """Return the i-th output of ``y`` (or ``y`` itself when it is 1-D)."""
        if len(y.shape) == 1:
            return y
        if hasattr(y, "iloc"):
            # Column-wise access keeps each pandas column's own dtype.
            return y.iloc[:, i]
        return y[:, i]

    def predict(self, X):
        """Predict every output for ``X``.

        Returns
        -------
        numpy.ndarray or None
            A 1-D array when the model has a single output, otherwise a 2-D
            array with one column per output, in the column order of the
            targets passed to ``fit``. ``None`` if there are zero outputs.
        """
        columns = [self.clfs[i].predict(X) for i in range(self.numOutputs)]
        if not columns:
            return None
        if len(columns) == 1:
            return columns[0].copy()
        # Stack once at the end rather than re-concatenating inside the loop.
        return np.column_stack(columns)

In [None]:
# Train the hand-written multi-output wrapper (one logistic regression per
# label column of y_train) and predict all labels for the held-out rows.
cml_log_reg = customMultilabel(clf=LogisticRegression(C=1e5))
cml_log_reg.fit(X=X_train, y=y_train)
output = cml_log_reg.predict(X=X_test)

In [14]:
# First five prediction rows; columns follow the label order target, numLoops, hasLoop.
output[:5]

array([['0', 1, True],
       ['0', 1, True],
       ['4', 0, False],
       ['6', 1, True],
       ['1', 0, False]], dtype=object)

In [15]:
# Ground-truth labels for the same first five held-out rows, for comparison.
y_test.head(5)

Unnamed: 0,target,numLoops,hasLoop
29205,0,1,True
5801,0,1,True
18245,4,0,False
41828,6,1,True
24289,1,0,False
