In [138]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [139]:
from sklearn.base import ClassifierMixin
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import SGDClassifier
import numpy as np
from pyscipopt import Model, quicksum, log, exp
import math

In [189]:
class SLIMBinaryClassifier(ClassifierMixin):
    """Binary SLIM-style linear scoring model, solved as a mixed-integer program.

    The model picks integer coefficients in ``[-Lambda, Lambda]`` that
    minimise

        (1 / N) * sum_i alpha_i  +  C_0 * sum_j beta_j  +  C_1 * sum_j gamma_j

    where ``alpha_i`` flags sample ``i`` as not reaching the margin ``eps``,
    ``beta_j`` flags coefficient ``j`` as non-zero, and ``gamma_j`` bounds
    ``|coef_j|``.
    """

    def __init__(self, groups=None, Lambda=100, eps=0.1, C_0=0.01, C_1=0.01):
        """
        SLIMClassifier assumes transformations are already
        applied, and we're doing integer programming

        Prior transformations are applied in the
        SLIMTransformer.

        TODO: Group sparsity constraints?

        Parameters
        ----------
        groups : optional
            Stored on the instance; not used by ``fit`` yet (see TODO above).
        Lambda : int, default=100
            Bound on the absolute value of every integer coefficient.
        eps : float, default=0.1
            Margin a sample must reach to count as correctly classified.
        C_0 : float, default=0.01
            Penalty per non-zero coefficient.
        C_1 : float, default=0.01
            Penalty on the sum of absolute coefficient values.
        """
        self.m = Model()
        self.Lambda = Lambda
        # Honour the constructor argument (it used to be overwritten with 0.1).
        self.eps = eps
        self.C_0 = C_0
        self.C_1 = C_1

        self.groups = groups

        self.M = None  # big-M constant, computed from the data in fit()

        # there are N + 3P variables
        # to be optimized in SLIM algorithm
        self.coef_ = None  # P
        self.alpha_ = None  # N
        self.beta_ = None  # P
        self.gamma_ = None  # P

    @staticmethod
    def _signed_labels(y):
        """Map binary labels given as {-1, +1} or {0, 1} onto -1.0 / +1.0."""
        labels = set(np.unique(y).tolist())
        if labels <= {-1, 1}:
            return y.astype(float)
        if labels <= {0, 1}:
            return 2.0 * y.astype(float) - 1.0
        raise ValueError(
            "y must contain binary labels encoded as {0, 1} or {-1, +1}; "
            f"got {sorted(labels, key=repr)}"
        )

    def fit(self, X, y):
        """Build the integer program for ``(X, y)`` and solve it.

        Parameters
        ----------
        X : array-like of shape (n_instances, n_feats)
            Numeric feature matrix.
        y : array-like of shape (n_instances,)
            Binary labels encoded as {0, 1} or {-1, +1}.

        Returns
        -------
        self
            ``self.sol`` holds the solver's best solution; ``self.coef_``,
            ``self.alpha_``, ``self.beta_`` and ``self.gamma_`` map indices
            to the corresponding model variables.

        Raises
        ------
        ValueError
            If ``X`` is empty or not 2-D, if ``X`` and ``y`` disagree on the
            number of samples, or if ``y`` is not a supported binary encoding.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2 or X.shape[0] == 0 or X.shape[1] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape (n_instances, n_feats)"
            )
        n_instances, n_feats = X.shape
        if y.shape[0] != n_instances:
            raise ValueError(
                f"X has {n_instances} samples but y has {y.shape[0]} labels"
            )
        signs = self._signed_labels(y)

        # Fresh model per fit so repeated calls do not stack variables and
        # constraints from earlier fits onto the same problem.
        self.m = Model()
        model = self.m

        eps = float(self.eps)

        self.coef_ = {}
        self.alpha_ = {}
        self.beta_ = {}
        self.gamma_ = {}

        for j in range(n_feats):
            # addVar defaults to lb=0, so the symmetric range must be explicit
            # or negative coefficients can never be selected.
            self.coef_[j] = model.addVar(
                name=f"c{j}", vtype="INTEGER", lb=-self.Lambda, ub=self.Lambda
            )
            self.beta_[j] = model.addVar(name=f"b{j}", vtype="B")
            self.gamma_[j] = model.addVar(name=f"g{j}", lb=0)  # R+

        for i in range(n_instances):
            self.alpha_[i] = model.addVar(name=f"a{i}", vtype="B")

        # Big-M must dominate the largest attainable |score| plus the margin,
        # i.e. Lambda * max_i sum_j |x_ij| + eps; otherwise the "switched off"
        # side of each indicator constraint still cuts off valid solutions.
        self.M = float(self.Lambda * np.abs(X).sum(axis=1).max() + eps)

        # Indicator constraints linking alpha_i to the signed margin:
        #   alpha_i = 0  =>  margin_i >= eps
        #   alpha_i = 1  =>  margin_i <= eps
        for i in range(n_instances):
            # Plain Python floats keep NumPy scalars out of SCIP expressions.
            margin = quicksum(
                float(signs[i] * X[i, j]) * self.coef_[j] for j in range(n_feats)
            )
            model.addCons(margin <= self.M * (1 - self.alpha_[i]) + eps)
            model.addCons(margin >= -self.M * self.alpha_[i] + eps)

        for j in range(n_feats):
            # beta_j = 0 forces coef_j = 0 (L0 indicator).
            model.addCons(self.coef_[j] >= -self.Lambda * self.beta_[j])
            model.addCons(self.coef_[j] <= self.Lambda * self.beta_[j])

            # gamma_j >= |coef_j| (L1 surrogate).
            model.addCons(self.coef_[j] <= self.gamma_[j])
            model.addCons(self.coef_[j] >= -self.gamma_[j])

        # Single objective: setObjective replaces any previous objective, so
        # all three terms have to be passed in one call.
        model.setObjective(
            (1 / n_instances) * quicksum(self.alpha_[i] for i in range(n_instances))
            + self.C_0 * quicksum(self.beta_[j] for j in range(n_feats))
            + self.C_1 * quicksum(self.gamma_[j] for j in range(n_feats)),
            "minimize",
        )

        model.optimize()
        self.sol = model.getBestSol()
        return self

In [190]:
# Baseline linear model and the SLIM integer-programming model under test.
# SGDClassifier shuffles the training data, so pin the seed to keep the
# baseline score reproducible across kernel restarts.
svm_mod = SGDClassifier(random_state=0)
mod = SLIMBinaryClassifier()

In [191]:
# Load scikit-learn's bundled breast-cancer dataset; the baseline below reads
# its `.data` (features) and `.target` (labels) attributes.
data = load_breast_cancer()

In [192]:
# Fit the SGD baseline, then score it on the very same samples it was fitted
# on -- the displayed number is a training-set score, not a held-out estimate.
svm_mod.fit(data.data, data.target)
svm_mod.score(data.data, data.target)

0.8963093145869947

In [193]:
# Toy problem for the SLIM model: 4 samples x 4 binary features, 0/1 labels.
X = np.array([[1, 0, 1, 1], [1, 0, 0, 1], [0, 0, 1, 0], [0, 1, 1, 1]])
y = np.array([1, 1, 0, 0])

In [194]:
# Build and solve the integer program on the toy data; `fit` returns the
# estimator itself, which is why its repr is displayed below.
mod.fit(X, y)

<__main__.SLIMBinaryClassifier at 0x7f6aeed28fd0>

In [195]:
# Inspect the best solution found by the solver (stored on the estimator by
# `fit` from the model's `getBestSol()`).
mod.sol

{}