In [3]:
%load_ext lab_black

In [4]:
from sklearn.base import ClassifierMixin
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import SGDClassifier
import numpy as np
from pyscipopt import Model, quicksum, log, exp
import math

In [28]:
class SLIMBinaryClassifier(ClassifierMixin):
    """Sparse integer-coefficient linear classifier fitted as a mixed-integer program.

    ``fit`` builds a pyscipopt ``Model`` with one integer coefficient per
    feature and minimises

        (1 / N) * sum(alpha) + C_0 * sum(beta) + C_1 * sum(gamma)

    where ``alpha[i]`` flags instance ``i`` as misclassified, ``beta[j]``
    flags coefficient ``j`` as non-zero and ``gamma[j]`` bounds ``|coef[j]|``.

    Only ``__init__`` and ``fit`` are defined here; there is no intercept term
    and no ``predict`` method.
    """

    def __init__(self, groups=None, Lambda=100, eps=0.1):
        """
        SLIMClassifier assumes transformations are already
        applied, and we're doing integer programming

        Prior transformations are applied in the
        SLIMTransformer.

        Parameters
        ----------
        groups : any, optional
            Stored on the instance but not used by ``fit`` yet.
        Lambda : number
            Each integer coefficient is restricted to ``[-Lambda, Lambda]``.
        eps : float
            Margin an instance's signed score must reach to count as
            correctly classified.

        TODO: Group sparsity constraints?
        """
        self.m = Model()
        # Becomes True once fit() has populated self.m, so that a later fit()
        # knows it has to start from a fresh model.
        self._model_used = False
        self.Lambda = Lambda
        self.eps = eps

        self.groups = groups

        self.M = None  # big-M constant, computed from X in fit()

        # there are N + 3P variables
        # to be optimized in SLIM algorithm
        self.coef_ = None  # P
        self.alpha_ = None  # N
        self.beta_ = None  # P
        self.gamma_ = None  # P

    def fit(self, X, y):
        """Build and solve the integer program for ``X`` and ``y``.

        Parameters
        ----------
        X : array-like of shape (n_instances, n_feats)
            Feature matrix.
        y : array-like of shape (n_instances,)
            Labels with exactly two distinct values. The smaller value is
            treated as the negative class (-1) and the larger as the positive
            class (+1), so both ``{0, 1}`` and ``{-1, +1}`` encodings work.

        Returns
        -------
        self
            With ``coef`` set to the list of coefficient values in the best
            solution, ``sol`` set to that solution and ``classes_`` set to the
            two label values in sorted order.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` disagree on the number of
            instances, or if ``y`` does not contain exactly two classes.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_instances, n_feats = X.shape
        if y.shape[0] != n_instances:
            raise ValueError(
                f"X has {n_instances} instances but y has {y.shape[0]} labels"
            )

        classes = np.unique(y)
        if len(classes) != 2:
            raise ValueError(
                f"SLIMBinaryClassifier needs exactly 2 classes, got {len(classes)}"
            )
        self.classes_ = classes
        # The margin constraints multiply the score by the label, so the label
        # has to act as a sign. A raw 0 label would zero the score out and
        # force that instance to be counted as an error.
        signs = np.where(y == classes[1], 1.0, -1.0)

        # A model that already holds variables and constraints from an earlier
        # fit cannot be reused; start again from an empty one.
        if getattr(self, "_model_used", False):
            self.m = Model()
        self._model_used = True

        self.coef_ = {}
        self.alpha_ = {}
        self.beta_ = {}
        self.gamma_ = {}

        for j in range(n_feats):
            # addVar defaults to lb=0.0; give the coefficient an explicit
            # symmetric range so that negative weights are possible.
            self.coef_[j] = self.m.addVar(
                f"c{j}", vtype="INTEGER", lb=-self.Lambda, ub=self.Lambda
            )
            self.beta_[j] = self.m.addVar(f"b{j}", vtype="B")
            self.gamma_[j] = self.m.addVar(f"g{j}", lb=0)  # R+

        for i in range(n_instances):
            self.alpha_[i] = self.m.addVar(f"a{i}", vtype="B")

        # Big-M must dominate every attainable |score| plus the margin:
        # |sum_j X[i, j] * coef[j]| <= Lambda * sum_j |X[i, j]|.
        self.M = float(self.Lambda * np.abs(X).sum(axis=1).max() + self.eps)
        self.C_0 = 0.1
        self.C_1 = 0.1

        # Margin constraints: alpha[i] == 0 forces signed score >= eps,
        # alpha[i] == 1 forces signed score <= eps.
        for i in range(n_instances):
            signed_score = float(signs[i]) * quicksum(
                float(X[i, j]) * self.coef_[j] for j in range(n_feats)
            )
            self.m.addCons(signed_score <= self.M * (1 - self.alpha_[i]) + self.eps)
            self.m.addCons(signed_score >= -self.M * self.alpha_[i] + self.eps)

        for j in range(n_feats):
            # beta[j] == 0 pins the coefficient to zero.
            self.m.addCons(self.coef_[j] >= -self.Lambda * self.beta_[j])
            self.m.addCons(self.coef_[j] <= self.Lambda * self.beta_[j])

            # gamma[j] >= |coef[j]|
            self.m.addCons(self.coef_[j] <= self.gamma_[j])
            self.m.addCons(self.coef_[j] >= -self.gamma_[j])

        # Error rate plus the two sparsity/size penalties, set in one call.
        self.m.setObjective(
            (1 / n_instances) * quicksum(self.alpha_[i] for i in range(n_instances))
            + self.C_0 * quicksum(self.beta_[j] for j in range(n_feats))
            + self.C_1 * quicksum(self.gamma_[j] for j in range(n_feats))
        )

        self.m.optimize()
        self.sol = self.m.getBestSol()

        self.coef = [self.sol[self.coef_[j]] for j in range(n_feats)]
        return self

In [29]:
# Two estimators to compare: scikit-learn's SGDClassifier with default
# settings, and the SLIMBinaryClassifier defined earlier in this notebook.
svm_mod = SGDClassifier()
# NOTE(review): this instance is never fitted; a later cell rebinds `mod` to a
# fresh SLIMBinaryClassifier() before calling fit().
mod = SLIMBinaryClassifier()

In [30]:
# Load scikit-learn's built-in breast cancer dataset. Later cells read its
# `.data` attribute as the feature matrix and `.target` as the labels.
data = load_breast_cancer()

In [31]:
# Baseline: fit the SGD classifier on the full dataset, then score it on the
# very same data, so the figure shown is a training-set score, not a held-out one.
# NOTE(review): SGDClassifier() was created without a random_state, so this
# number presumably changes from run to run; confirm before quoting it.
svm_mod.fit(data.data, data.target)
svm_mod.score(data.data, data.target)

0.9226713532513181

In [32]:
# Tiny hand-written example: 4 samples x 4 binary features, with 0/1 labels.
# NOTE(review): neither X nor y is used by any cell visible in this notebook.
X = np.array([[1, 0, 1, 1], [1, 0, 0, 1], [0, 0, 1, 0], [0, 1, 1, 1]])
y = np.array([1, 1, 0, 0])

In [33]:
# Fit a fresh SLIM classifier on the full breast cancer data and display
# `coef`, the list of per-feature coefficient values that fit() reads from the
# solver's best solution.
mod = SLIMBinaryClassifier()
mod.fit(data.data, data.target)
mod.coef

[-0.0,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -0.0,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -0.0,
 0.0]