# Constrained Ordinal Regression Tests

In [1]:
%run -m ipy_startup

In [23]:
from scipy import optimize
from py_utils import math

class Regressor(object):
    
    def __init__(self, k, epsilon=1e-128):
        self.k = k
        self.epsilon = epsilon
        
    def _sigmoid(self, x):
        return math.sigmoid(x, clip=True)
    
    def _objective(self, pv, X, y):
        
        # Parameters given as linear params first, then K-1 intercept terms
        pvb = pv[:-(self.k-1)]
        pva = pv[-(self.k-1):]
        n = len(y)
        
        if np.any(np.diff(pva) < 0):
            print('found unordered pva: ', pva)
            return np.inf
        
        lp = 0
        for i in range(n):
            k = y[i]
            Xi = X[i]
            
            s0 = 1 if k == self.k else self._sigmoid(np.dot(Xi, pvb) + pva[k-1])
            s1 = 0 if k == 1 else self._sigmoid(np.dot(Xi, pvb) + pva[k-2])
            assert 0 <= s0 <= 1
            assert 0 <= s1 <= 1
            p = s0 - s1
            assert p >= 0
            if p < self.epsilon:
                return np.inf
            else:
                lp += np.log(p)
            
        # print('params: ', pv)
        # print('logprob: ', -lp)
        return -lp / n

    def _jacobian(self, pv, X, y):
        # Assume y is 1 through K
        
        assert np.all(np.in1d(y, np.arange(1, self.k + 1)))
        
        # Parameters given as linear params first, then K-1 intercept terms
        pvb = pv[:-(self.k-1)]
        pva = pv[-(self.k-1):]
        n = len(y)
        
        # Initialize gradient vectors to accmulate in loop
        gb = np.zeros(X.shape[1])
        ga = np.zeros(self.k - 1)
        
        gnan = np.repeat(np.nan, len(gb) + len(ga))
        
        for i in range(n):
            k = y[i]
            Xi = X[i]
            
            # Helper values for gradient calculation
            #if k != self.k:
                #print(np.dot(Xi, pvb), pva[k-1])
            s0 = 1 if k == self.k else self._sigmoid(np.dot(Xi, pvb) + pva[k-1])
            s1 = 0 if k == 1 else self._sigmoid(np.dot(Xi, pvb) + pva[k-2])
            spq0 = s0 * ( 1 - s0 )
            spq1 = s1 * ( 1 - s1 )
            
            if (s0 - s1) < self.epsilon:
                return gnan
            
            # Accumulate gradient components
            if k < self.k:
                ga[k-1] += spq0 / ( s0 - s1 )
            if k > 1:
                ga[k-2] -= spq1 / ( s0 - s1 )
            gb += (Xi * (spq0 - spq1)) / (s0 - s1)
        
        
        j = np.concatenate((gb, ga)) / n
        print('jac', j)
        return -j 
    
    def _constraint(self, i):
        def con_val(pv):
            return pv[i] - pv[i-1]

        def con_jac(pv):
            j = np.zeros(len(pv))
            j[i] = 1.0
            j[i-1] = -1.0
            return j

        return {'type': 'ineq', 'fun': con_val, 'jac': con_jac}

#     def check_gradient(self, X, y):
#         err = []
        
#         e = optimize.check_grad(
#             self._objective,
#             self._jacobian,
#             pv,
#             X, y
#         )
#         self.err.append(err)
        
    def fit(self, X, y):
        pa0 = np.linspace(-1, 1, num=self.k-1)
        pb0 = np.zeros(X.shape[1])
        pv0 = np.concatenate((pb0, pa0))
        
        constraints = [self._constraint(i) for i in range(X.shape[1]+1, len(pv0))]
        
        self.err_ = []
        def callback(pv):
            err = optimize.check_grad(
                self._objective,
                self._jacobian,
                pv,
                X, y
            )
            self.err_.append((err, np.array(pv)))
            
        self.optimize_result_ = optimize.minimize(
            self._objective,
            pv0,
            jac=self._jacobian,
            args=(X, y),
            #bounds=bounds,
            method='SLSQP',
            constraints=constraints,
            callback=callback
        )
        
        self.err_ = pd.DataFrame(self.err_, columns=['error', 'parameters'])
        
    def predict(self, X):
        n = len(X)
        pv = self.optimize_result_.x
        pvb = pv[:-(self.k-1)]
        pva = pv[-(self.k-1):]
        yb = np.dot(X, pvb)[:,np.newaxis]
        ya = np.tile(pva, (n, 1))
        p = self._sigmoid(ya + yb)
        assert p.ndim > 1
        p = np.hstack(( np.zeros((n, 1)), p, np.ones((n, 1)) ))
        return np.diff(p, axis=1)

In [24]:
from sklearn.datasets import load_iris
d = load_iris()
X, y = pd.DataFrame(d['data'], columns=d['feature_names']), pd.Series(d['target'] + 1)

In [25]:
y.value_counts()

3    50
2    50
1    50
dtype: int64

In [26]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [27]:
est = Regressor(3)
np.seterr(all='raise')
est.fit(X.values, y.values)

jac [ -57.82673357   16.22950045 -149.42837347  -65.13731936   15.27997573
  -15.27997573]
jac [ 541.31110799  305.72717459  283.97023599   77.99007106   39.11965912
   59.74473248]
jac [ 238.39585445  157.51032044   69.29844057    7.63296407   28.47673784
   19.7285013 ]
jac [  3.01402169   6.98561853 -16.84754604 -10.49963036 -16.37766971
  18.94994269]
jac [  3.01402169   6.98561853 -16.84754604 -10.49963036 -16.37766971
  18.94994269]
jac [  9.93117243   7.87469322  -5.70713268  -5.98869476 -14.38522389
  17.55784227]
jac [  9.93117243   7.87469322  -5.70713268  -5.98869476 -14.38522389
  17.55784227]
jac [ 11.2867434    5.71308337   3.45009706  -1.6364484   -8.56674705
  11.22050659]
jac [ 11.2867434    5.71308337   3.45009706  -1.6364484   -8.56674705
  11.22050659]
jac [ 6.5183794   2.87624717  2.5453162  -1.30021355 -5.36387238  6.98319931]
jac [ 6.5183794   2.87624717  2.5453162  -1.30021355 -5.36387238  6.98319931]
jac [ 1.67394132  0.4147611   0.43733992 -1.46722172 -3.13312

In [28]:
est.optimize_result_

     fun: 0.044308988676268875
     jac: array([  1.72731330e-06,   1.65225611e-04,   1.26189734e-04,
        -2.67110122e-04,  -2.40358731e-04,  -3.67668607e-04])
 message: 'Optimization terminated successfully.'
    nfev: 43
     nit: 42
    njev: 42
  status: 0
 success: True
       x: array([  4.06895878,   7.54175429,  -8.54492583, -16.59275761,
       -16.18746025,  23.00792325])

In [51]:
est.optimize_result_

     fun: 5.949281432323897
     jac: array([  6.15962336e-03,   2.93501175e-03,   4.83942576e-03,
         1.69139198e-03,   6.88615028e-06,   9.88924373e-04])
 message: 'Optimization terminated successfully.'
    nfev: 46
     nit: 37
    njev: 37
  status: 0
 success: True
       x: array([  2.46562861,   6.68282197,  -9.43111804, -18.28911399,
         6.76940527,  42.64389706])

In [29]:
pd.set_option('display.max_colwidth', 1000)
est.err_

Unnamed: 0,error,parameters
0,2.326518e-07,"[-0.385511557131, 0.108196669637, -0.99618915648, -0.434248795706, -0.898133495163, 0.898133495163]"
1,4.80679e-08,"[0.415346479798, 0.755203423029, -1.05055504702, -0.545641934287, -0.754426513962, 0.94168454026]"
2,6.665359e-08,"[0.517868883768, 0.909047074979, -1.27279390891, -0.678899000863, -0.84988368992, 1.08313834009]"
3,4.872432e-08,"[0.849086041942, 1.3692154708, -1.9383020434, -1.10227412911, -1.24116377862, 1.62157882218]"
4,3.668758e-08,"[1.17689729477, 1.80564080417, -2.57170330707, -1.52323054292, -1.66078420204, 2.18724420084]"
5,2.96036e-08,"[1.6303071055, 2.40296939168, -3.44139654589, -2.11286656075, -2.25177183447, 2.98193671708]"
6,2.039142e-08,"[2.14375872558, 3.0773760306, -4.42608546389, -2.78877541128, -2.92731834171, 3.89013280016]"
7,1.263059e-08,"[2.73471659688, 3.85224594016, -5.56027402957, -3.57406692202, -3.71040024183, 4.94273060894]"
8,1.877242e-08,"[3.37744498957, 4.69324912745, -6.79425907146, -4.43449104558, -4.56705855355, 6.09393447604]"
9,7.206062e-09,"[4.05102929601, 5.57227849085, -8.08725971367, -5.34174117913, -5.46916443338, 7.30585504927]"


In [30]:
est.err_[est.err_['error'] > 1]

Unnamed: 0,error,parameters


In [31]:
y_pred = est.predict(X)
y_pred[:5]

array([[  9.99999845e-01,   1.55142469e-07,   0.00000000e+00],
       [  9.99984801e-01,   1.51991944e-05,   0.00000000e+00],
       [  9.99996771e-01,   3.22909757e-06,   0.00000000e+00],
       [  9.99943048e-01,   5.69518091e-05,   0.00000000e+00],
       [  9.99999890e-01,   1.09624879e-07,   0.00000000e+00]])

In [32]:
pd.DataFrame({'yp': np.argmax(y_pred, axis=1), 'yt': y}).groupby(['yp', 'yt']).size().unstack()

yt,1,2,3
yp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,50.0,,
1,,49.0,1.0
2,,1.0,49.0


In [193]:
y.value_counts()

3    50
2    50
1    50
dtype: int64