
Commit

fabianp committed Nov 29, 2014
1 parent e59bcf4 commit a52bf80
Showing 1 changed file with 41 additions and 14 deletions.
minirank/ordinal.py (55 changes: 41 additions & 14 deletions)
@@ -7,7 +7,7 @@
import numpy as np
from scipy import optimize, linalg, stats

-from sklearn import base, metrics
+from sklearn import base, metrics, linear_model

from metrics import pairwise_disagreement

@@ -95,13 +95,34 @@ def obj_multiclass(x0, X, y, alpha, n_class):
     W = np.concatenate((W, Wk), axis=1)
     X = np.concatenate((X, np.ones((n_samples, 1))), axis=1)
     Y = np.zeros((n_samples, n_class))
-    Y[:] = - 1./(n_class - 1)
+    Y[:] = - 1./(n_class - 1.)
     for i in range(n_samples):
         Y[i, y[i]] = 1.

-    L = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
+    L = np.ones((n_class, n_class)) - np.eye(n_class)
     obj = (L[y] * np.fmax(X.dot(W) - Y, 0)).sum() / float(n_samples)

     Wt = W[:n_features]
     penalty = alpha * np.trace(Wt.T.dot(Wt))
     return obj + penalty

+def obj_multiclass2(x0, X, y, alpha, n_class):
+    n_samples, n_features = X.shape
+    W = x0.reshape((n_features + 1, n_class-1))
+    Wk = - W.sum(1)[:, None]
+    W = np.concatenate((W, Wk), axis=1)
+    X = np.concatenate((X, np.ones((n_samples, 1))), axis=1)
+    Y = np.zeros((n_samples, n_class))
+    Y[:] = - 1./(n_class - 1.)
+    for i in range(n_samples):
+        Y[i, y[i]] = 1.
+
+    # L = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
+    L = np.ones((n_class, n_class)) - np.eye(n_class)
+    obj = (L[y] * np.fmax(X.dot(W) - Y, 0)).sum() / float(n_samples)
+
+    #1/0
+    Wt = W[:n_features]
+    penalty = alpha * np.trace(Wt.T.dot(Wt))
+    return obj + penalty
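
Aside on the change above: the cost matrix L switches from an absolute-error cost, where misranking by k classes costs k, to a 0-1 cost, where every error costs 1; obj_multiclass2 is a near-verbatim copy of the updated objective, with the absolute-error line kept as a comment and a leftover #1/0 debug line. A minimal standalone numpy sketch (not part of the commit) showing the two matrices for n_class = 3:

import numpy as np

n_class = 3
# Absolute-error cost: misranking by k classes costs k.
L_ae = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
# 0-1 cost: any misclassification costs 1 (the new choice above).
L_01 = np.ones((n_class, n_class)) - np.eye(n_class)
print(L_ae)  # [[0 1 2] [1 0 1] [2 1 0]]
print(L_01)  # [[0. 1. 1.] [1. 0. 1.] [1. 1. 0.]]

In the objective, L[y] selects row y[i] of the cost matrix for each sample, so the hinge terms np.fmax(X.dot(W) - Y, 0) are weighted by the cost of each wrong class.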
@@ -164,7 +185,7 @@ def threshold_predict(X, w, theta):
     return pred


-def multiclass_fit(X, y, alpha, n_class, maxiter=100000):
+def multiclass_fit(X, y, alpha, n_class, maxiter=5000000):
     """
     Multiclass classification with absolute error cost
@@ -182,6 +203,10 @@ def multiclass_fit(X, y, alpha, n_class, maxiter=100000):
     sol = optimize.minimize(obj_multiclass, x0, jac=False,
                             args=(X, y, alpha, n_class), method='L-BFGS-B',
                             options=options)
+
+    sol = optimize.minimize(obj_multiclass, sol.x, jac=False,
+                            args=(X, y, alpha, n_class), method='L-BFGS-B',
+                            options=options)
     if not sol.success:
         print(sol.message)
     W = sol.x.reshape((n_features + 1, n_class-1))
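
The second optimize.minimize call added above warm-restarts L-BFGS-B from the first run's solution sol.x, which resets the solver's internal curvature estimate; since jac=False means the gradient is approximated by finite differences, a stalled first run can sometimes make further progress after a restart. A standalone sketch of the restart pattern on a toy quadratic (not the commit's objective):

import numpy as np
from scipy import optimize

def f(x):
    # Toy stand-in for obj_multiclass.
    return ((x - 3.0) ** 2).sum()

sol = optimize.minimize(f, np.zeros(5), method='L-BFGS-B',
                        options={'maxiter': 100})
# Restart from where the first run stopped.
sol = optimize.minimize(f, sol.x, method='L-BFGS-B',
                        options={'maxiter': 100})
print(sol.success, sol.x)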
@@ -243,12 +268,12 @@ def score(self, X, y):

np.random.seed(0)
from sklearn import datasets, metrics, svm, cross_validation
-n_class = 3
-n_samples = 20
+n_class = 5
+n_samples = 100
n_dim = 10

X, y = datasets.make_regression(n_samples=n_samples, n_features=n_dim,
-                                n_informative=n_dim // 10)
+                                n_informative=n_dim // 10, noise=20)

bins = stats.mstats.mquantiles(y, np.linspace(0, 1, n_class + 1))
y = np.digitize(y, bins[:-1])
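
For context, the two lines above turn the continuous regression target into n_class ordinal labels by cutting it at its own quantiles, giving roughly equal-mass classes. A standalone sketch on a small toy vector (not part of the commit):

import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
y_cont = rng.randn(12)
n_class = 3
# n_class + 1 quantile edges spanning the data range.
bins = stats.mstats.mquantiles(y_cont, np.linspace(0, 1, n_class + 1))
# Dropping the last edge makes digitize return labels 1 .. n_class.
y_ord = np.digitize(y_cont, bins[:-1])
print(y_ord)                   # ordinal labels in {1, 2, 3}
print(np.bincount(y_ord)[1:])  # roughly 12 / 3 samples per class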
@@ -269,15 +294,17 @@ def score(self, X, y):

cv = cross_validation.KFold(y.size)
for train, test in cv:
-    test = train
-    w, theta = threshold_fit(X[train], y[train], 0., n_class, mode='AE',
-                             bounds=False)
-    pred = threshold_predict(X[test], w, theta)
-    print metrics.mean_absolute_error(pred, y[test])
+    #test = train
+
+    clf = linear_model.LogisticRegression(C=1e3).fit(X[train], y[train])
+    print clf.score(X[test], y[test])
+    #w, theta = threshold_fit(X[train], y[train], 0., n_class, mode='AE',
+    #                         bounds=False)
+    #pred = threshold_predict(X[test], w, theta)
+    #print metrics.accuracy_score(pred, y[test])

    W = multiclass_fit(X[train], y[train], 0., n_class)
    pred = multiclass_predict(X[test], W)
-    print pred, y[test]
    print metrics.mean_absolute_error(pred, y[test])
+    print metrics.accuracy_score(pred, y[test])
    break
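
The rewritten loop benchmarks the cost-sensitive model against a plain LogisticRegression baseline: accuracy for the baseline, then mean absolute error and accuracy for multiclass_fit. Note the script is Python 2 and uses sklearn.cross_validation, which was later removed in favor of sklearn.model_selection; a rough modern equivalent of the loop, assuming the script's X, y, n_class and this file's multiclass_fit / multiclass_predict (a sketch, not part of the commit):

from sklearn import linear_model, metrics, model_selection

cv = model_selection.KFold(n_splits=5)
for train, test in cv.split(X):
    # Multinomial baseline, as in the commit.
    clf = linear_model.LogisticRegression(C=1e3).fit(X[train], y[train])
    print(clf.score(X[test], y[test]))

    # Cost-sensitive model defined in this file.
    W = multiclass_fit(X[train], y[train], 0., n_class)
    pred = multiclass_predict(X[test], W)
    print(metrics.mean_absolute_error(pred, y[test]))
    print(metrics.accuracy_score(pred, y[test]))
    break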
