
Commit

fabianp committed Nov 29, 2014
1 parent e59bcf4 commit a52bf80
Showing 1 changed file with 41 additions and 14 deletions.
minirank/ordinal.py (55 changes: 41 additions & 14 deletions)
@@ -7,7 +7,7 @@
import numpy as np
from scipy import optimize, linalg, stats

-from sklearn import base, metrics
+from sklearn import base, metrics, linear_model

from metrics import pairwise_disagreement

@@ -95,13 +95,34 @@ def obj_multiclass(x0, X, y, alpha, n_class):
     W = np.concatenate((W, Wk), axis=1)
     X = np.concatenate((X, np.ones((n_samples, 1))), axis=1)
     Y = np.zeros((n_samples, n_class))
-    Y[:] = - 1./(n_class - 1)
+    Y[:] = - 1./(n_class - 1.)
     for i in range(n_samples):
         Y[i, y[i]] = 1.

-    L = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
+    L = np.ones((n_class, n_class)) - np.eye(n_class)
     obj = (L[y] * np.fmax(X.dot(W) - Y, 0)).sum() / float(n_samples)

     Wt = W[:n_features]
     penalty = alpha * np.trace(Wt.T.dot(Wt))
     return obj + penalty

+def obj_multiclass2(x0, X, y, alpha, n_class):
+    n_samples, n_features = X.shape
+    W = x0.reshape((n_features + 1, n_class-1))
+    Wk = - W.sum(1)[:, None]
+    W = np.concatenate((W, Wk), axis=1)
+    X = np.concatenate((X, np.ones((n_samples, 1))), axis=1)
+    Y = np.zeros((n_samples, n_class))
+    Y[:] = - 1./(n_class - 1.)
+    for i in range(n_samples):
+        Y[i, y[i]] = 1.
+
+    # L = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
+    L = np.ones((n_class, n_class)) - np.eye(n_class)
+    obj = (L[y] * np.fmax(X.dot(W) - Y, 0)).sum() / float(n_samples)
+
+    #1/0
+    Wt = W[:n_features]
+    penalty = alpha * np.trace(Wt.T.dot(Wt))
+    return obj + penalty
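
Aside on the change above: the cost matrix L switches from an absolute-error cost, where misranking by k classes costs k, to a 0-1 cost, where every error costs 1; obj_multiclass2 is a near-verbatim copy of the updated objective, with the absolute-error line kept as a comment and a leftover #1/0 debug line. A minimal standalone numpy sketch (not part of the commit) showing the two matrices for n_class = 3:

import numpy as np

n_class = 3
# Absolute-error cost: misranking by k classes costs k.
L_ae = np.abs(np.arange(n_class)[:, None] - np.arange(n_class))
# 0-1 cost: any misclassification costs 1 (the new choice above).
L_01 = np.ones((n_class, n_class)) - np.eye(n_class)
print(L_ae)  # [[0 1 2] [1 0 1] [2 1 0]]
print(L_01)  # [[0. 1. 1.] [1. 0. 1.] [1. 1. 0.]]

In the objective, L[y] selects row y[i] of the cost matrix for each sample, so the hinge terms np.fmax(X.dot(W) - Y, 0) are weighted by the cost of each wrong class.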
@@ -164,7 +185,7 @@ def threshold_predict(X, w, theta):
     return pred


-def multiclass_fit(X, y, alpha, n_class, maxiter=100000):
+def multiclass_fit(X, y, alpha, n_class, maxiter=5000000):
     """
     Multiclass classification with absolute error cost
@@ -182,6 +203,10 @@ def multiclass_fit(X, y, alpha, n_class, maxiter=100000):
     sol = optimize.minimize(obj_multiclass, x0, jac=False,
                             args=(X, y, alpha, n_class), method='L-BFGS-B',
                             options=options)
+
+    sol = optimize.minimize(obj_multiclass, sol.x, jac=False,
+                            args=(X, y, alpha, n_class), method='L-BFGS-B',
+                            options=options)
     if not sol.success:
         print(sol.message)
     W = sol.x.reshape((n_features + 1, n_class-1))
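
The second optimize.minimize call added above warm-restarts L-BFGS-B from the first run's solution sol.x, which resets the solver's internal curvature estimate; since jac=False means the gradient is approximated by finite differences, a stalled first run can sometimes make further progress after a restart. A standalone sketch of the restart pattern on a toy quadratic (not the commit's objective):

import numpy as np
from scipy import optimize

def f(x):
    # Toy stand-in for obj_multiclass.
    return ((x - 3.0) ** 2).sum()

sol = optimize.minimize(f, np.zeros(5), method='L-BFGS-B',
                        options={'maxiter': 100})
# Restart from where the first run stopped.
sol = optimize.minimize(f, sol.x, method='L-BFGS-B',
                        options={'maxiter': 100})
print(sol.success, sol.x)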
@@ -243,12 +268,12 @@ def score(self, X, y):

np.random.seed(0)
from sklearn import datasets, metrics, svm, cross_validation
-n_class = 3
-n_samples = 20
+n_class = 5
+n_samples = 100
n_dim = 10

X, y = datasets.make_regression(n_samples=n_samples, n_features=n_dim,
-                                n_informative=n_dim // 10)
+                                n_informative=n_dim // 10, noise=20)

bins = stats.mstats.mquantiles(y, np.linspace(0, 1, n_class + 1))
y = np.digitize(y, bins[:-1])
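
For context, the two lines above turn the continuous regression target into n_class ordinal labels by cutting it at its own quantiles, giving roughly equal-mass classes. A standalone sketch on a small toy vector (not part of the commit):

import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
y_cont = rng.randn(12)
n_class = 3
# n_class + 1 quantile edges spanning the data range.
bins = stats.mstats.mquantiles(y_cont, np.linspace(0, 1, n_class + 1))
# Dropping the last edge makes digitize return labels 1 .. n_class.
y_ord = np.digitize(y_cont, bins[:-1])
print(y_ord)                   # ordinal labels in {1, 2, 3}
print(np.bincount(y_ord)[1:])  # roughly 12 / 3 samples per class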
@@ -269,15 +294,17 @@ def score(self, X, y):

cv = cross_validation.KFold(y.size)
for train, test in cv:
-    test = train
-    w, theta = threshold_fit(X[train], y[train], 0., n_class, mode='AE',
-                             bounds=False)
-    pred = threshold_predict(X[test], w, theta)
-    print metrics.mean_absolute_error(pred, y[test])
+    #test = train
+
+    clf = linear_model.LogisticRegression(C=1e3).fit(X[train], y[train])
+    print clf.score(X[test], y[test])
+    #w, theta = threshold_fit(X[train], y[train], 0., n_class, mode='AE',
+    #                         bounds=False)
+    #pred = threshold_predict(X[test], w, theta)
+    #print metrics.accuracy_score(pred, y[test])

    W = multiclass_fit(X[train], y[train], 0., n_class)
    pred = multiclass_predict(X[test], W)
-    print pred, y[test]
    print metrics.mean_absolute_error(pred, y[test])
+    print metrics.accuracy_score(pred, y[test])
    break
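
The rewritten loop benchmarks the cost-sensitive model against a plain LogisticRegression baseline: accuracy for the baseline, then mean absolute error and accuracy for multiclass_fit. Note the script is Python 2 and uses sklearn.cross_validation, which was later removed in favor of sklearn.model_selection; a rough modern equivalent of the loop, assuming the script's X, y, n_class and this file's multiclass_fit / multiclass_predict (a sketch, not part of the commit):

from sklearn import linear_model, metrics, model_selection

cv = model_selection.KFold(n_splits=5)
for train, test in cv.split(X):
    # Multinomial baseline, as in the commit.
    clf = linear_model.LogisticRegression(C=1e3).fit(X[train], y[train])
    print(clf.score(X[test], y[test]))

    # Cost-sensitive model defined in this file.
    W = multiclass_fit(X[train], y[train], 0., n_class)
    pred = multiclass_predict(X[test], W)
    print(metrics.mean_absolute_error(pred, y[test]))
    print(metrics.accuracy_score(pred, y[test]))
    break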
