port version 1.91

+ support for linear support vector regression
commit 283f7f3d4e465cc07138aca536732520e2fe9ddf 1 parent bf0ec6f
Benedikt Waldvogel authored
128 README
@@ -29,11 +29,12 @@ The two most important methods that you might be interested in are:
29 29
30 30 -------------------------------------------------------------------------------
31 31
32   -LIBLINEAR is a simple package for solving large-scale regularized
33   -linear classification. It currently supports L2-regularized logistic
34   -regression/L2-loss support vector classification/L1-loss support vector
35   -classification, and L1-regularized L2-loss support vector classification/
36   -logistic regression. This document explains the usage of LIBLINEAR.
  32 +LIBLINEAR is a simple package for solving large-scale regularized linear
  33 +classification and regression. It currently supports
  34 +- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
  35 +- L1-regularized L2-loss support vector classification/L1-regularized logistic regression
  36 +- L2-regularized L2-loss support vector regression/L1-loss support vector regression.
  37 +This document explains the usage of LIBLINEAR.
37 38
38 39 To get started, please read the ``Quick Start'' section first.
39 40 For developers, please check the ``Library Usage'' section to learn
@@ -59,8 +60,8 @@ When to use LIBLINEAR but not LIBSVM
59 60
60 61 There are some large data for which with/without nonlinear mappings
61 62 gives similar performances. Without using kernels, one can
62   -efficiently train a much larger set via a linear classifier. These
63   -data usually have a large number of features. Document classification
  63 +efficiently train a much larger set via linear classification/regression.
  64 +These data usually have a large number of features. Document classification
64 65 is an example.
65 66
66 67 Warning: While generally liblinear is very fast, its default solver
@@ -128,25 +129,34 @@ and mark
128 129 Usage: train [options] training_set_file [model_file]
129 130 options:
130 131 -s type : set type of solver (default 1)
131   - 0 -- L2-regularized logistic regression (primal)
132   - 1 -- L2-regularized L2-loss support vector classification (dual)
133   - 2 -- L2-regularized L2-loss support vector classification (primal)
134   - 3 -- L2-regularized L1-loss support vector classification (dual)
135   - 4 -- multi-class support vector classification by Crammer and Singer
136   - 5 -- L1-regularized L2-loss support vector classification
137   - 6 -- L1-regularized logistic regression
138   - 7 -- L2-regularized logistic regression (dual)
  132 + 0 -- L2-regularized logistic regression (primal)
  133 + 1 -- L2-regularized L2-loss support vector classification (dual)
  134 + 2 -- L2-regularized L2-loss support vector classification (primal)
  135 + 3 -- L2-regularized L1-loss support vector classification (dual)
  136 + 4 -- multi-class support vector classification by Crammer and Singer
  137 + 5 -- L1-regularized L2-loss support vector classification
  138 + 6 -- L1-regularized logistic regression
  139 + 7 -- L2-regularized logistic regression (dual)
  140 + 11 -- L2-regularized L2-loss epsilon support vector regression (primal)
  141 + 12 -- L2-regularized L2-loss epsilon support vector regression (dual)
  142 + 13 -- L2-regularized L1-loss epsilon support vector regression (dual)
139 143 -c cost : set the parameter C (default 1)
  144 +-p epsilon : set the epsilon in the loss function of epsilon-SVR (default 0.1)
140 145 -e epsilon : set tolerance of termination criterion
141 146 -s 0 and 2
142 147 |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
143 148 where f is the primal function and pos/neg are # of
144 149 positive/negative data (default 0.01)
  150 + -s 11
  151 + |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
145 152 -s 1, 3, 4 and 7
146 153 Dual maximal violation <= eps; similar to libsvm (default 0.1)
147 154 -s 5 and 6
148 155 |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
149 156 where f is the primal function (default 0.01)
  157 + -s 12 and 13
  158 + |f'(alpha)|_1 <= eps |f'(alpha0)|_1,
  159 + where f is the dual function (default 0.1)
150 160 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
151 161 -wi weight: weights adjust the parameter C of different classes (see README for details)
152 162 -v n: n-fold cross validation mode
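
The new -s 11/12/13 solvers and the -p option surface in the Java port as additional SolverType constants plus a p field on the parameters. A minimal selection sketch, assuming the accessor names shown (Parameter.java is not part of this excerpt, so setP and getSolverType are not confirmed by the diff):

    import de.bwaldvogel.liblinear.Parameter;
    import de.bwaldvogel.liblinear.SolverType;

    public class SvrOptionsSketch {
        public static void main(String[] args) {
            // rough equivalent of "train -s 13 -c 1 -e 0.1 -p 0.1 ...":
            // L2-regularized L1-loss epsilon-SVR, solved in the dual
            Parameter param = new Parameter(SolverType.L2R_L1LOSS_SVR_DUAL, 1.0, 0.1);
            param.setP(0.1);   // epsilon of the epsilon-insensitive loss (setter name assumed)
            System.out.println(param.getSolverType());
        }
    }
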
@@ -183,23 +193,40 @@ For L1-regularized logistic regression (-s 6), we solve
183 193
184 194 min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i))
185 195
  196 +For L2-regularized logistic regression (-s 7), we solve
  197 +
  198 +min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
  199 + s.t. 0 <= alpha_i <= C,
  200 +
186 201 where
187 202
188 203 Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
189 204
190   -For L2-regularized logistic regression (-s 7), we solve
  205 +For L2-regularized L2-loss SVR (-s 11), we solve
191 206
192   -min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
193   - s.t. 0 <= alpha_i <= C,
  207 +min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2
  208 +
  209 +For L2-regularized L2-loss SVR dual (-s 12), we solve
  210 +
  211 +min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + p \sum |beta_i|
  212 +
  213 +For L2-regularized L1-loss SVR dual (-s 13), we solve
  214 +
  215 +min_beta 0.5(beta^T Q beta) - y^T beta + p \sum |beta_i|
  216 + s.t. -C <= beta_i <= C,
  217 +
  218 +where
  219 +
  220 +Q is a matrix with Q_ij = x_i^T x_j.
194 221
195 222 If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
196 223
197 224 The primal-dual relationship implies that -s 1 and -s 2 give the same
198   -model, and -s 0 and -s 7 give the same.
  225 +model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same.
199 226
200   -We implement 1-vs-the rest multi-class strategy. In training i
201   -vs. non_i, their C parameters are (weight from -wi)*C and C,
202   -respectively. If there are only two classes, we train only one
  227 +We implement 1-vs-the rest multi-class strategy for classification.
  228 +In training i vs. non_i, their C parameters are (weight from -wi)*C
  229 +and C, respectively. If there are only two classes, we train only one
203 230 model. Thus weight1*C vs. weight2*C is used. See examples below.
204 231
205 232 We also implement multi-class SVM by Crammer and Singer (-s 4):
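
Returning to the new SVR objectives above: the -s 11 primal objective is easy to check numerically. A minimal sketch of the epsilon-insensitive squared loss it uses (plain Java with toy names, not the port's internals; the port's actual implementation is L2R_L2_SvrFunction.fun further down in this commit):

    // f(w) = w^T w / 2 + C * sum_i max(0, |y_i - w^T x_i| - epsilon)^2
    static double svrPrimalObjective(double[][] x, double[] y, double[] w, double C, double epsilon) {
        double f = 0;
        for (int j = 0; j < w.length; j++)
            f += w[j] * w[j];
        f /= 2.0;
        for (int i = 0; i < x.length; i++) {
            double wtx = 0;
            for (int j = 0; j < w.length; j++)
                wtx += w[j] * x[i][j];
            double d = Math.abs(y[i] - wtx) - epsilon;  // distance outside the epsilon tube
            if (d > 0) f += C * d * d;                  // only points outside the tube are penalized
        }
        return f;
    }
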
@@ -224,7 +251,10 @@ and C^m_i = C if m = y_i,
224 251
225 252 Usage: predict [options] test_file model_file output_file
226 253 options:
227   --b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)
  254 +-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
  255 +
  256 +Note that -b is only needed in the prediction phase. This is different
  257 +from the setting of LIBSVM.
228 258
229 259 Examples
230 260 ========
@@ -267,8 +297,9 @@ Library Usage
267 297 - Function: model* train(const struct problem *prob,
268 298 const struct parameter *param);
269 299
270   - This function constructs and returns a linear classification model
271   - according to the given training data and parameters.
  300 + This function constructs and returns a linear classification
  301 + or regression model according to the given training data and
  302 + parameters.
272 303
273 304 struct problem describes the problem:
274 305
@@ -283,10 +314,10 @@ Library Usage
283 314 where `l' is the number of training data. If bias >= 0, we assume
284 315 that one additional feature is added to the end of each data
285 316 instance. `n' is the number of feature (including the bias feature
286   - if bias >= 0). `y' is an array containing the target values. And
287   - `x' is an array of pointers,
288   - each of which points to a sparse representation (array of feature_node) of one
289   - training vector.
  317 + if bias >= 0). `y' is an array containing the target values (integers
  318 + in classification, real numbers in regression), and `x' is an array
  319 + of pointers, each of which points to a sparse representation (array
  320 + of feature_node) of one training vector.
290 321
291 322 For example, if we have the following training data:
292 323
@@ -311,7 +342,8 @@ Library Usage
311 342 [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?)
312 343 [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?)
313 344
314   - struct parameter describes the parameters of a linear classification model:
  345 + struct parameter describes the parameters of a linear classification
  346 + or regression model:
315 347
316 348 struct parameter
317 349 {
@@ -323,9 +355,10 @@ Library Usage
323 355 int nr_weight;
324 356 int *weight_label;
325 357 double* weight;
  358 + double p;
326 359 };
327 360
328   - solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL.
  361 + solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
329 362
330 363 L2R_LR L2-regularized logistic regression (primal)
331 364 L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
@@ -335,8 +368,12 @@ Library Usage
335 368 L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
336 369 L1R_LR L1-regularized logistic regression
337 370 L2R_LR_DUAL L2-regularized logistic regression (dual)
  371 + L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal)
  372 + L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual)
  373 + L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual)
338 374
339 375 C is the cost of constraints violation.
  376 + p is the epsilon parameter in the epsilon-insensitive loss of support vector regression.
340 377 eps is the stopping criterion.
341 378
342 379 nr_weight, weight_label, and weight are used to change the penalty
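
Tying the problem and parameter structs above together from Java caller code, a small regression training sketch (toy data written as sparse FeatureNode arrays; y is now a double[] of real-valued targets, as introduced by this commit; getNrFeature is assumed to be the model accessor):

    import de.bwaldvogel.liblinear.*;

    public class ToySvrTraining {
        public static void main(String[] args) {
            Problem prob = new Problem();
            prob.l = 3;              // number of training instances
            prob.n = 2;              // number of features (no bias feature)
            prob.bias = -1;
            prob.y = new double[] {0.5, 1.2, -0.3};   // real-valued regression targets
            prob.x = new Feature[][] {
                {new FeatureNode(1, 0.1), new FeatureNode(2, 0.7)},
                {new FeatureNode(1, 0.9)},
                {new FeatureNode(2, -0.4)},
            };
            // -s 11: L2-regularized L2-loss SVR (primal), default eps 0.001
            Parameter param = new Parameter(SolverType.L2R_L2LOSS_SVR, 1.0, 0.001);
            Model model = Linear.train(prob, param);
            System.out.println("nr_feature = " + model.getNrFeature());
        }
    }
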
@@ -368,7 +405,8 @@ Library Usage
368 405
369 406 param describes the parameters used to obtain the model.
370 407
371   - nr_class and nr_feature are the number of classes and features, respectively.
  408 + nr_class and nr_feature are the number of classes and features,
  409 + respectively. nr_class = 2 for regression.
372 410
373 411 The nr_feature*nr_class array w gives feature weights. We use one
374 412 against the rest for multi-class classification, so each feature
@@ -386,7 +424,7 @@ Library Usage
386 424
387 425 The array label stores class labels.
388 426
389   -- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target);
  427 +- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target);
390 428
391 429 This function conducts cross validation. Data are separated to
392 430 nr_fold folds. Under given parameters, sequentially each fold is
@@ -396,23 +434,25 @@ Library Usage
396 434
397 435 The format of prob is same as that for train().
398 436
399   -- Function: int predict(const model *model_, const feature_node *x);
  437 +- Function: double predict(const model *model_, const feature_node *x);
400 438
401   - This functions classifies a test vector using the given
402   - model. The predicted label is returned.
  439 + For a classification model, the predicted class for x is returned.
  440 + For a regression model, the function value of x calculated using
  441 + the model is returned.
403 442
404   -- Function: int predict_values(const struct model *model_,
  443 +- Function: double predict_values(const struct model *model_,
405 444 const struct feature_node *x, double* dec_values);
406 445
407   - This function gives nr_w decision values in the array
408   - dec_values. nr_w is 1 if there are two classes except multi-class
409   - svm by Crammer and Singer (-s 4), and is the number of classes otherwise.
  446 + This function gives nr_w decision values in the array dec_values.
  447 + nr_w=1 if regression is applied or the number of classes is two. An exception is
  448 + multi-class svm by Crammer and Singer (-s 4), where nr_w = 2 if there are
  449 + two classes. For all other situations, nr_w is the number of classes.
410 450
411   - We implement one-vs-the rest multi-class strategy (-s 0,1,2,3) and
412   - multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
  451 + We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7)
  452 + and multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
413 453 The class with the highest decision value is returned.
414 454
415   -- Function: int predict_probability(const struct model *model_,
  455 +- Function: double predict_probability(const struct model *model_,
416 456 const struct feature_node *x, double* prob_estimates);
417 457
418 458 This function gives nr_class probability estimates in the array
@@ -428,10 +468,12 @@ Library Usage
428 468 - Function: int get_nr_class(const model *model_);
429 469
430 470 The function gives the number of classes of the model.
  471 + For a regression model, 2 is returned.
431 472
432 473 - Function: void get_labels(const model *model_, int* label);
433 474
434 475 This function outputs the name of labels into an array called label.
  476 + For a regression model, label is unchanged.
435 477
436 478 - Function: const char *check_parameter(const struct problem *prob,
437 479 const struct parameter *param);
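
To close out the README changes: the functions whose signatures moved from int to double behave as follows from Java caller code. A short sketch of prediction and cross-validation in the style of the toy problem above (a sketch under the same assumptions, not canonical project documentation):

    import de.bwaldvogel.liblinear.*;

    public class PredictAndCrossValidate {
        public static void main(String[] args) {
            Problem prob = new Problem();
            prob.l = 2;
            prob.n = 1;
            prob.bias = -1;
            prob.y = new double[] {1.0, 2.0};
            prob.x = new Feature[][] {
                {new FeatureNode(1, 1.0)},
                {new FeatureNode(1, 2.0)},
            };
            Parameter param = new Parameter(SolverType.L2R_L2LOSS_SVR_DUAL, 1.0, 0.1);
            Model model = Linear.train(prob, param);

            // predict() now returns double: the class label for classification models,
            // the regression function value for SVR models.
            double prediction = Linear.predict(model, new Feature[] {new FeatureNode(1, 1.5)});
            System.out.println("prediction = " + prediction);

            // cross_validation's target array is now double[] as well.
            double[] target = new double[prob.l];
            Linear.crossValidation(prob, param, 2, target);
        }
    }
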
2  pom.xml
@@ -4,7 +4,7 @@
4 4 <artifactId>liblinear</artifactId>
5 5 <packaging>jar</packaging>
6 6 <name>liblinear</name>
7   - <version>1.9-SNAPSHOT</version>
  7 + <version>1.91-SNAPSHOT</version>
8 8 <description>Java port of Liblinear</description>
9 9 <url>http://www.bwaldvogel.de/liblinear-java/</url>
10 10
47 src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvcFunction.java
@@ -2,49 +2,40 @@
2 2
3 3 class L2R_L2_SvcFunction implements Function {
4 4
5   - private final Problem prob;
6   - private final double[] C;
7   - private final int[] I;
8   - private final double[] z;
  5 + protected final Problem prob;
  6 + protected final double[] C;
  7 + protected final int[] I;
  8 + protected final double[] z;
9 9
10   - private int sizeI;
  10 + protected int sizeI;
11 11
12   - public L2R_L2_SvcFunction( Problem prob, double Cp, double Cn ) {
13   - int i;
  12 + public L2R_L2_SvcFunction( Problem prob, double[] C ) {
14 13 int l = prob.l;
15   - int[] y = prob.y;
16 14
17 15 this.prob = prob;
18 16
19 17 z = new double[l];
20   - C = new double[l];
21 18 I = new int[l];
22   -
23   - for (i = 0; i < l; i++) {
24   - if (y[i] == 1)
25   - C[i] = Cp;
26   - else
27   - C[i] = Cn;
28   - }
  19 + this.C = C;
29 20 }
30 21
31 22 public double fun(double[] w) {
32 23 int i;
33 24 double f = 0;
34   - int[] y = prob.y;
  25 + double[] y = prob.y;
35 26 int l = prob.l;
36 27 int w_size = get_nr_variable();
37 28
38 29 Xv(w, z);
  30 +
  31 + for (i = 0; i < w_size; i++)
  32 + f += w[i] * w[i];
  33 + f /= 2.0;
39 34 for (i = 0; i < l; i++) {
40 35 z[i] = y[i] * z[i];
41 36 double d = 1 - z[i];
42 37 if (d > 0) f += C[i] * d * d;
43 38 }
44   - f = 2 * f;
45   - for (i = 0; i < w_size; i++)
46   - f += w[i] * w[i];
47   - f /= 2.0;
48 39
49 40 return (f);
50 41 }
@@ -54,13 +45,12 @@ public int get_nr_variable() {
54 45 }
55 46
56 47 public void grad(double[] w, double[] g) {
57   - int i;
58   - int[] y = prob.y;
  48 + double[] y = prob.y;
59 49 int l = prob.l;
60 50 int w_size = get_nr_variable();
61 51
62 52 sizeI = 0;
63   - for (i = 0; i < l; i++) {
  53 + for (int i = 0; i < l; i++) {
64 54 if (z[i] < 1) {
65 55 z[sizeI] = C[i] * y[i] * (z[i] - 1);
66 56 I[sizeI] = i;
@@ -69,15 +59,14 @@ public void grad(double[] w, double[] g) {
69 59 }
70 60 subXTv(z, g);
71 61
72   - for (i = 0; i < w_size; i++)
  62 + for (int i = 0; i < w_size; i++)
73 63 g[i] = w[i] + 2 * g[i];
74 64 }
75 65
76 66 public void Hv(double[] s, double[] Hs) {
77 67 int i;
78   - int l = prob.l;
79 68 int w_size = get_nr_variable();
80   - double[] wa = new double[l];
  69 + double[] wa = new double[sizeI];
81 70
82 71 subXv(s, wa);
83 72 for (i = 0; i < sizeI; i++)
@@ -88,7 +77,7 @@ public void Hv(double[] s, double[] Hs) {
88 77 Hs[i] = s[i] + 2 * Hs[i];
89 78 }
90 79
91   - private void subXTv(double[] v, double[] XTv) {
  80 + protected void subXTv(double[] v, double[] XTv) {
92 81 int i;
93 82 int w_size = get_nr_variable();
94 83
@@ -112,7 +101,7 @@ private void subXv(double[] v, double[] Xv) {
112 101 }
113 102 }
114 103
115   - private void Xv(double[] v, double[] Xv) {
  104 + protected void Xv(double[] v, double[] Xv) {
116 105
117 106 for (int i = 0; i < prob.l; i++) {
118 107 Xv[i] = 0;
67 src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvrFunction.java
... ... @@ -0,0 +1,67 @@
  1 +package de.bwaldvogel.liblinear;
  2 +
  3 +/**
  4 + * @since 1.91
  5 + */
  6 +public class L2R_L2_SvrFunction extends L2R_L2_SvcFunction {
  7 +
  8 + private double p;
  9 +
  10 + public L2R_L2_SvrFunction( Problem prob, double[] C, double p ) {
  11 + super(prob, C);
  12 + this.p = p;
  13 + }
  14 +
  15 + @Override
  16 + public double fun(double[] w) {
  17 + double f = 0;
  18 + double[] y = prob.y;
  19 + int l = prob.l;
  20 + int w_size = get_nr_variable();
  21 + double d;
  22 +
  23 + Xv(w, z);
  24 +
  25 + for (int i = 0; i < w_size; i++)
  26 + f += w[i] * w[i];
  27 + f /= 2;
  28 + for (int i = 0; i < l; i++) {
  29 + d = z[i] - y[i];
  30 + if (d < -p)
  31 + f += C[i] * (d + p) * (d + p);
  32 + else if (d > p) f += C[i] * (d - p) * (d - p);
  33 + }
  34 +
  35 + return f;
  36 + }
  37 +
  38 + @Override
  39 + public void grad(double[] w, double[] g) {
  40 + double[] y = prob.y;
  41 + int l = prob.l;
  42 + int w_size = get_nr_variable();
  43 +
  44 + sizeI = 0;
  45 + for (int i = 0; i < l; i++) {
  46 + double d = z[i] - y[i];
  47 +
  48 + // generate index set I
  49 + if (d < -p) {
  50 + z[sizeI] = C[i] * (d + p);
  51 + I[sizeI] = i;
  52 + sizeI++;
  53 + } else if (d > p) {
  54 + z[sizeI] = C[i] * (d - p);
  55 + I[sizeI] = i;
  56 + sizeI++;
  57 + }
  58 +
  59 + }
  60 + subXTv(z, g);
  61 +
  62 + for (int i = 0; i < w_size; i++)
  63 + g[i] = w[i] + 2 * g[i];
  64 +
  65 + }
  66 +
  67 +}
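
This new Function implementation is the objective/gradient pair consumed by the port's trust-region Newton optimizer when -s 11 is selected. A rough sketch of that wiring (roughly what Linear.train does internally; Tron is package-private in the port, and the Tron constructor and the getP accessor are assumptions, since neither Tron.java nor Parameter.java appears in this excerpt):

    // conceptual, package-internal wiring for -s 11
    static void trainL2LossSvrPrimal(Problem prob, Parameter param, double[] w) {
        double[] C = new double[prob.l];
        for (int i = 0; i < prob.l; i++)
            C[i] = param.getC();                              // SVR uses the same cost for every instance
        Function funObj = new L2R_L2_SvrFunction(prob, C, param.getP()); // getP() assumed
        Tron tron = new Tron(funObj, param.getEps());         // constructor signature assumed
        tron.tron(w);                                         // fills w with the primal solution
    }
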
25 src/main/java/de/bwaldvogel/liblinear/L2R_LrFunction.java
@@ -7,23 +7,14 @@
7 7 private final double[] D;
8 8 private final Problem prob;
9 9
10   - public L2R_LrFunction( Problem prob, double Cp, double Cn ) {
11   - int i;
  10 + public L2R_LrFunction( Problem prob, double[] C ) {
12 11 int l = prob.l;
13   - int[] y = prob.y;
14 12
15 13 this.prob = prob;
16 14
17 15 z = new double[l];
18 16 D = new double[l];
19   - C = new double[l];
20   -
21   - for (i = 0; i < l; i++) {
22   - if (y[i] == 1)
23   - C[i] = Cp;
24   - else
25   - C[i] = Cn;
26   - }
  17 + this.C = C;
27 18 }
28 19
29 20
@@ -56,11 +47,15 @@ private void XTv(double[] v, double[] XTv) {
56 47 public double fun(double[] w) {
57 48 int i;
58 49 double f = 0;
59   - int[] y = prob.y;
  50 + double[] y = prob.y;
60 51 int l = prob.l;
61 52 int w_size = get_nr_variable();
62 53
63 54 Xv(w, z);
  55 +
  56 + for (i = 0; i < w_size; i++)
  57 + f += w[i] * w[i];
  58 + f /= 2.0;
64 59 for (i = 0; i < l; i++) {
65 60 double yz = y[i] * z[i];
66 61 if (yz >= 0)
@@ -68,17 +63,13 @@ public double fun(double[] w) {
68 63 else
69 64 f += C[i] * (-yz + Math.log(1 + Math.exp(yz)));
70 65 }
71   - f = 2.0 * f;
72   - for (i = 0; i < w_size; i++)
73   - f += w[i] * w[i];
74   - f /= 2.0;
75 66
76 67 return (f);
77 68 }
78 69
79 70 public void grad(double[] w, double[] g) {
80 71 int i;
81   - int[] y = prob.y;
  72 + double[] y = prob.y;
82 73 int l = prob.l;
83 74 int w_size = get_nr_variable();
84 75
529 src/main/java/de/bwaldvogel/liblinear/Linear.java
@@ -28,7 +28,7 @@
28 28 *
29 29 * <p><em>The port was done by Benedikt Waldvogel (mail at bwaldvogel.de)</em></p>
30 30 *
31   - * @version 1.9-SNAPSHOT
  31 + * @version 1.91-SNAPSHOT
32 32 */
33 33 public class Linear {
34 34
@@ -48,7 +48,7 @@
48 48 /**
49 49 * @param target predicted classes
50 50 */
51   - public static void crossValidation(Problem prob, Parameter param, int nr_fold, int[] target) {
  51 + public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target) {
52 52 int i;
53 53 int[] fold_start = new int[nr_fold + 1];
54 54 int l = prob.l;
@@ -73,7 +73,7 @@ public static void crossValidation(Problem prob, Parameter param, int nr_fold, i
73 73 subprob.n = prob.n;
74 74 subprob.l = l - (end - begin);
75 75 subprob.x = new Feature[subprob.l][];
76   - subprob.y = new int[subprob.l];
  76 + subprob.y = new double[subprob.l];
77 77
78 78 k = 0;
79 79 for (j = 0; j < begin; j++) {
@@ -119,7 +119,7 @@ private static GroupClassesReturn groupClasses(Problem prob, int[] perm) {
119 119 int i;
120 120
121 121 for (i = 0; i < l; i++) {
122   - int this_label = prob.y[i];
  122 + int this_label = (int)prob.y[i];
123 123 int j;
124 124 for (j = 0; j < nr_class; j++) {
125 125 if (this_label == label[j]) {
@@ -314,7 +314,7 @@ static void closeQuietly(Closeable c) {
314 314 } catch (Throwable t) {}
315 315 }
316 316
317   - public static int predict(Model model, Feature[] x) {
  317 + public static double predict(Model model, Feature[] x) {
318 318 double[] dec_values = new double[model.nr_class];
319 319 return predictValues(model, x, dec_values);
320 320 }
@@ -322,7 +322,7 @@ public static int predict(Model model, Feature[] x) {
322 322 /**
323 323 * @throws IllegalArgumentException if model is not probabilistic (see {@link Model#isProbabilityModel()})
324 324 */
325   - public static int predictProbability(Model model, Feature[] x, double[] prob_estimates) throws IllegalArgumentException {
  325 + public static double predictProbability(Model model, Feature[] x, double[] prob_estimates) throws IllegalArgumentException {
326 326 if (!model.isProbabilityModel()) {
327 327 throw new IllegalArgumentException("probability output is only supported for logistic regression");
328 328 }
@@ -333,7 +333,7 @@ public static int predictProbability(Model model, Feature[] x, double[] prob_est
333 333 else
334 334 nr_w = nr_class;
335 335
336   - int label = predictValues(model, x, prob_estimates);
  336 + double label = predictValues(model, x, prob_estimates);
337 337 for (int i = 0; i < nr_w; i++)
338 338 prob_estimates[i] = 1 / (1 + Math.exp(-prob_estimates[i]));
339 339
@@ -351,7 +351,7 @@ public static int predictProbability(Model model, Feature[] x, double[] prob_est
351 351 return label;
352 352 }
353 353
354   - public static int predictValues(Model model, Feature[] x, double[] dec_values) {
  354 + public static double predictValues(Model model, Feature[] x, double[] dec_values) {
355 355 int n;
356 356 if (model.bias >= 0)
357 357 n = model.nr_feature + 1;
@@ -379,9 +379,12 @@ public static int predictValues(Model model, Feature[] x, double[] dec_values) {
379 379 }
380 380 }
381 381
382   - if (model.nr_class == 2)
383   - return (dec_values[0] > 0) ? model.label[0] : model.label[1];
384   - else {
  382 + if (model.nr_class == 2) {
  383 + if (model.solverType.isSupportVectorRegression())
  384 + return dec_values[0];
  385 + else
  386 + return (dec_values[0] > 0) ? model.label[0] : model.label[1];
  387 + } else {
385 388 int dec_max_idx = 0;
386 389 for (int i = 1; i < model.nr_class; i++) {
387 390 if (dec_values[i] > dec_values[dec_max_idx]) dec_max_idx = i;
@@ -390,7 +393,6 @@ public static int predictValues(Model model, Feature[] x, double[] dec_values) {
390 393 }
391 394 }
392 395
393   -
394 396 static void printf(Formatter formatter, String format, Object... args) throws IOException {
395 397 formatter.format(format, args);
396 398 IOException ioException = formatter.ioException();
@@ -416,11 +418,13 @@ public static void saveModel(Writer modelOutput, Model model) throws IOException
416 418 printf(formatter, "solver_type %s\n", model.solverType.name());
417 419 printf(formatter, "nr_class %d\n", model.nr_class);
418 420
419   - printf(formatter, "label");
420   - for (int i = 0; i < model.nr_class; i++) {
421   - printf(formatter, " %d", model.label[i]);
  421 + if (model.label != null) {
  422 + printf(formatter, "label");
  423 + for (int i = 0; i < model.nr_class; i++) {
  424 + printf(formatter, " %d", model.label[i]);
  425 + }
  426 + printf(formatter, "\n");
422 427 }
423   - printf(formatter, "\n");
424 428
425 429 printf(formatter, "nr_feature %d\n", nr_feature);
426 430 printf(formatter, "bias %.16g\n", model.bias);
@@ -471,7 +475,7 @@ private static int GETI(byte[] y, int i) {
471 475 * L1-loss and L2-loss SVM dual problems
472 476 *<pre>
473 477 * min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
474   - * s.t. 0 <= alpha_i <= upper_bound_i,
  478 + * s.t. 0 <= \alpha_i <= upper_bound_i,
475 479 *
476 480 * where Qij = yi yj xi^T xj and
477 481 * D is a diagonal matrix
@@ -522,19 +526,28 @@ private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, dou
522 526 upper_bound[2] = Cp;
523 527 }
524 528
525   - for (i = 0; i < w_size; i++)
526   - w[i] = 0;
527 529 for (i = 0; i < l; i++) {
528   - alpha[i] = 0;
529 530 if (prob.y[i] > 0) {
530 531 y[i] = +1;
531 532 } else {
532 533 y[i] = -1;
533 534 }
  535 + }
  536 +
  537 + // Initial alpha can be set here. Note that
  538 + // 0 <= alpha[i] <= upper_bound[GETI(i)]
  539 + for (i = 0; i < l; i++)
  540 + alpha[i] = 0;
  541 +
  542 + for (i = 0; i < w_size; i++)
  543 + w[i] = 0;
  544 + for (i = 0; i < l; i++) {
534 545 QD[i] = diag[GETI(y, i)];
535 546
536 547 for (Feature xi : prob.x[i]) {
537   - QD[i] += xi.getValue() * xi.getValue();
  548 + double val = xi.getValue();
  549 + QD[i] += val * val;
  550 + w[xi.getIndex() - 1] += y[i] * alpha[i] * val;
538 551 }
539 552 index[i] = i;
540 553 }
@@ -635,12 +648,206 @@ private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, dou
635 648 info("nSV = %d" + NL, nSV);
636 649 }
637 650
  651 + // To support weights for instances, use GETI(i) (i)
  652 + private static int GETI_SVR(int i) {
  653 + return 0;
  654 + }
  655 +
  656 + /**
  657 + * A coordinate descent algorithm for
  658 + * L1-loss and L2-loss epsilon-SVR dual problem
  659 + *
  660 + * min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
  661 + * s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
  662 + *
  663 + * where Qij = xi^T xj and
  664 + * D is a diagonal matrix
  665 + *
  666 + * In L1-SVM case:
  667 + * upper_bound_i = C
  668 + * lambda_i = 0
  669 + * In L2-SVM case:
  670 + * upper_bound_i = INF
  671 + * lambda_i = 1/(2*C)
  672 + *
  673 + * Given:
  674 + * x, y, p, C
  675 + * eps is the stopping tolerance
  676 + *
  677 + * solution will be put in w
  678 + *
  679 + * See Algorithm 4 of Ho and Lin, 2012
  680 + */
  681 + private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param) {
  682 + int l = prob.l;
  683 + double C = param.C;
  684 + double p = param.p;
  685 + int w_size = prob.n;
  686 + double eps = param.eps;
  687 + int i, s, iter = 0;
  688 + int max_iter = 1000;
  689 + int active_size = l;
  690 + int[] index = new int[l];
  691 +
  692 + double d, G, H;
  693 + double Gmax_old = Double.POSITIVE_INFINITY;
  694 + double Gmax_new, Gnorm1_new;
  695 + double Gnorm1_init = 0; // initialize to 0 to get rid of Eclipse warning/error
  696 + double[] beta = new double[l];
  697 + double[] QD = new double[l];
  698 + double[] y = prob.y;
  699 +
  700 + // L2R_L2LOSS_SVR_DUAL
  701 + double[] lambda = new double[] {0.5 / C};
  702 + double[] upper_bound = new double[] {Double.POSITIVE_INFINITY};
  703 +
  704 + if (param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL) {
  705 + lambda[0] = 0;
  706 + upper_bound[0] = C;
  707 + }
  708 +
  709 + // Initial beta can be set here. Note that
  710 + // -upper_bound <= beta[i] <= upper_bound
  711 + for (i = 0; i < l; i++)
  712 + beta[i] = 0;
  713 +
  714 + for (i = 0; i < w_size; i++)
  715 + w[i] = 0;
  716 + for (i = 0; i < l; i++) {
  717 + QD[i] = 0;
  718 + for (Feature xi : prob.x[i]) {
  719 + double val = xi.getValue();
  720 + QD[i] += val * val;
  721 + w[xi.getIndex() - 1] += beta[i] * val;
  722 + }
  723 +
  724 + index[i] = i;
  725 + }
  726 +
  727 +
  728 + while (iter < max_iter) {
  729 + Gmax_new = 0;
  730 + Gnorm1_new = 0;
  731 +
  732 + for (i = 0; i < active_size; i++) {
  733 + int j = i + random.nextInt(active_size - i);
  734 + swap(index, i, j);
  735 + }
  736 +
  737 + for (s = 0; s < active_size; s++) {
  738 + i = index[s];
  739 + G = -y[i] + lambda[GETI_SVR(i)] * beta[i];
  740 + H = QD[i] + lambda[GETI_SVR(i)];
  741 +
  742 + for (Feature xi : prob.x[i]) {
  743 + int ind = xi.getIndex() - 1;
  744 + double val = xi.getValue();
  745 + G += val * w[ind];
  746 + }
  747 +
  748 + double Gp = G + p;
  749 + double Gn = G - p;
  750 + double violation = 0;
  751 + if (beta[i] == 0) {
  752 + if (Gp < 0)
  753 + violation = -Gp;
  754 + else if (Gn > 0)
  755 + violation = Gn;
  756 + else if (Gp > Gmax_old && Gn < -Gmax_old) {
  757 + active_size--;
  758 + swap(index, s, active_size);
  759 + s--;
  760 + continue;
  761 + }
  762 + } else if (beta[i] >= upper_bound[GETI_SVR(i)]) {
  763 + if (Gp > 0)
  764 + violation = Gp;
  765 + else if (Gp < -Gmax_old) {
  766 + active_size--;
  767 + swap(index, s, active_size);
  768 + s--;
  769 + continue;
  770 + }
  771 + } else if (beta[i] <= -upper_bound[GETI_SVR(i)]) {
  772 + if (Gn < 0)
  773 + violation = -Gn;
  774 + else if (Gn > Gmax_old) {
  775 + active_size--;
  776 + swap(index, s, active_size);
  777 + s--;
  778 + continue;
  779 + }
  780 + } else if (beta[i] > 0)
  781 + violation = Math.abs(Gp);
  782 + else
  783 + violation = Math.abs(Gn);
  784 +
  785 + Gmax_new = Math.max(Gmax_new, violation);
  786 + Gnorm1_new += violation;
  787 +
  788 + // obtain Newton direction d
  789 + if (Gp < H * beta[i])
  790 + d = -Gp / H;
  791 + else if (Gn > H * beta[i])
  792 + d = -Gn / H;
  793 + else
  794 + d = -beta[i];
  795 +
  796 + if (Math.abs(d) < 1.0e-12) continue;
  797 +
  798 + double beta_old = beta[i];
  799 + beta[i] = Math.min(Math.max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]);
  800 + d = beta[i] - beta_old;
  801 +
  802 + if (d != 0) {
  803 + for (Feature xi : prob.x[i]) {
  804 + w[xi.getIndex() - 1] += d * xi.getValue();
  805 + }
  806 + }
  807 + }
  808 +
  809 + if (iter == 0) Gnorm1_init = Gnorm1_new;
  810 + iter++;
  811 + if (iter % 10 == 0) info(".");
  812 +
  813 + if (Gnorm1_new <= eps * Gnorm1_init) {
  814 + if (active_size == l)
  815 + break;
  816 + else {
  817 + active_size = l;
  818 + info("*");
  819 + Gmax_old = Double.POSITIVE_INFINITY;
  820 + continue;
  821 + }
  822 + }
  823 +
  824 + Gmax_old = Gmax_new;
  825 + }
  826 +
  827 + info("%noptimization finished, #iter = %d%n", iter);
  828 + if (iter >= max_iter) info("%nWARNING: reaching max number of iterations%nUsing -s 11 may be faster%n%n");
  829 +
  830 + // calculate objective value
  831 + double v = 0;
  832 + int nSV = 0;
  833 + for (i = 0; i < w_size; i++)
  834 + v += w[i] * w[i];
  835 + v = 0.5 * v;
  836 + for (i = 0; i < l; i++) {
  837 + v += p * Math.abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i];
  838 + if (beta[i] != 0) nSV++;
  839 + }
  840 +
  841 + info("Objective value = %f%n", v);
  842 + info("nSV = %d%n", nSV);
  843 + }
  844 +
638 845 /**
639 846 * A coordinate descent algorithm for
640 847 * the dual of L2-regularized logistic regression problems
641 848 *<pre>
642   - * min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) ,
643   - * s.t. 0 <= alpha_i <= upper_bound_i,
  849 + * min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i) ,
  850 + * s.t. 0 <= \alpha_i <= upper_bound_i,
644 851 *
645 852 * where Qij = yi yj xi^T xj and
646 853 * upper_bound_i = Cp if y_i = 1
@@ -671,21 +878,30 @@ private static void solve_l2r_lr_dual(Problem prob, double w[], double eps, doub
671 878 double innereps_min = Math.min(1e-8, eps);
672 879 double upper_bound[] = new double[] {Cn, 0, Cp};
673 880
674   - for (i = 0; i < w_size; i++)
675   - w[i] = 0;
676 881 for (i = 0; i < l; i++) {
677 882 if (prob.y[i] > 0) {
678 883 y[i] = +1;
679 884 } else {
680 885 y[i] = -1;
681 886 }
  887 + }
  888 +
  889 + // Initial alpha can be set here. Note that
  890 + // 0 < alpha[i] < upper_bound[GETI(i)]
  891 + // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
  892 + for (i = 0; i < l; i++) {
682 893 alpha[2 * i] = Math.min(0.001 * upper_bound[GETI(y, i)], 1e-8);
683 894 alpha[2 * i + 1] = upper_bound[GETI(y, i)] - alpha[2 * i];
  895 + }
684 896
  897 + for (i = 0; i < w_size; i++)
  898 + w[i] = 0;
  899 + for (i = 0; i < l; i++) {
685 900 xTx[i] = 0;
686 901 for (Feature xi : prob.x[i]) {
687   - xTx[i] += (xi.getValue()) * (xi.getValue());
688   - w[xi.getIndex() - 1] += y[i] * alpha[2 * i] * xi.getValue();
  902 + double val = xi.getValue();
  903 + xTx[i] += val * val;
  904 + w[xi.getIndex() - 1] += y[i] * alpha[2 * i] * val;
689 905 }
690 906 index[i] = i;
691 907 }
@@ -819,6 +1035,10 @@ private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, d
819 1035
820 1036 double[] C = new double[] {Cn, 0, Cp};
821 1037
  1038 + // Initial w can be set here.
  1039 + for (j = 0; j < w_size; j++)
  1040 + w[j] = 0;
  1041 +
822 1042 for (j = 0; j < l; j++) {
823 1043 b[j] = 1;
824 1044 if (prob_col.y[j] > 0)
@@ -827,13 +1047,14 @@ private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, d
827 1047 y[j] = -1;
828 1048 }
829 1049 for (j = 0; j < w_size; j++) {
830   - w[j] = 0;
831 1050 index[j] = j;
832 1051 xj_sq[j] = 0;
833 1052 for (Feature xi : prob_col.x[j]) {
834 1053 int ind = xi.getIndex() - 1;
835   - double val = xi.getValue();
836 1054 xi.setValue(xi.getValue() * y[ind]); // x->value stores yi*xij
  1055 + double val = xi.getValue();
  1056 + b[ind] -= w[j] * val;
  1057 +
837 1058 xj_sq[j] += C[GETI(y, ind)] * val * val;
838 1059 }
839 1060 }
@@ -890,9 +1111,9 @@ else if (Gp > Gmax_old / l && Gn < -Gmax_old / l) {
890 1111 Gnorm1_new += violation;
891 1112
892 1113 // obtain Newton direction d
893   - if (Gp <= H * w[j])
  1114 + if (Gp < H * w[j])
894 1115 d = -Gp / H;
895   - else if (Gn >= H * w[j])
  1116 + else if (Gn > H * w[j])
896 1117 d = -Gn / H;
897 1118 else
898 1119 d = -w[j];
@@ -1041,7 +1262,7 @@ private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, doubl
1041 1262 double nu = 1e-12;
1042 1263 double inner_eps = 1;
1043 1264 double sigma = 0.01;
1044   - double w_norm = 0, w_norm_new;
  1265 + double w_norm, w_norm_new;
1045 1266 double z, G, H;
1046 1267 double Gnorm1_init = 0; // eclipse moans this variable might not be initialized
1047 1268 double Gmax_old = Double.POSITIVE_INFINITY;
@@ -1064,27 +1285,40 @@ private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, doubl
1064 1285
1065 1286 double[] C = {Cn, 0, Cp};
1066 1287
  1288 + // Initial w can be set here.
  1289 + for (j = 0; j < w_size; j++)
  1290 + w[j] = 0;
  1291 +
1067 1292 for (j = 0; j < l; j++) {
1068 1293 if (prob_col.y[j] > 0)
1069 1294 y[j] = 1;
1070 1295 else
1071 1296 y[j] = -1;
1072 1297
1073   - // assume initial w is 0
1074   - exp_wTx[j] = 1;
1075   - tau[j] = C[GETI(y, j)] * 0.5;
1076   - D[j] = C[GETI(y, j)] * 0.25;
  1298 + exp_wTx[j] = 0;
1077 1299 }
  1300 +
  1301 + w_norm = 0;
1078 1302 for (j = 0; j < w_size; j++) {
1079   - w[j] = 0;
  1303 + w_norm += Math.abs(w[j]);
1080 1304 wpd[j] = w[j];
1081 1305 index[j] = j;
1082 1306 xjneg_sum[j] = 0;
1083 1307 for (Feature x : prob_col.x[j]) {
1084 1308 int ind = x.getIndex() - 1;
1085   - if (y[ind] == -1) xjneg_sum[j] += C[GETI(y, ind)] * x.getValue();
  1309 + double val = x.getValue();
  1310 + exp_wTx[ind] += w[j] * val;
  1311 + if (y[ind] == -1) {
  1312 + xjneg_sum[j] += C[GETI(y, ind)] * val;
  1313 + }
1086 1314 }
1087 1315 }
  1316 + for (j = 0; j < l; j++) {
  1317 + exp_wTx[j] = Math.exp(exp_wTx[j]);
  1318 + double tau_tmp = 1 / (1 + exp_wTx[j]);
  1319 + tau[j] = C[GETI(y, j)] * tau_tmp;
  1320 + D[j] = C[GETI(y, j)] * exp_wTx[j] * tau_tmp * tau_tmp;
  1321 + }
1088 1322
1089 1323 while (newton_iter < max_newton_iter) {
1090 1324 Gmax_new = 0;
@@ -1183,9 +1417,9 @@ else if (Gp > QP_Gmax_old / l && Gn < -QP_Gmax_old / l) {
1183 1417 QP_Gnorm1_new += violation;
1184 1418
1185 1419 // obtain solution of one-variable problem
1186   - if (Gp <= H * wpd[j])
  1420 + if (Gp < H * wpd[j])
1187 1421 z = -Gp / H;
1188   - else if (Gn >= H * wpd[j])
  1422 + else if (Gn > H * wpd[j])
1189 1423 z = -Gn / H;
1190 1424 else
1191 1425 z = -wpd[j];
@@ -1218,7 +1452,7 @@ else if (Gn >= H * wpd[j])
1218 1452 QP_Gmax_old = QP_Gmax_new;
1219 1453 }
1220 1454
1221   - if (iter >= max_iter) info("WARNING: reaching max number of inner iterations\n");
  1455 + if (iter >= max_iter) info("WARNING: reaching max number of inner iterations%n");
1222 1456
1223 1457 delta = 0;
1224 1458 w_norm_new = 0;
@@ -1321,7 +1555,7 @@ static Problem transpose(Problem prob) {
1321 1555 Problem prob_col = new Problem();
1322 1556 prob_col.l = l;
1323 1557 prob_col.n = n;
1324   - prob_col.y = new int[l];
  1558 + prob_col.y = new double[l];
1325 1559 prob_col.x = new Feature[n][];
1326 1560
1327 1561 for (int i = 0; i < l; i++)
@@ -1368,6 +1602,7 @@ static void swap(IntArrayPointer array, int idxA, int idxB) {
1368 1602 array.set(idxB, temp);
1369 1603 }
1370 1604
  1605 +
1371 1606 /**
1372 1607 * @throws IllegalArgumentException if the feature nodes of prob are not sorted in ascending order
1373 1608 */
@@ -1395,123 +1630,158 @@ public static Model train(Problem prob, Parameter param) {
1395 1630 model.nr_feature = n - 1;
1396 1631 else
1397 1632 model.nr_feature = n;
  1633 +
1398 1634 model.solverType = param.solverType;
1399 1635 model.bias = prob.bias;
1400 1636
1401   - int[] perm = new int[l];
1402   - // group training data of the same class
1403   - GroupClassesReturn rv = groupClasses(prob, perm);
1404   - int nr_class = rv.nr_class;
1405   - int[] label = rv.label;
1406   - int[] start = rv.start;
1407   - int[] count = rv.count;
1408   -
1409   - model.nr_class = nr_class;
1410   - model.label = new int[nr_class];
1411   - for (int i = 0; i < nr_class; i++)
1412   - model.label[i] = label[i];
1413   -
1414   - // calculate weighted C
1415   - double[] weighted_C = new double[nr_class];
1416   - for (int i = 0; i < nr_class; i++) {
1417   - weighted_C[i] = param.C;
1418   - }
  1637 + if (param.solverType == SolverType.L2R_L2LOSS_SVR || //
  1638 + param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL || //
  1639 + param.solverType == SolverType.L2R_L2LOSS_SVR_DUAL) {
  1640 + model.w = new double[w_size];
  1641 + model.nr_class = 2;
  1642 + model.label = null;
1419 1643
1420   - for (int i = 0; i < param.getNumWeights(); i++) {
1421   - int j;
1422   - for (j = 0; j < nr_class; j++)
1423   - if (param.weightLabel[i] == label[j]) break;
1424   - if (j == nr_class) throw new IllegalArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found");
1425   -
1426   - weighted_C[j] *= param.weight[i];
1427   - }
  1644 + checkProblemSize(n, model.nr_class);
1428 1645