port version 1.91

+ support for linear support vector regression
commit 283f7f3d4e465cc07138aca536732520e2fe9ddf 1 parent bf0ec6f
Benedikt Waldvogel authored
128 README
@@ -29,11 +29,12 @@ The two most important methods that you might be interested in are:
29 29
30 30 -------------------------------------------------------------------------------
31 31
32   -LIBLINEAR is a simple package for solving large-scale regularized
33   -linear classification. It currently supports L2-regularized logistic
34   -regression/L2-loss support vector classification/L1-loss support vector
35   -classification, and L1-regularized L2-loss support vector classification/
36   -logistic regression. This document explains the usage of LIBLINEAR.
  32 +LIBLINEAR is a simple package for solving large-scale regularized linear
  33 +classification and regression. It currently supports
  34 +- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
  35 +- L1-regularized L2-loss support vector classification/L1-regularized logistic regression
  36 +- L2-regularized L2-loss support vector regression/L1-loss support vector regression.
  37 +This document explains the usage of LIBLINEAR.
37 38
38 39 To get started, please read the ``Quick Start'' section first.
39 40 For developers, please check the ``Library Usage'' section to learn
@@ -59,8 +60,8 @@ When to use LIBLINEAR but not LIBSVM
59 60
60 61 There are some large data for which with/without nonlinear mappings
61 62 gives similar performances. Without using kernels, one can
62   -efficiently train a much larger set via a linear classifier. These
63   -data usually have a large number of features. Document classification
  63 +efficiently train a much larger set via linear classification/regression.
  64 +These data usually have a large number of features. Document classification
64 65 is an example.
65 66
66 67 Warning: While generally liblinear is very fast, its default solver
@@ -128,25 +129,34 @@ and mark
128 129 Usage: train [options] training_set_file [model_file]
129 130 options:
130 131 -s type : set type of solver (default 1)
131   - 0 -- L2-regularized logistic regression (primal)
132   - 1 -- L2-regularized L2-loss support vector classification (dual)
133   - 2 -- L2-regularized L2-loss support vector classification (primal)
134   - 3 -- L2-regularized L1-loss support vector classification (dual)
135   - 4 -- multi-class support vector classification by Crammer and Singer
136   - 5 -- L1-regularized L2-loss support vector classification
137   - 6 -- L1-regularized logistic regression
138   - 7 -- L2-regularized logistic regression (dual)
  132 + 0 -- L2-regularized logistic regression (primal)
  133 + 1 -- L2-regularized L2-loss support vector classification (dual)
  134 + 2 -- L2-regularized L2-loss support vector classification (primal)
  135 + 3 -- L2-regularized L1-loss support vector classification (dual)
  136 + 4 -- multi-class support vector classification by Crammer and Singer
  137 + 5 -- L1-regularized L2-loss support vector classification
  138 + 6 -- L1-regularized logistic regression
  139 + 7 -- L2-regularized logistic regression (dual)
  140 + 11 -- L2-regularized L2-loss epsilon support vector regression (primal)
  141 + 12 -- L2-regularized L2-loss epsilon support vector regression (dual)
  142 + 13 -- L2-regularized L1-loss epsilon support vector regression (dual)
139 143 -c cost : set the parameter C (default 1)
  144 +-p epsilon : set the epsilon in the loss function of epsilon-SVR (default 0.1)
140 145 -e epsilon : set tolerance of termination criterion
141 146 -s 0 and 2
142 147 |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
143 148 where f is the primal function and pos/neg are # of
144 149 positive/negative data (default 0.01)
  150 + -s 11
  151 + |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
145 152 -s 1, 3, 4 and 7
146 153 Dual maximal violation <= eps; similar to libsvm (default 0.1)
147 154 -s 5 and 6
148 155 |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
149 156 where f is the primal function (default 0.01)
  157 + -s 12 and 13
  158 + |f'(alpha)|_1 <= eps |f'(alpha0)|_1,
  159 + where f is the dual function (default 0.1)
150 160 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
151 161 -wi weight: weights adjust the parameter C of different classes (see README for details)
152 162 -v n: n-fold cross validation mode
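
The new -s 11/12/13 solvers and the -p option surface in the Java port as additional SolverType constants plus a p field on the parameters. A minimal selection sketch, assuming the accessor names shown (Parameter.java is not part of this excerpt, so setP and getSolverType are not confirmed by the diff):

    import de.bwaldvogel.liblinear.Parameter;
    import de.bwaldvogel.liblinear.SolverType;

    public class SvrOptionsSketch {
        public static void main(String[] args) {
            // rough equivalent of "train -s 13 -c 1 -e 0.1 -p 0.1 ...":
            // L2-regularized L1-loss epsilon-SVR, solved in the dual
            Parameter param = new Parameter(SolverType.L2R_L1LOSS_SVR_DUAL, 1.0, 0.1);
            param.setP(0.1);   // epsilon of the epsilon-insensitive loss (setter name assumed)
            System.out.println(param.getSolverType());
        }
    }
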
@@ -183,23 +193,40 @@ For L1-regularized logistic regression (-s 6), we solve
183 193
184 194 min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i))
185 195
  196 +For L2-regularized logistic regression (-s 7), we solve
  197 +
  198 +min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
  199 + s.t. 0 <= alpha_i <= C,
  200 +
186 201 where
187 202
188 203 Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
189 204
190   -For L2-regularized logistic regression (-s 7), we solve
  205 +For L2-regularized L2-loss SVR (-s 11), we solve
191 206
192   -min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
193   - s.t. 0 <= alpha_i <= C,
  207 +min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2
  208 +
  209 +For L2-regularized L2-loss SVR dual (-s 12), we solve
  210 +
  211 +min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + p \sum |beta_i|
  212 +
  213 +For L2-regularized L1-loss SVR dual (-s 13), we solve
  214 +
  215 +min_beta 0.5(beta^T Q beta) - y^T beta + p \sum |beta_i|
  216 + s.t. -C <= beta_i <= C,
  217 +
  218 +where
  219 +
  220 +Q is a matrix with Q_ij = x_i^T x_j.
194 221
195 222 If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
196 223
197 224 The primal-dual relationship implies that -s 1 and -s 2 give the same
198   -model, and -s 0 and -s 7 give the same.
  225 +model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same.
199 226
200   -We implement 1-vs-the rest multi-class strategy. In training i
201   -vs. non_i, their C parameters are (weight from -wi)*C and C,
202   -respectively. If there are only two classes, we train only one
  227 +We implement 1-vs-the rest multi-class strategy for classification.
  228 +In training i vs. non_i, their C parameters are (weight from -wi)*C
  229 +and C, respectively. If there are only two classes, we train only one
203 230 model. Thus weight1*C vs. weight2*C is used. See examples below.
204 231
205 232 We also implement multi-class SVM by Crammer and Singer (-s 4):
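
Returning to the new SVR objectives above: the -s 11 primal objective is easy to check numerically. A minimal sketch of the epsilon-insensitive squared loss it uses (plain Java with toy names, not the port's internals; the port's actual implementation is L2R_L2_SvrFunction.fun further down in this commit):

    // f(w) = w^T w / 2 + C * sum_i max(0, |y_i - w^T x_i| - epsilon)^2
    static double svrPrimalObjective(double[][] x, double[] y, double[] w, double C, double epsilon) {
        double f = 0;
        for (int j = 0; j < w.length; j++)
            f += w[j] * w[j];
        f /= 2.0;
        for (int i = 0; i < x.length; i++) {
            double wtx = 0;
            for (int j = 0; j < w.length; j++)
                wtx += w[j] * x[i][j];
            double d = Math.abs(y[i] - wtx) - epsilon;  // distance outside the epsilon tube
            if (d > 0) f += C * d * d;                  // only points outside the tube are penalized
        }
        return f;
    }
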
@@ -224,7 +251,10 @@ and C^m_i = C if m = y_i,
224 251
225 252 Usage: predict [options] test_file model_file output_file
226 253 options:
227   --b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)
  254 +-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
  255 +
  256 +Note that -b is only needed in the prediction phase. This is different
  257 +from the setting of LIBSVM.
228 258
229 259 Examples
230 260 ========
@@ -267,8 +297,9 @@ Library Usage
267 297 - Function: model* train(const struct problem *prob,
268 298 const struct parameter *param);
269 299
270   - This function constructs and returns a linear classification model
271   - according to the given training data and parameters.
  300 + This function constructs and returns a linear classification
  301 + or regression model according to the given training data and
  302 + parameters.
272 303
273 304 struct problem describes the problem:
274 305
@@ -283,10 +314,10 @@ Library Usage
283 314 where `l' is the number of training data. If bias >= 0, we assume
284 315 that one additional feature is added to the end of each data
285 316 instance. `n' is the number of feature (including the bias feature
286   - if bias >= 0). `y' is an array containing the target values. And
287   - `x' is an array of pointers,
288   - each of which points to a sparse representation (array of feature_node) of one
289   - training vector.
  317 + if bias >= 0). `y' is an array containing the target values (integers
  318 + in classification, real numbers in regression), and `x' is an array
  319 + of pointers, each of which points to a sparse representation (array
  320 + of feature_node) of one training vector.
290 321
291 322 For example, if we have the following training data:
292 323
@@ -311,7 +342,8 @@ Library Usage
311 342 [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?)
312 343 [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?)
313 344
314   - struct parameter describes the parameters of a linear classification model:
  345 + struct parameter describes the parameters of a linear classification
  346 + or regression model:
315 347
316 348 struct parameter
317 349 {
@@ -323,9 +355,10 @@ Library Usage
323 355 int nr_weight;
324 356 int *weight_label;
325 357 double* weight;
  358 + double p;
326 359 };
327 360
328   - solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL.
  361 + solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
329 362
330 363 L2R_LR L2-regularized logistic regression (primal)
331 364 L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
@@ -335,8 +368,12 @@ Library Usage
335 368 L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
336 369 L1R_LR L1-regularized logistic regression
337 370 L2R_LR_DUAL L2-regularized logistic regression (dual)
  371 + L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal)
  372 + L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual)
  373 + L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual)
338 374
339 375 C is the cost of constraints violation.
  376 + p is the epsilon parameter in the epsilon-insensitive loss of support vector regression.
340 377 eps is the stopping criterion.
341 378
342 379 nr_weight, weight_label, and weight are used to change the penalty
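
Tying the problem and parameter structs above together from Java caller code, a small regression training sketch (toy data written as sparse FeatureNode arrays; y is now a double[] of real-valued targets, as introduced by this commit; getNrFeature is assumed to be the model accessor):

    import de.bwaldvogel.liblinear.*;

    public class ToySvrTraining {
        public static void main(String[] args) {
            Problem prob = new Problem();
            prob.l = 3;              // number of training instances
            prob.n = 2;              // number of features (no bias feature)
            prob.bias = -1;
            prob.y = new double[] {0.5, 1.2, -0.3};   // real-valued regression targets
            prob.x = new Feature[][] {
                {new FeatureNode(1, 0.1), new FeatureNode(2, 0.7)},
                {new FeatureNode(1, 0.9)},
                {new FeatureNode(2, -0.4)},
            };
            // -s 11: L2-regularized L2-loss SVR (primal), default eps 0.001
            Parameter param = new Parameter(SolverType.L2R_L2LOSS_SVR, 1.0, 0.001);
            Model model = Linear.train(prob, param);
            System.out.println("nr_feature = " + model.getNrFeature());
        }
    }
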
@@ -368,7 +405,8 @@ Library Usage
368 405
369 406 param describes the parameters used to obtain the model.
370 407
371   - nr_class and nr_feature are the number of classes and features, respectively.
  408 + nr_class and nr_feature are the number of classes and features,
  409 + respectively. nr_class = 2 for regression.
372 410
373 411 The nr_feature*nr_class array w gives feature weights. We use one
374 412 against the rest for multi-class classification, so each feature
@@ -386,7 +424,7 @@ Library Usage
386 424
387 425 The array label stores class labels.
388 426
389   -- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target);
  427 +- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target);
390 428
391 429 This function conducts cross validation. Data are separated to
392 430 nr_fold folds. Under given parameters, sequentially each fold is
@@ -396,23 +434,25 @@ Library Usage
396 434
397 435 The format of prob is same as that for train().
398 436
399   -- Function: int predict(const model *model_, const feature_node *x);
  437 +- Function: double predict(const model *model_, const feature_node *x);
400 438
401   - This functions classifies a test vector using the given
402   - model. The predicted label is returned.
  439 + For a classification model, the predicted class for x is returned.
  440 + For a regression model, the function value of x calculated using
  441 + the model is returned.
403 442
404   -- Function: int predict_values(const struct model *model_,
  443 +- Function: double predict_values(const struct model *model_,
405 444 const struct feature_node *x, double* dec_values);
406 445
407   - This function gives nr_w decision values in the array
408   - dec_values. nr_w is 1 if there are two classes except multi-class
409   - svm by Crammer and Singer (-s 4), and is the number of classes otherwise.
  446 + This function gives nr_w decision values in the array dec_values.
  447 + nr_w=1 if regression is applied or the number of classes is two. An exception is
  448 + multi-class svm by Crammer and Singer (-s 4), where nr_w = 2 if there are
  449 + two classes. For all other situations, nr_w is the number of classes.
410 450
411   - We implement one-vs-the rest multi-class strategy (-s 0,1,2,3) and
412   - multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
  451 + We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7)
  452 + and multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
413 453 The class with the highest decision value is returned.
414 454
415   -- Function: int predict_probability(const struct model *model_,
  455 +- Function: double predict_probability(const struct model *model_,
416 456 const struct feature_node *x, double* prob_estimates);
417 457
418 458 This function gives nr_class probability estimates in the array
@@ -428,10 +468,12 @@ Library Usage
428 468 - Function: int get_nr_class(const model *model_);
429 469
430 470 The function gives the number of classes of the model.
  471 + For a regression model, 2 is returned.
431 472
432 473 - Function: void get_labels(const model *model_, int* label);
433 474
434 475 This function outputs the name of labels into an array called label.
  476 + For a regression model, label is unchanged.
435 477
436 478 - Function: const char *check_parameter(const struct problem *prob,
437 479 const struct parameter *param);
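
To close out the README changes: the functions whose signatures moved from int to double behave as follows from Java caller code. A short sketch of prediction and cross-validation in the style of the toy problem above (a sketch under the same assumptions, not canonical project documentation):

    import de.bwaldvogel.liblinear.*;

    public class PredictAndCrossValidate {
        public static void main(String[] args) {
            Problem prob = new Problem();
            prob.l = 2;
            prob.n = 1;
            prob.bias = -1;
            prob.y = new double[] {1.0, 2.0};
            prob.x = new Feature[][] {
                {new FeatureNode(1, 1.0)},
                {new FeatureNode(1, 2.0)},
            };
            Parameter param = new Parameter(SolverType.L2R_L2LOSS_SVR_DUAL, 1.0, 0.1);
            Model model = Linear.train(prob, param);

            // predict() now returns double: the class label for classification models,
            // the regression function value for SVR models.
            double prediction = Linear.predict(model, new Feature[] {new FeatureNode(1, 1.5)});
            System.out.println("prediction = " + prediction);

            // cross_validation's target array is now double[] as well.
            double[] target = new double[prob.l];
            Linear.crossValidation(prob, param, 2, target);
        }
    }
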
2  pom.xml
@@ -4,7 +4,7 @@
4 4 <artifactId>liblinear</artifactId>
5 5 <packaging>jar</packaging>
6 6 <name>liblinear</name>
7   - <version>1.9-SNAPSHOT</version>
  7 + <version>1.91-SNAPSHOT</version>
8 8 <description>Java port of Liblinear</description>
9 9 <url>http://www.bwaldvogel.de/liblinear-java/</url>
10 10
47 src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvcFunction.java
@@ -2,49 +2,40 @@
2 2
3 3 class L2R_L2_SvcFunction implements Function {
4 4
5   - private final Problem prob;
6   - private final double[] C;
7   - private final int[] I;
8   - private final double[] z;
  5 + protected final Problem prob;
  6 + protected final double[] C;
  7 + protected final int[] I;
  8 + protected final double[] z;
9 9
10   - private int sizeI;
  10 + protected int sizeI;
11 11
12   - public L2R_L2_SvcFunction( Problem prob, double Cp, double Cn ) {
13   - int i;
  12 + public L2R_L2_SvcFunction( Problem prob, double[] C ) {
14 13 int l = prob.l;
15   - int[] y = prob.y;
16 14
17 15 this.prob = prob;
18 16
19 17 z = new double[l];
20   - C = new double[l];
21 18 I = new int[l];
22   -
23   - for (i = 0; i < l; i++) {
24   - if (y[i] == 1)
25   - C[i] = Cp;
26   - else
27   - C[i] = Cn;
28   - }
  19 + this.C = C;
29 20 }
30 21
31 22 public double fun(double[] w) {
32 23 int i;
33 24 double f = 0;
34   - int[] y = prob.y;
  25 + double[] y = prob.y;
35 26 int l = prob.l;
36 27 int w_size = get_nr_variable();
37 28
38 29 Xv(w, z);
  30 +
  31 + for (i = 0; i < w_size; i++)
  32 + f += w[i] * w[i];
  33 + f /= 2.0;
39 34 for (i = 0; i < l; i++) {
40 35 z[i] = y[i] * z[i];
41 36 double d = 1 - z[i];
42 37 if (d > 0) f += C[i] * d * d;
43 38 }
44   - f = 2 * f;
45   - for (i = 0; i < w_size; i++)
46   - f += w[i] * w[i];
47   - f /= 2.0;
48 39
49 40 return (f);
50 41 }
@@ -54,13 +45,12 @@ public int get_nr_variable() {
54 45 }
55 46
56 47 public void grad(double[] w, double[] g) {
57   - int i;
58   - int[] y = prob.y;
  48 + double[] y = prob.y;
59 49 int l = prob.l;
60 50 int w_size = get_nr_variable();
61 51
62 52 sizeI = 0;
63   - for (i = 0; i < l; i++) {
  53 + for (int i = 0; i < l; i++) {
64 54 if (z[i] < 1) {
65 55 z[sizeI] = C[i] * y[i] * (z[i] - 1);
66 56 I[sizeI] = i;
@@ -69,15 +59,14 @@ public void grad(double[] w, double[] g) {
69 59 }
70 60 subXTv(z, g);
71 61
72   - for (i = 0; i < w_size; i++)
  62 + for (int i = 0; i < w_size; i++)
73 63 g[i] = w[i] + 2 * g[i];
74 64 }
75 65
76 66 public void Hv(double[] s, double[] Hs) {
77 67 int i;
78   - int l = prob.l;
79 68 int w_size = get_nr_variable();
80   - double[] wa = new double[l];
  69 + double[] wa = new double[sizeI];
81 70
82 71 subXv(s, wa);
83 72 for (i = 0; i < sizeI; i++)
@@ -88,7 +77,7 @@ public void Hv(double[] s, double[] Hs) {
88 77 Hs[i] = s[i] + 2 * Hs[i];
89 78 }
90 79
91   - private void subXTv(double[] v, double[] XTv) {
  80 + protected void subXTv(double[] v, double[] XTv) {
92 81 int i;
93 82 int w_size = get_nr_variable();
94 83
@@ -112,7 +101,7 @@ private void subXv(double[] v, double[] Xv) {
112 101 }
113 102 }
114 103
115   - private void Xv(double[] v, double[] Xv) {
  104 + protected void Xv(double[] v, double[] Xv) {
116 105
117 106 for (int i = 0; i < prob.l; i++) {
118 107 Xv[i] = 0;
67 src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvrFunction.java
... ... @@ -0,0 +1,67 @@
  1 +package de.bwaldvogel.liblinear;
  2 +
  3 +/**
  4 + * @since 1.91
  5 + */
  6 +public class L2R_L2_SvrFunction extends L2R_L2_SvcFunction {
  7 +
  8 + private double p;
  9 +
  10 + public L2R_L2_SvrFunction( Problem prob, double[] C, double p ) {
  11 + super(prob, C);
  12 + this.p = p;
  13 + }
  14 +
  15 + @Override
  16 + public double fun(double[] w) {
  17 + double f = 0;
  18 + double[] y = prob.y;
  19 + int l = prob.l;
  20 + int w_size = get_nr_variable();
  21 + double d;
  22 +
  23 + Xv(w, z);
  24 +
  25 + for (int i = 0; i < w_size; i++)
  26 + f += w[i] * w[i];
  27 + f /= 2;
  28 + for (int i = 0; i < l; i++) {
  29 + d = z[i] - y[i];
  30 + if (d < -p)
  31 + f += C[i] * (d + p) * (d + p);
  32 + else if (d > p) f += C[i] * (d - p) * (d - p);
  33 + }
  34 +
  35 + return f;
  36 + }
  37 +
  38 + @Override
  39 + public void grad(double[] w, double[] g) {
  40 + double[] y = prob.y;
  41 + int l = prob.l;
  42 + int w_size = get_nr_variable();
  43 +
  44 + sizeI = 0;
  45 + for (int i = 0; i < l; i++) {
  46 + double d = z[i] - y[i];
  47 +
  48 + // generate index set I
  49 + if (d < -p) {
  50 + z[sizeI] = C[i] * (d + p);
  51 + I[sizeI] = i;
  52 + sizeI++;
  53 + } else if (d > p) {
  54 + z[sizeI] = C[i] * (d - p);
  55 + I[sizeI] = i;
  56 + sizeI++;
  57 + }
  58 +
  59 + }
  60 + subXTv(z, g);
  61 +
  62 + for (int i = 0; i < w_size; i++)
  63 + g[i] = w[i] + 2 * g[i];
  64 +
  65 + }
  66 +
  67 +}
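
This new Function implementation is the objective/gradient pair consumed by the port's trust-region Newton optimizer when -s 11 is selected. A rough sketch of that wiring (roughly what Linear.train does internally; Tron is package-private in the port, and the Tron constructor and the getP accessor are assumptions, since neither Tron.java nor Parameter.java appears in this excerpt):

    // conceptual, package-internal wiring for -s 11
    static void trainL2LossSvrPrimal(Problem prob, Parameter param, double[] w) {
        double[] C = new double[prob.l];
        for (int i = 0; i < prob.l; i++)
            C[i] = param.getC();                              // SVR uses the same cost for every instance
        Function funObj = new L2R_L2_SvrFunction(prob, C, param.getP()); // getP() assumed
        Tron tron = new Tron(funObj, param.getEps());         // constructor signature assumed
        tron.tron(w);                                         // fills w with the primal solution
    }
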
25 src/main/java/de/bwaldvogel/liblinear/L2R_LrFunction.java
@@ -7,23 +7,14 @@
7 7 private final double[] D;
8 8 private final Problem prob;
9 9
10   - public L2R_LrFunction( Problem prob, double Cp, double Cn ) {
11   - int i;
  10 + public L2R_LrFunction( Problem prob, double[] C ) {
12 11 int l = prob.l;
13   - int[] y = prob.y;
14 12
15 13 this.prob = prob;
16 14
17 15 z = new double[l];
18 16 D = new double[l];
19   - C = new double[l];
20   -
21   - for (i = 0; i < l; i++) {
22   - if (y[i] == 1)
23   - C[i] = Cp;
24   - else
25   - C[i] = Cn;
26   - }
  17 + this.C = C;
27 18 }
28 19
29 20
@@ -56,11 +47,15 @@ private void XTv(double[] v, double[] XTv) {
56 47 public double fun(double[] w) {
57 48 int i;
58 49 double f = 0;
59   - int[] y = prob.y;
  50 + double[] y = prob.y;
60 51 int l = prob.l;
61 52 int w_size = get_nr_variable();
62 53
63 54 Xv(w, z);
  55 +
  56 + for (i = 0; i < w_size; i++)
  57 + f += w[i] * w[i];
  58 + f /= 2.0;
64 59 for (i = 0; i < l; i++) {
65 60 double yz = y[i] * z[i];
66 61 if (yz >= 0)
@@ -68,17 +63,13 @@ public double fun(double[] w) {
68 63 else
69 64 f += C[i] * (-yz + Math.log(1 + Math.exp(yz)));
70 65 }
71   - f = 2.0 * f;
72   - for (i = 0; i < w_size; i++)
73   - f += w[i] * w[i];
74   - f /= 2.0;
75 66
76 67 return (f);
77 68 }
78 69
79 70 public void grad(double[] w, double[] g) {
80 71 int i;
81   - int[] y = prob.y;
  72 + double[] y = prob.y;
82 73 int l = prob.l;
83 74 int w_size = get_nr_variable();
84 75
529 src/main/java/de/bwaldvogel/liblinear/Linear.java
@@ -28,7 +28,7 @@
28 28 *
29 29 * <p><em>The port was done by Benedikt Waldvogel (mail at bwaldvogel.de)</em></p>
30 30 *
31   - * @version 1.9-SNAPSHOT
  31 + * @version 1.91-SNAPSHOT
32 32 */
33 33 public class Linear {
34 34
@@ -48,7 +48,7 @@
48 48 /**
49 49 * @param target predicted classes
50 50 */
51   - public static void crossValidation(Problem prob, Parameter param, int nr_fold, int[] target) {
  51 + public static void crossValidation(Problem prob, Parameter param, int nr_fold, double[] target) {
52 52 int i;
53 53 int[] fold_start = new int[nr_fold + 1];
54 54 int l = prob.l;
@@ -73,7 +73,7 @@ public static void crossValidation(Problem prob, Parameter param, int nr_fold, i
73 73 subprob.n = prob.n;
74 74 subprob.l = l - (end - begin);
75 75 subprob.x = new Feature[subprob.l][];
76   - subprob.y = new int[subprob.l];
  76 + subprob.y = new double[subprob.l];
77 77
78 78 k = 0;
79 79 for (j = 0; j < begin; j++) {
@@ -119,7 +119,7 @@ private static GroupClassesReturn groupClasses(Problem prob, int[] perm) {
119 119 int i;
120 120
121 121 for (i = 0; i < l; i++) {
122   - int this_label = prob.y[i];
  122 + int this_label = (int)prob.y[i];
123 123 int j;
124 124 for (j = 0; j < nr_class; j++) {
125 125 if (this_label == label[j]) {
@@ -314,7 +314,7 @@ static void closeQuietly(Closeable c) {
314 314 } catch (Throwable t) {}
315 315 }
316 316
317   - public static int predict(Model model, Feature[] x) {
  317 + public static double predict(Model model, Feature[] x) {
318 318 double[] dec_values = new double[model.nr_class];
319 319 return predictValues(model, x, dec_values);
320 320 }
@@ -322,7 +322,7 @@ public static int predict(Model model, Feature[] x) {
322 322 /**
323 323 * @throws IllegalArgumentException if model is not probabilistic (see {@link Model#isProbabilityModel()})
324 324 */
325   - public static int predictProbability(Model model, Feature[] x, double[] prob_estimates) throws IllegalArgumentException {
  325 + public static double predictProbability(Model model, Feature[] x, double[] prob_estimates) throws IllegalArgumentException {
326 326 if (!model.isProbabilityModel()) {
327 327 throw new IllegalArgumentException("probability output is only supported for logistic regression");
328 328 }
@@ -333,7 +333,7 @@ public static int predictProbability(Model model, Feature[] x, double[] prob_est
333 333 else
334 334 nr_w = nr_class;
335 335
336   - int label = predictValues(model, x, prob_estimates);
  336 + double label = predictValues(model, x, prob_estimates);
337 337 for (int i = 0; i < nr_w; i++)
338 338 prob_estimates[i] = 1 / (1 + Math.exp(-prob_estimates[i]));
339 339
@@ -351,7 +351,7 @@ public static int predictProbability(Model model, Feature[] x, double[] prob_est
351 351 return label;
352 352 }
353 353
354   - public static int predictValues(Model model, Feature[] x, double[] dec_values) {
  354 + public static double predictValues(Model model, Feature[] x, double[] dec_values) {
355 355 int n;
356 356 if (model.bias >= 0)
357 357 n = model.nr_feature + 1;
@@ -379,9 +379,12 @@ public static int predictValues(Model model, Feature[] x, double[] dec_values) {
379 379 }
380 380 }
381 381
382   - if (model.nr_class == 2)
383   - return (dec_values[0] > 0) ? model.label[0] : model.label[1];
384   - else {
  382 + if (model.nr_class == 2) {
  383 + if (model.solverType.isSupportVectorRegression())
  384 + return dec_values[0];
  385 + else
  386 + return (dec_values[0] > 0) ? model.label[0] : model.label[1];
  387 + } else {
385 388 int dec_max_idx = 0;
386 389 for (int i = 1; i < model.nr_class; i++) {
387 390 if (dec_values[i] > dec_values[dec_max_idx]) dec_max_idx = i;
@@ -390,7 +393,6 @@ public static int predictValues(Model model, Feature[] x, double[] dec_values) {
390 393 }
391 394 }
392 395
393   -
394 396 static void printf(Formatter formatter, String format, Object... args) throws IOException {
395 397 formatter.format(format, args);
396 398 IOException ioException = formatter.ioException();
@@ -416,11 +418,13 @@ public static void saveModel(Writer modelOutput, Model model) throws IOException
416 418 printf(formatter, "solver_type %s\n", model.solverType.name());
417 419 printf(formatter, "nr_class %d\n", model.nr_class);
418 420
419   - printf(formatter, "label");
420   - for (int i = 0; i < model.nr_class; i++) {
421   - printf(formatter, " %d", model.label[i]);
  421 + if (model.label != null) {
  422 + printf(formatter, "label");
  423 + for (int i = 0; i < model.nr_class; i++) {
  424 + printf(formatter, " %d", model.label[i]);
  425 + }
  426 + printf(formatter, "\n");
422 427 }
423   - printf(formatter, "\n");
424 428
425 429 printf(formatter, "nr_feature %d\n", nr_feature);
426 430 printf(formatter, "bias %.16g\n", model.bias);
@@ -471,7 +475,7 @@ private static int GETI(byte[] y, int i) {
471 475 * L1-loss and L2-loss SVM dual problems
472 476 *<pre>
473 477 * min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
474   - * s.t. 0 <= alpha_i <= upper_bound_i,
  478 + * s.t. 0 <= \alpha_i <= upper_bound_i,
475 479 *
476 480 * where Qij = yi yj xi^T xj and
477 481 * D is a diagonal matrix
@@ -522,19 +526,28 @@ private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, dou
522 526 upper_bound[2] = Cp;
523 527 }
524 528
525   - for (i = 0; i < w_size; i++)
526   - w[i] = 0;
527 529 for (i = 0; i < l; i++) {
528   - alpha[i] = 0;
529 530 if (prob.y[i] > 0) {
530 531 y[i] = +1;
531 532 } else {
532 533 y[i] = -1;
533 534 }
  535 + }
  536 +
  537 + // Initial alpha can be set here. Note that
  538 + // 0 <= alpha[i] <= upper_bound[GETI(i)]
  539 + for (i = 0; i < l; i++)
  540 + alpha[i] = 0;
  541 +
  542 + for (i = 0; i < w_size; i++)
  543 + w[i] = 0;
  544 + for (i = 0; i < l; i++) {
534 545 QD[i] = diag[GETI(y, i)];
535 546
536 547 for (Feature xi : prob.x[i]) {
537   - QD[i] += xi.getValue() * xi.getValue();
  548 + double val = xi.getValue();
  549 + QD[i] += val * val;
  550 + w[xi.getIndex() - 1] += y[i] * alpha[i] * val;
538 551 }
539 552 index[i] = i;
540 553 }
@@ -635,12 +648,206 @@ private static void solve_l2r_l1l2_svc(Problem prob, double[] w, double eps, dou
635 648 info("nSV = %d" + NL, nSV);
636 649 }
637 650
  651 + // To support weights for instances, use GETI(i) (i)
  652 + private static int GETI_SVR(int i) {
  653 + return 0;
  654 + }
  655 +
  656 + /**
  657 + * A coordinate descent algorithm for
  658 + * L1-loss and L2-loss epsilon-SVR dual problem
  659 + *
  660 + * min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
  661 + * s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
  662 + *
  663 + * where Qij = xi^T xj and
  664 + * D is a diagonal matrix
  665 + *
  666 + * In L1-SVM case:
  667 + * upper_bound_i = C
  668 + * lambda_i = 0
  669 + * In L2-SVM case:
  670 + * upper_bound_i = INF
  671 + * lambda_i = 1/(2*C)
  672 + *
  673 + * Given:
  674 + * x, y, p, C
  675 + * eps is the stopping tolerance
  676 + *
  677 + * solution will be put in w
  678 + *
  679 + * See Algorithm 4 of Ho and Lin, 2012
  680 + */
  681 + private static void solve_l2r_l1l2_svr(Problem prob, double[] w, Parameter param) {
  682 + int l = prob.l;
  683 + double C = param.C;
  684 + double p = param.p;
  685 + int w_size = prob.n;
  686 + double eps = param.eps;
  687 + int i, s, iter = 0;
  688 + int max_iter = 1000;
  689 + int active_size = l;
  690 + int[] index = new int[l];
  691 +
  692 + double d, G, H;
  693 + double Gmax_old = Double.POSITIVE_INFINITY;
  694 + double Gmax_new, Gnorm1_new;
  695 + double Gnorm1_init = 0; // initialize to 0 to get rid of Eclipse warning/error
  696 + double[] beta = new double[l];
  697 + double[] QD = new double[l];
  698 + double[] y = prob.y;
  699 +
  700 + // L2R_L2LOSS_SVR_DUAL
  701 + double[] lambda = new double[] {0.5 / C};
  702 + double[] upper_bound = new double[] {Double.POSITIVE_INFINITY};
  703 +
  704 + if (param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL) {
  705 + lambda[0] = 0;
  706 + upper_bound[0] = C;
  707 + }
  708 +
  709 + // Initial beta can be set here. Note that
  710 + // -upper_bound <= beta[i] <= upper_bound
  711 + for (i = 0; i < l; i++)
  712 + beta[i] = 0;
  713 +
  714 + for (i = 0; i < w_size; i++)
  715 + w[i] = 0;
  716 + for (i = 0; i < l; i++) {
  717 + QD[i] = 0;
  718 + for (Feature xi : prob.x[i]) {
  719 + double val = xi.getValue();
  720 + QD[i] += val * val;
  721 + w[xi.getIndex() - 1] += beta[i] * val;
  722 + }
  723 +
  724 + index[i] = i;
  725 + }
  726 +
  727 +
  728 + while (iter < max_iter) {
  729 + Gmax_new = 0;
  730 + Gnorm1_new = 0;
  731 +
  732 + for (i = 0; i < active_size; i++) {
  733 + int j = i + random.nextInt(active_size - i);
  734 + swap(index, i, j);
  735 + }
  736 +
  737 + for (s = 0; s < active_size; s++) {
  738 + i = index[s];
  739 + G = -y[i] + lambda[GETI_SVR(i)] * beta[i];
  740 + H = QD[i] + lambda[GETI_SVR(i)];
  741 +
  742 + for (Feature xi : prob.x[i]) {
  743 + int ind = xi.getIndex() - 1;
  744 + double val = xi.getValue();
  745 + G += val * w[ind];
  746 + }
  747 +
  748 + double Gp = G + p;
  749 + double Gn = G - p;
  750 + double violation = 0;
  751 + if (beta[i] == 0) {
  752 + if (Gp < 0)
  753 + violation = -Gp;
  754 + else if (Gn > 0)
  755 + violation = Gn;
  756 + else if (Gp > Gmax_old && Gn < -Gmax_old) {
  757 + active_size--;
  758 + swap(index, s, active_size);
  759 + s--;
  760 + continue;
  761 + }
  762 + } else if (beta[i] >= upper_bound[GETI_SVR(i)]) {
  763 + if (Gp > 0)
  764 + violation = Gp;
  765 + else if (Gp < -Gmax_old) {
  766 + active_size--;
  767 + swap(index, s, active_size);
  768 + s--;
  769 + continue;
  770 + }
  771 + } else if (beta[i] <= -upper_bound[GETI_SVR(i)]) {
  772 + if (Gn < 0)
  773 + violation = -Gn;
  774 + else if (Gn > Gmax_old) {
  775 + active_size--;
  776 + swap(index, s, active_size);
  777 + s--;
  778 + continue;
  779 + }
  780 + } else if (beta[i] > 0)
  781 + violation = Math.abs(Gp);
  782 + else
  783 + violation = Math.abs(Gn);
  784 +
  785 + Gmax_new = Math.max(Gmax_new, violation);
  786 + Gnorm1_new += violation;
  787 +
  788 + // obtain Newton direction d
  789 + if (Gp < H * beta[i])
  790 + d = -Gp / H;
  791 + else if (Gn > H * beta[i])
  792 + d = -Gn / H;
  793 + else
  794 + d = -beta[i];
  795 +
  796 + if (Math.abs(d) < 1.0e-12) continue;
  797 +
  798 + double beta_old = beta[i];
  799 + beta[i] = Math.min(Math.max(beta[i] + d, -upper_bound[GETI_SVR(i)]), upper_bound[GETI_SVR(i)]);
  800 + d = beta[i] - beta_old;
  801 +
  802 + if (d != 0) {
  803 + for (Feature xi : prob.x[i]) {
  804 + w[xi.getIndex() - 1] += d * xi.getValue();
  805 + }
  806 + }
  807 + }
  808 +
  809 + if (iter == 0) Gnorm1_init = Gnorm1_new;
  810 + iter++;
  811 + if (iter % 10 == 0) info(".");
  812 +
  813 + if (Gnorm1_new <= eps * Gnorm1_init) {
  814 + if (active_size == l)
  815 + break;
  816 + else {
  817 + active_size = l;
  818 + info("*");
  819 + Gmax_old = Double.POSITIVE_INFINITY;
  820 + continue;
  821 + }
  822 + }
  823 +
  824 + Gmax_old = Gmax_new;
  825 + }
  826 +
  827 + info("%noptimization finished, #iter = %d%n", iter);
  828 + if (iter >= max_iter) info("%nWARNING: reaching max number of iterations%nUsing -s 11 may be faster%n%n");
  829 +
  830 + // calculate objective value
  831 + double v = 0;
  832 + int nSV = 0;
  833 + for (i = 0; i < w_size; i++)
  834 + v += w[i] * w[i];
  835 + v = 0.5 * v;
  836 + for (i = 0; i < l; i++) {
  837 + v += p * Math.abs(beta[i]) - y[i] * beta[i] + 0.5 * lambda[GETI_SVR(i)] * beta[i] * beta[i];
  838 + if (beta[i] != 0) nSV++;
  839 + }
  840 +
  841 + info("Objective value = %f%n", v);
  842 + info("nSV = %d%n", nSV);
  843 + }
  844 +
638 845 /**
639 846 * A coordinate descent algorithm for
640 847 * the dual of L2-regularized logistic regression problems
641 848 *<pre>
642   - * min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) ,
643   - * s.t. 0 <= alpha_i <= upper_bound_i,
  849 + * min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i) ,
  850 + * s.t. 0 <= \alpha_i <= upper_bound_i,
644 851 *
645 852 * where Qij = yi yj xi^T xj and
646 853 * upper_bound_i = Cp if y_i = 1
@@ -671,21 +878,30 @@ private static void solve_l2r_lr_dual(Problem prob, double w[], double eps, doub
671 878 double innereps_min = Math.min(1e-8, eps);
672 879 double upper_bound[] = new double[] {Cn, 0, Cp};
673 880
674   - for (i = 0; i < w_size; i++)
675   - w[i] = 0;
676 881 for (i = 0; i < l; i++) {
677 882 if (prob.y[i] > 0) {
678 883 y[i] = +1;
679 884 } else {
680 885 y[i] = -1;
681 886 }
  887 + }
  888 +
  889 + // Initial alpha can be set here. Note that
  890 + // 0 < alpha[i] < upper_bound[GETI(i)]
  891 + // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
  892 + for (i = 0; i < l; i++) {
682 893 alpha[2 * i] = Math.min(0.001 * upper_bound[GETI(y, i)], 1e-8);
683 894 alpha[2 * i + 1] = upper_bound[GETI(y, i)] - alpha[2 * i];
  895 + }
684 896
  897 + for (i = 0; i < w_size; i++)
  898 + w[i] = 0;
  899 + for (i = 0; i < l; i++) {
685 900 xTx[i] = 0;
686 901 for (Feature xi : prob.x[i]) {
687   - xTx[i] += (xi.getValue()) * (xi.getValue());
688   - w[xi.getIndex() - 1] += y[i] * alpha[2 * i] * xi.getValue();
  902 + double val = xi.getValue();
  903 + xTx[i] += val * val;
  904 + w[xi.getIndex() - 1] += y[i] * alpha[2 * i] * val;
689 905 }
690 906 index[i] = i;
691 907 }
@@ -819,6 +1035,10 @@ private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, d
819 1035
820 1036 double[] C = new double[] {Cn, 0, Cp};
821 1037
  1038 + // Initial w can be set here.
  1039 + for (j = 0; j < w_size; j++)
  1040 + w[j] = 0;
  1041 +
822 1042 for (j = 0; j < l; j++) {
823 1043 b[j] = 1;
824 1044 if (prob_col.y[j] > 0)
@@ -827,13 +1047,14 @@ private static void solve_l1r_l2_svc(Problem prob_col, double[] w, double eps, d
827 1047 y[j] = -1;
828 1048 }
829 1049 for (j = 0; j < w_size; j++) {
830   - w[j] = 0;
831 1050 index[j] = j;
832 1051 xj_sq[j] = 0;
833 1052 for (Feature xi : prob_col.x[j]) {
834 1053 int ind = xi.getIndex() - 1;
835   - double val = xi.getValue();
836 1054 xi.setValue(xi.getValue() * y[ind]); // x->value stores yi*xij
  1055 + double val = xi.getValue();
  1056 + b[ind] -= w[j] * val;
  1057 +
837 1058 xj_sq[j] += C[GETI(y, ind)] * val * val;
838 1059 }
839 1060 }
@@ -890,9 +1111,9 @@ else if (Gp > Gmax_old / l && Gn < -Gmax_old / l) {
890 1111 Gnorm1_new += violation;
891 1112
892 1113 // obtain Newton direction d
893   - if (Gp <= H * w[j])
  1114 + if (Gp < H * w[j])
894 1115 d = -Gp / H;
895   - else if (Gn >= H * w[j])
  1116 + else if (Gn > H * w[j])
896 1117 d = -Gn / H;
897 1118 else
898 1119 d = -w[j];
@@ -1041,7 +1262,7 @@ private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, doubl
1041 1262 double nu = 1e-12;
1042 1263 double inner_eps = 1;
1043 1264 double sigma = 0.01;
1044   - double w_norm = 0, w_norm_new;
  1265 + double w_norm, w_norm_new;
1045 1266 double z, G, H;
1046 1267 double Gnorm1_init = 0; // eclipse moans this variable might not be initialized
1047 1268 double Gmax_old = Double.POSITIVE_INFINITY;
@@ -1064,27 +1285,40 @@ private static void solve_l1r_lr(Problem prob_col, double[] w, double eps, doubl
1064 1285
1065 1286 double[] C = {Cn, 0, Cp};
1066 1287
  1288 + // Initial w can be set here.
  1289 + for (j = 0; j < w_size; j++)
  1290 + w[j] = 0;
  1291 +
1067 1292 for (j = 0; j < l; j++) {
1068 1293 if (prob_col.y[j] > 0)
1069 1294 y[j] = 1;
1070 1295 else
1071 1296 y[j] = -1;
1072 1297
1073   - // assume initial w is 0
1074   - exp_wTx[j] = 1;
1075   - tau[j] = C[GETI(y, j)] * 0.5;
1076   - D[j] = C[GETI(y, j)] * 0.25;
  1298 + exp_wTx[j] = 0;
1077 1299 }
  1300 +
  1301 + w_norm = 0;
1078 1302 for (j = 0; j < w_size; j++) {
1079   - w[j] = 0;
  1303 + w_norm += Math.abs(w[j]);
1080 1304 wpd[j] = w[j];
1081 1305 index[j] = j;
1082 1306 xjneg_sum[j] = 0;
1083 1307 for (Feature x : prob_col.x[j]) {
1084 1308 int ind = x.getIndex() - 1;
1085   - if (y[ind] == -1) xjneg_sum[j] += C[GETI(y, ind)] * x.getValue();
  1309 + double val = x.getValue();
  1310 + exp_wTx[ind] += w[j] * val;
  1311 + if (y[ind] == -1) {
  1312 + xjneg_sum[j] += C[GETI(y, ind)] * val;
  1313 + }
1086 1314 }
1087 1315 }
  1316 + for (j = 0; j < l; j++) {
  1317 + exp_wTx[j] = Math.exp(exp_wTx[j]);
  1318 + double tau_tmp = 1 / (1 + exp_wTx[j]);
  1319 + tau[j] = C[GETI(y, j)] * tau_tmp;
  1320 + D[j] = C[GETI(y, j)] * exp_wTx[j] * tau_tmp * tau_tmp;
  1321 + }
1088 1322
1089 1323 while (newton_iter < max_newton_iter) {
1090 1324 Gmax_new = 0;
@@ -1183,9 +1417,9 @@ else if (Gp > QP_Gmax_old / l && Gn < -QP_Gmax_old / l) {
1183 1417 QP_Gnorm1_new += violation;
1184 1418
1185 1419 // obtain solution of one-variable problem
1186   - if (Gp <= H * wpd[j])
  1420 + if (Gp < H * wpd[j])
1187 1421 z = -Gp / H;
1188   - else if (Gn >= H * wpd[j])
  1422 + else if (Gn > H * wpd[j])
1189 1423 z = -Gn / H;
1190 1424 else
1191 1425 z = -wpd[j];
@@ -1218,7 +1452,7 @@ else if (Gn >= H * wpd[j])
1218 1452 QP_Gmax_old = QP_Gmax_new;
1219 1453 }
1220 1454
1221   - if (iter >= max_iter) info("WARNING: reaching max number of inner iterations\n");
  1455 + if (iter >= max_iter) info("WARNING: reaching max number of inner iterations%n");
1222 1456
1223 1457 delta = 0;
1224 1458 w_norm_new = 0;
@@ -1321,7 +1555,7 @@ static Problem transpose(Problem prob) {
1321 1555 Problem prob_col = new Problem();
1322 1556 prob_col.l = l;
1323 1557 prob_col.n = n;
1324   - prob_col.y = new int[l];
  1558 + prob_col.y = new double[l];
1325 1559 prob_col.x = new Feature[n][];
1326 1560
1327 1561 for (int i = 0; i < l; i++)
@@ -1368,6 +1602,7 @@ static void swap(IntArrayPointer array, int idxA, int idxB) {
1368 1602 array.set(idxB, temp);
1369 1603 }
1370 1604
  1605 +
1371 1606 /**
1372 1607 * @throws IllegalArgumentException if the feature nodes of prob are not sorted in ascending order
1373 1608 */
@@ -1395,123 +1630,158 @@ public static Model train(Problem prob, Parameter param) {
1395 1630 model.nr_feature = n - 1;
1396 1631 else
1397 1632 model.nr_feature = n;
  1633 +
1398 1634 model.solverType = param.solverType;
1399 1635 model.bias = prob.bias;
1400 1636
1401   - int[] perm = new int[l];
1402   - // group training data of the same class
1403   - GroupClassesReturn rv = groupClasses(prob, perm);
1404   - int nr_class = rv.nr_class;
1405   - int[] label = rv.label;
1406   - int[] start = rv.start;
1407   - int[] count = rv.count;
1408   -
1409   - model.nr_class = nr_class;
1410   - model.label = new int[nr_class];
1411   - for (int i = 0; i < nr_class; i++)
1412   - model.label[i] = label[i];
1413   -
1414   - // calculate weighted C
1415   - double[] weighted_C = new double[nr_class];
1416   - for (int i = 0; i < nr_class; i++) {
1417   - weighted_C[i] = param.C;
1418   - }
  1637 + if (param.solverType == SolverType.L2R_L2LOSS_SVR || //
  1638 + param.solverType == SolverType.L2R_L1LOSS_SVR_DUAL || //
  1639 + param.solverType == SolverType.L2R_L2LOSS_SVR_DUAL) {
  1640 + model.w = new double[w_size];
  1641 + model.nr_class = 2;
  1642 + model.label = null;
1419 1643
1420   - for (int i = 0; i < param.getNumWeights(); i++) {
1421   - int j;
1422   - for (j = 0; j < nr_class; j++)
1423   - if (param.weightLabel[i] == label[j]) break;
1424   - if (j == nr_class) throw new IllegalArgumentException("class label " + param.weightLabel[i] + " specified in weight is not found");
1425   -
1426   - weighted_C[j] *= param.weight[i];
1427   - }
  1644 + checkProblemSize(n, model.nr_class);
1428 1645