Permalink
Browse files

port version 1.91

+ support for linear support vector regression
  • Loading branch information...
1 parent bf0ec6f commit 283f7f3d4e465cc07138aca536732520e2fe9ddf Benedikt Waldvogel committed Aug 23, 2012
View
@@ -29,11 +29,12 @@ The two most important methods that you might be interested in are:
-------------------------------------------------------------------------------
-LIBLINEAR is a simple package for solving large-scale regularized
-linear classification. It currently supports L2-regularized logistic
-regression/L2-loss support vector classification/L1-loss support vector
-classification, and L1-regularized L2-loss support vector classification/
-logistic regression. This document explains the usage of LIBLINEAR.
+LIBLINEAR is a simple package for solving large-scale regularized linear
+classification and regression. It currently supports
+- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
+- L1-regularized L2-loss support vector classification/L1-regularized logistic regression
+- L2-regularized L2-loss support vector regression/L1-loss support vector regression.
+This document explains the usage of LIBLINEAR.
To get started, please read the ``Quick Start'' section first.
For developers, please check the ``Library Usage'' section to learn
@@ -59,8 +60,8 @@ When to use LIBLINEAR but not LIBSVM
For some large data sets, training with or without nonlinear mappings
gives similar performance. Without using kernels, one can
-efficiently train a much larger set via a linear classifier. These
-data usually have a large number of features. Document classification
+efficiently train a much larger set via linear classification/regression.
+These data usually have a large number of features. Document classification
is an example.
Warning: While generally liblinear is very fast, its default solver
@@ -128,25 +129,34 @@ and mark
Usage: train [options] training_set_file [model_file]
options:
-s type : set type of solver (default 1)
- 0 -- L2-regularized logistic regression (primal)
- 1 -- L2-regularized L2-loss support vector classification (dual)
- 2 -- L2-regularized L2-loss support vector classification (primal)
- 3 -- L2-regularized L1-loss support vector classification (dual)
- 4 -- multi-class support vector classification by Crammer and Singer
- 5 -- L1-regularized L2-loss support vector classification
- 6 -- L1-regularized logistic regression
- 7 -- L2-regularized logistic regression (dual)
+ 0 -- L2-regularized logistic regression (primal)
+ 1 -- L2-regularized L2-loss support vector classification (dual)
+ 2 -- L2-regularized L2-loss support vector classification (primal)
+ 3 -- L2-regularized L1-loss support vector classification (dual)
+ 4 -- multi-class support vector classification by Crammer and Singer
+ 5 -- L1-regularized L2-loss support vector classification
+ 6 -- L1-regularized logistic regression
+ 7 -- L2-regularized logistic regression (dual)
+ 11 -- L2-regularized L2-loss epsilon support vector regression (primal)
+ 12 -- L2-regularized L2-loss epsilon support vector regression (dual)
+ 13 -- L2-regularized L1-loss epsilon support vector regression (dual)
-c cost : set the parameter C (default 1)
+-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
-e epsilon : set tolerance of termination criterion
-s 0 and 2
|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
where f is the primal function and pos/neg are # of
positive/negative data (default 0.01)
+ -s 11
+ |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
-s 1, 3, 4 and 7
Dual maximal violation <= eps; similar to libsvm (default 0.1)
-s 5 and 6
|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
where f is the primal function (default 0.01)
+ -s 12 and 13
+ |f'(alpha)|_1 <= eps |f'(alpha0)|,
+ where f is the dual function (default 0.1)
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-wi weight: weights adjust the parameter C of different classes (see README for details)
-v n: n-fold cross validation mode
@@ -183,23 +193,40 @@ For L1-regularized logistic regression (-s 6), we solve
min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i))
+For L2-regularized logistic regression (-s 7), we solve
+
+min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
+ s.t. 0 <= alpha_i <= C,
+
where
Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
-For L2-regularized logistic regression (-s 7), we solve
+For L2-regularized L2-loss SVR (-s 11), we solve
-min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
- s.t. 0 <= alpha_i <= C,
+min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2
+
+For L2-regularized L2-loss SVR dual (-s 12), we solve
+
+min_beta 0.5(beta^T (Q + I/2/C) beta) - y^T beta + epsilon \sum |beta_i|
+
+For L2-regularized L1-loss SVR dual (-s 13), we solve
+
+min_beta 0.5(beta^T Q beta) - y^T beta + epsilon \sum |beta_i|
+ s.t. -C <= beta_i <= C,
+
+where
+
+Q is a matrix with Q_ij = x_i^T x_j.
If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
The primal-dual relationship implies that -s 1 and -s 2 give the same
-model, and -s 0 and -s 7 give the same.
+model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same.
-We implement 1-vs-the rest multi-class strategy. In training i
-vs. non_i, their C parameters are (weight from -wi)*C and C,
-respectively. If there are only two classes, we train only one
+We implement 1-vs-the rest multi-class strategy for classification.
+In training i vs. non_i, their C parameters are (weight from -wi)*C
+and C, respectively. If there are only two classes, we train only one
model. Thus weight1*C vs. weight2*C is used. See examples below.
We also implement multi-class SVM by Crammer and Singer (-s 4):
@@ -224,7 +251,10 @@ and C^m_i = C if m = y_i,
Usage: predict [options] test_file model_file output_file
options:
--b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)
+-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+
+Note that -b is only needed in the prediction phase. This is different
+from the setting of LIBSVM.
Examples
========
@@ -267,8 +297,9 @@ Library Usage
- Function: model* train(const struct problem *prob,
const struct parameter *param);
- This function constructs and returns a linear classification model
- according to the given training data and parameters.
+ This function constructs and returns a linear classification
+ or regression model according to the given training data and
+ parameters.
struct problem describes the problem:
@@ -283,10 +314,10 @@ Library Usage
where `l' is the number of training data. If bias >= 0, we assume
that one additional feature is added to the end of each data
instance. `n' is the number of feature (including the bias feature
- if bias >= 0). `y' is an array containing the target values. And
- `x' is an array of pointers,
- each of which points to a sparse representation (array of feature_node) of one
- training vector.
+ if bias >= 0). `y' is an array containing the target values (integers
+ in classification, real numbers in regression). And `x' is an array
+ of pointers, each of which points to a sparse representation (array
+ of feature_node) of one training vector.
For example, if we have the following training data:
@@ -311,7 +342,8 @@ Library Usage
[ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?)
[ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?)
- struct parameter describes the parameters of a linear classification model:
+ struct parameter describes the parameters of a linear classification
+ or regression model:
struct parameter
{
@@ -323,9 +355,10 @@ Library Usage
int nr_weight;
int *weight_label;
double* weight;
+ double p;
};
- solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL.
+ solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
L2R_LR L2-regularized logistic regression (primal)
L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
@@ -335,8 +368,12 @@ Library Usage
L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
L1R_LR L1-regularized logistic regression
L2R_LR_DUAL L2-regularized logistic regression (dual)
+ L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal)
+ L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual)
+ L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual)
C is the cost of constraints violation.
+ p is the epsilon in the loss function of epsilon-insensitive support vector regression.
eps is the stopping criterion.
nr_weight, weight_label, and weight are used to change the penalty
@@ -368,7 +405,8 @@ Library Usage
param describes the parameters used to obtain the model.
- nr_class and nr_feature are the number of classes and features, respectively.
+ nr_class and nr_feature are the number of classes and features,
+ respectively. nr_class = 2 for regression.
The nr_feature*nr_class array w gives feature weights. We use one
against the rest for multi-class classification, so each feature
@@ -386,7 +424,7 @@ Library Usage
The array label stores class labels.
-- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target);
+- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target);
This function conducts cross validation. Data are separated to
nr_fold folds. Under given parameters, sequentially each fold is
@@ -396,23 +434,25 @@ Library Usage
The format of prob is same as that for train().
-- Function: int predict(const model *model_, const feature_node *x);
+- Function: double predict(const model *model_, const feature_node *x);
- This functions classifies a test vector using the given
- model. The predicted label is returned.
+ For a classification model, the predicted class for x is returned.
+ For a regression model, the function value of x calculated using
+ the model is returned.
-- Function: int predict_values(const struct model *model_,
+- Function: double predict_values(const struct model *model_,
const struct feature_node *x, double* dec_values);
- This function gives nr_w decision values in the array
- dec_values. nr_w is 1 if there are two classes except multi-class
- svm by Crammer and Singer (-s 4), and is the number of classes otherwise.
+ This function gives nr_w decision values in the array dec_values.
+ nr_w=1 if regression is applied or the number of classes is two. An exception is
+ multi-class svm by Crammer and Singer (-s 4), where nr_w = 2 if there are two classes. For all other situations, nr_w is the
+ number of classes.
- We implement one-vs-the rest multi-class strategy (-s 0,1,2,3) and
- multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
+ We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7)
+ and multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
The class with the highest decision value is returned.
-- Function: int predict_probability(const struct model *model_,
+- Function: double predict_probability(const struct model *model_,
const struct feature_node *x, double* prob_estimates);
This function gives nr_class probability estimates in the array
@@ -428,10 +468,12 @@ Library Usage
- Function: int get_nr_class(const model *model_);
The function gives the number of classes of the model.
+ For a regression model, 2 is returned.
- Function: void get_labels(const model *model_, int* label);
This function outputs the name of labels into an array called label.
+ For a regression model, label is unchanged.
- Function: const char *check_parameter(const struct problem *prob,
const struct parameter *param);
View
@@ -4,7 +4,7 @@
<artifactId>liblinear</artifactId>
<packaging>jar</packaging>
<name>liblinear</name>
- <version>1.9-SNAPSHOT</version>
+ <version>1.91-SNAPSHOT</version>
<description>Java port of Liblinear</description>
<url>http://www.bwaldvogel.de/liblinear-java/</url>
@@ -2,49 +2,40 @@
class L2R_L2_SvcFunction implements Function {
- private final Problem prob;
- private final double[] C;
- private final int[] I;
- private final double[] z;
+ protected final Problem prob;
+ protected final double[] C;
+ protected final int[] I;
+ protected final double[] z;
- private int sizeI;
+ protected int sizeI;
- public L2R_L2_SvcFunction( Problem prob, double Cp, double Cn ) {
- int i;
+ public L2R_L2_SvcFunction( Problem prob, double[] C ) {
int l = prob.l;
- int[] y = prob.y;
this.prob = prob;
z = new double[l];
- C = new double[l];
I = new int[l];
-
- for (i = 0; i < l; i++) {
- if (y[i] == 1)
- C[i] = Cp;
- else
- C[i] = Cn;
- }
+ this.C = C;
}
public double fun(double[] w) {
int i;
double f = 0;
- int[] y = prob.y;
+ double[] y = prob.y;
int l = prob.l;
int w_size = get_nr_variable();
Xv(w, z);
+
+ for (i = 0; i < w_size; i++)
+ f += w[i] * w[i];
+ f /= 2.0;
for (i = 0; i < l; i++) {
z[i] = y[i] * z[i];
double d = 1 - z[i];
if (d > 0) f += C[i] * d * d;
}
- f = 2 * f;
- for (i = 0; i < w_size; i++)
- f += w[i] * w[i];
- f /= 2.0;
return (f);
}
@@ -54,13 +45,12 @@ public int get_nr_variable() {
}
public void grad(double[] w, double[] g) {
- int i;
- int[] y = prob.y;
+ double[] y = prob.y;
int l = prob.l;
int w_size = get_nr_variable();
sizeI = 0;
- for (i = 0; i < l; i++) {
+ for (int i = 0; i < l; i++) {
if (z[i] < 1) {
z[sizeI] = C[i] * y[i] * (z[i] - 1);
I[sizeI] = i;
@@ -69,15 +59,14 @@ public void grad(double[] w, double[] g) {
}
subXTv(z, g);
- for (i = 0; i < w_size; i++)
+ for (int i = 0; i < w_size; i++)
g[i] = w[i] + 2 * g[i];
}
public void Hv(double[] s, double[] Hs) {
int i;
- int l = prob.l;
int w_size = get_nr_variable();
- double[] wa = new double[l];
+ double[] wa = new double[sizeI];
subXv(s, wa);
for (i = 0; i < sizeI; i++)
@@ -88,7 +77,7 @@ public void Hv(double[] s, double[] Hs) {
Hs[i] = s[i] + 2 * Hs[i];
}
- private void subXTv(double[] v, double[] XTv) {
+ protected void subXTv(double[] v, double[] XTv) {
int i;
int w_size = get_nr_variable();
@@ -112,7 +101,7 @@ private void subXv(double[] v, double[] Xv) {
}
}
- private void Xv(double[] v, double[] Xv) {
+ protected void Xv(double[] v, double[] Xv) {
for (int i = 0; i < prob.l; i++) {
Xv[i] = 0;
Oops, something went wrong.

0 comments on commit 283f7f3

Please sign in to comment.