Skip to content

Commit

Permalink
Port "Add a new option -R for not regularizing the bias"
Browse files Browse the repository at this point in the history
  • Loading branch information
bwaldvogel committed Oct 19, 2020
1 parent 8f9d403 commit b8a6bd3
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 94 deletions.
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -183,6 +183,8 @@ sparse data, use `-l 0` to keep the sparsity.
|f'(alpha)|_1 <= eps |f'(alpha0)|,
where f is the dual function (default 0.1)
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-R : do not regularize the bias; must be used with -B 1 to include a bias term; DON'T use this unless you know what it is
(for -s 0, 2, 5, 6, 11)
-wi weight: weights adjust the parameter C of different classes (see README for details)
-v n: n-fold cross validation mode
-C : find parameters (C for -s 0, 2 and C, p for -s 11)
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvcFunction.java
Expand Up @@ -6,17 +6,19 @@ class L2R_L2_SvcFunction implements Function {
protected final double[] C;
protected final int[] I;
protected final double[] z;
protected final boolean regularize_bias;

protected int sizeI;

public L2R_L2_SvcFunction(Problem prob, double[] C) {
/**
 * Creates the L2-regularized L2-loss SVC objective function.
 *
 * @param prob  training problem; {@code prob.l} (number of instances) sizes the work buffers
 * @param param solver parameters; only {@code param.regularize_bias} is read here — it controls
 *              whether the bias term is included in the regularization term
 * @param C     per-instance penalty parameters
 */
public L2R_L2_SvcFunction(Problem prob, Parameter param, double[] C) {
int l = prob.l;

this.prob = prob;

// Work buffers sized by the number of training instances:
// z holds per-instance margins, I indexes the active (violating) instances.
z = new double[l];
I = new int[l];
this.C = C;
this.regularize_bias = param.regularize_bias;
}

@Override
Expand All @@ -31,6 +33,8 @@ public double fun(double[] w) {

for (i = 0; i < w_size; i++)
f += w[i] * w[i];
if (!regularize_bias)
f -= w[w_size - 1] * w[w_size - 1];
f /= 2.0;
for (i = 0; i < l; i++) {
z[i] = y[i] * z[i];
Expand Down Expand Up @@ -64,6 +68,8 @@ public void grad(double[] w, double[] g) {

for (int i = 0; i < w_size; i++)
g[i] = w[i] + 2 * g[i];
if (!regularize_bias)
g[w_size - 1] -= w[w_size - 1];
}

@Override
Expand All @@ -83,6 +89,8 @@ public void Hv(double[] s, double[] Hs) {
}
for (i = 0; i < w_size; i++)
Hs[i] = s[i] + 2 * Hs[i];
if (!regularize_bias)
Hs[w_size - 1] -= s[w_size - 1];
}

protected void subXTv(double[] v, double[] XTv) {
Expand Down Expand Up @@ -111,6 +119,8 @@ public void get_diag_preconditioner(double[] M) {

for (int i = 0; i < w_size; i++)
M[i] = 1;
if (!regularize_bias)
M[w_size - 1] = 0;

for (int i = 0; i < sizeI; i++) {
int idx = I[i];
Expand Down
13 changes: 8 additions & 5 deletions src/main/java/de/bwaldvogel/liblinear/L2R_L2_SvrFunction.java
Expand Up @@ -5,11 +5,11 @@
*/
public class L2R_L2_SvrFunction extends L2R_L2_SvcFunction {

private double p;
private final double p;

public L2R_L2_SvrFunction(Problem prob, double[] C, double p) {
super(prob, C);
this.p = p;
/**
 * Creates the L2-regularized L2-loss SVR objective function.
 *
 * @param prob  training problem, forwarded to the SVC base class
 * @param param solver parameters; {@code param.p} is the epsilon of the epsilon-insensitive
 *              loss, and {@code param.regularize_bias} is consumed by the superclass
 * @param C     per-instance penalty parameters
 */
public L2R_L2_SvrFunction(Problem prob, Parameter param, double[] C) {
super(prob, param, C);
this.p = param.p;
}

@Override
Expand All @@ -24,6 +24,8 @@ public double fun(double[] w) {

for (int i = 0; i < w_size; i++)
f += w[i] * w[i];
if (!regularize_bias)
f -= w[w_size - 1] * w[w_size - 1];
f /= 2;
for (int i = 0; i < l; i++) {
d = z[i] - y[i];
Expand Down Expand Up @@ -61,7 +63,8 @@ public void grad(double[] w, double[] g) {

for (int i = 0; i < w_size; i++)
g[i] = w[i] + 2 * g[i];

if (!regularize_bias)
g[w_size - 1] -= w[w_size - 1];
}

}
15 changes: 13 additions & 2 deletions src/main/java/de/bwaldvogel/liblinear/L2R_LrFunction.java
Expand Up @@ -5,16 +5,18 @@ class L2R_LrFunction implements Function {
private final double[] C;
private final double[] z;
private final double[] D;
private final Problem prob;
private final Problem prob;
private final boolean regularize_bias;

public L2R_LrFunction(Problem prob, double[] C) {
/**
 * Creates the L2-regularized logistic-regression objective function.
 *
 * @param prob      training problem; {@code prob.l} (number of instances) sizes the work buffers
 * @param parameter solver parameters; only {@code parameter.regularize_bias} is read here — it
 *                  controls whether the bias term is included in the regularization term
 * @param C         per-instance penalty parameters
 */
L2R_LrFunction(Problem prob, Parameter parameter, double[] C) {
int l = prob.l;

this.prob = prob;

// Work buffers sized by the number of training instances:
// z holds per-instance decision values, D the diagonal Hessian factors.
z = new double[l];
D = new double[l];
this.C = C;
this.regularize_bias = parameter.regularize_bias;
}

private void Xv(double[] v, double[] Xv) {
Expand Down Expand Up @@ -50,6 +52,9 @@ public double fun(double[] w) {

for (i = 0; i < w_size; i++)
f += w[i] * w[i];
if (!regularize_bias) {
f -= w[w_size - 1] * w[w_size - 1];
}
f /= 2.0;
for (i = 0; i < l; i++) {
double yz = y[i] * z[i];
Expand Down Expand Up @@ -78,6 +83,8 @@ public void grad(double[] w, double[] g) {

for (i = 0; i < w_size; i++)
g[i] = w[i] + g[i];
if (!regularize_bias)
g[w_size - 1] -= w[w_size - 1];
}

@Override
Expand All @@ -99,6 +106,8 @@ public void Hv(double[] s, double[] Hs) {
}
for (i = 0; i < w_size; i++)
Hs[i] = s[i] + Hs[i];
if (!regularize_bias)
Hs[w_size - 1] -= s[w_size - 1];
}

@Override
Expand All @@ -114,6 +123,8 @@ public void get_diag_preconditioner(double[] M) {

for (int i = 0; i < w_size; i++)
M[i] = 1;
if (!regularize_bias)
M[w_size - 1] = 0;

for (int i = 0; i < l; i++) {
for (Feature xi : x[i]) {
Expand Down

0 comments on commit b8a6bd3

Please sign in to comment.