// Accord Machine Learning Library
// The Accord.NET Framework
// http://accord-framework.net
//
// Copyright © César Souza, 2009-2017
// cesarsouza at gmail.com
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
//
// This code has been based on the LIBLINEAR project's implementation for the
// L2-regularized L2-loss support vector classification (dual) machines. The
// original LIBLINEAR license is given below:
//
//
// Copyright (c) 2007-2011 The LIBLINEAR Project.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither name of copyright holders nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

namespace Accord.MachineLearning.VectorMachines.Learning
{
    using Accord.Statistics.Kernels;
    using System;
    using System.Collections;
    using System.Diagnostics;
    using System.Threading;
    using Statistics.Models.Regression.Linear;

    /// <summary>
    ///   Different categories of loss functions that can be used to learn
    ///   support vector machines.
    /// </summary>
    ///
    public enum Loss
    {
        /// <summary>
        ///   Hinge-loss function.
        /// </summary>
        ///
        L1,

        /// <summary>
        ///   Squared hinge-loss function.
        /// </summary>
        ///
        L2
    };
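
    // The helper below is an illustrative sketch added to this listing (it is not part
    // of the original file, and the name LossIllustration is hypothetical). It makes the
    // two Loss choices concrete: Loss.L1 is the hinge loss max(0, 1 - y*f(x)), while
    // Loss.L2 is its square, the squared hinge loss.
    internal static class LossIllustration
    {
        /// <summary>
        ///   Computes the per-sample loss for a raw machine output f(x)
        ///   and a label y in {-1, +1}, for either loss category.
        /// </summary>
        public static double Compute(Loss loss, int y, double fx)
        {
            double hinge = Math.Max(0, 1 - y * fx); // hinge loss
            return loss == Loss.L1 ? hinge : hinge * hinge; // squared hinge for Loss.L2
        }
    }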

    /// <summary>
    ///   L2-regularized, L1 or L2-loss dual formulation Support Vector Machine
    ///   learning (-s 1 and -s 3).
    /// </summary>
    ///
    /// <remarks>
    /// <para>
    ///   This class implements a learning algorithm specifically crafted for linear
    ///   machines only. It provides an L2-regularized, L1 or L2-loss coordinate descent
    ///   learning algorithm for optimizing the dual form of learning. The code is based
    ///   on liblinear's solve_l2r_l1l2_svc method, whose original description is
    ///   provided below.</para>
    ///
    /// <para>
    ///   Liblinear's solver -s 1: L2R_L2LOSS_SVC_DUAL and -s 3: L2R_L1LOSS_SVC_DUAL.
    ///   A coordinate descent algorithm for L1-loss and L2-loss SVM problems in the dual:</para>
    ///
    /// <code>
    ///   min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
    ///     s.t.      0 &lt;= \alpha_i &lt;= upper_bound_i,
    /// </code>
    ///
    /// <para>
    ///   where Qij = yi yj xi^T xj and D is a diagonal matrix.</para>
    ///
    /// <para>In the L1-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = Cp if y_i = 1
    ///   upper_bound_i = Cn if y_i = -1
    ///   D_ii = 0
    /// </code>
    ///
    /// <para>In the L2-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = INF
    ///   D_ii = 1/(2*Cp) if y_i = 1
    ///   D_ii = 1/(2*Cn) if y_i = -1
    /// </code>
    ///
    /// <para>
    ///   Given: x, y, Cp, Cn, and eps as the stopping tolerance.</para>
    ///
    /// <para>
    ///   See Algorithm 3 of Hsieh et al., ICML 2008.</para>
    /// </remarks>
    ///
    /// <example>
    /// <para>
    ///   The next example shows how to solve a multi-class problem using a one-vs-one SVM
    ///   where the binary machines are learned using the Linear Dual Coordinate Descent
    ///   algorithm.</para>
    ///
    /// <para>
    ///   The following example shows how to obtain a <see cref="MultipleLinearRegression"/>
    ///   from a linear <see cref="SupportVectorMachine"/>. It contains exactly the same data
    ///   used in the documentation page for <see cref="MultipleLinearRegression"/>.</para>
    /// </example>
    ///
    public class LinearDualCoordinateDescent :
        BaseLinearDualCoordinateDescent<SupportVectorMachine, Linear, double[]>,
        ILinearSupportVectorMachineLearning
    {
        /// <summary>
        ///   Obsolete.
        /// </summary>
        [Obsolete("Please do not pass parameters in the constructor. Use the default constructor and the Learn method instead.")]
        public LinearDualCoordinateDescent(ISupportVectorMachine model, double[][] input, int[] output)
            : base(model, input, output)
        {
        }

        /// <summary>
        ///   Initializes a new instance of the <see cref="LinearDualCoordinateDescent"/> class.
        /// </summary>
        public LinearDualCoordinateDescent()
        {
        }

        /// <summary>
        ///   Creates an instance of the model to be learned. Inheritors
        ///   of this abstract class must define this method so new models
        ///   can be created from the training data.
        /// </summary>
        protected override SupportVectorMachine Create(int inputs, Linear kernel)
        {
            return new SupportVectorMachine(inputs) { Kernel = kernel };
        }
    }
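
    // Usage sketch, added here for illustration (not part of the original file). It shows
    // how the learner above is typically driven through the Learn/Decide API; the AND-gate
    // data and the class name LinearDualCoordinateDescentUsage are purely hypothetical.
    internal static class LinearDualCoordinateDescentUsage
    {
        public static void Run()
        {
            // Four two-dimensional points and their binary labels (logical AND):
            double[][] inputs =
            {
                new double[] { 0, 0 },
                new double[] { 0, 1 },
                new double[] { 1, 0 },
                new double[] { 1, 1 },
            };

            bool[] outputs = { false, false, false, true };

            // Create the linear dual coordinate descent teacher:
            var teacher = new LinearDualCoordinateDescent()
            {
                Loss = Loss.L2,    // squared hinge loss (the default)
                Tolerance = 0.01   // stopping tolerance of the dual solver
            };

            // Learn a linear support vector machine and query it:
            SupportVectorMachine svm = teacher.Learn(inputs, outputs);
            bool[] predicted = svm.Decide(inputs);
        }
    }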

    /// <summary>
    ///   L2-regularized, L1 or L2-loss dual formulation Support Vector Machine
    ///   learning (-s 1 and -s 3).
    /// </summary>
    ///
    /// <remarks>
    /// <para>
    ///   This class implements a learning algorithm specifically crafted for linear
    ///   machines only. It provides an L2-regularized, L1 or L2-loss coordinate descent
    ///   learning algorithm for optimizing the dual form of learning. The code is based
    ///   on liblinear's solve_l2r_l1l2_svc method, whose original description is
    ///   provided below.</para>
    ///
    /// <para>
    ///   Liblinear's solver -s 1: L2R_L2LOSS_SVC_DUAL and -s 3: L2R_L1LOSS_SVC_DUAL.
    ///   A coordinate descent algorithm for L1-loss and L2-loss SVM problems in the dual:</para>
    ///
    /// <code>
    ///   min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
    ///     s.t.      0 &lt;= \alpha_i &lt;= upper_bound_i,
    /// </code>
    ///
    /// <para>
    ///   where Qij = yi yj xi^T xj and D is a diagonal matrix.</para>
    ///
    /// <para>In the L1-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = Cp if y_i = 1
    ///   upper_bound_i = Cn if y_i = -1
    ///   D_ii = 0
    /// </code>
    ///
    /// <para>In the L2-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = INF
    ///   D_ii = 1/(2*Cp) if y_i = 1
    ///   D_ii = 1/(2*Cn) if y_i = -1
    /// </code>
    ///
    /// <para>
    ///   Given: x, y, Cp, Cn, and eps as the stopping tolerance.</para>
    ///
    /// <para>
    ///   See Algorithm 3 of Hsieh et al., ICML 2008.</para>
    /// </remarks>
    ///
    /// <example>
    /// <para>
    ///   The next example shows how to solve a multi-class problem using a one-vs-one SVM
    ///   where the binary machines are learned using the Linear Dual Coordinate Descent
    ///   algorithm.</para>
    /// </example>
    ///
    public class LinearDualCoordinateDescent<TKernel> :
        BaseLinearDualCoordinateDescent<SupportVectorMachine<TKernel>, TKernel, double[]>
        where TKernel : struct, ILinear
    {
        /// <summary>
        ///   Creates an instance of the model to be learned. Inheritors
        ///   of this abstract class must define this method so new models
        ///   can be created from the training data.
        /// </summary>
        protected override SupportVectorMachine<TKernel> Create(int inputs, TKernel kernel)
        {
            return new SupportVectorMachine<TKernel>(inputs, kernel);
        }
    }

    /// <summary>
    ///   L2-regularized, L1 or L2-loss dual formulation Support Vector Machine
    ///   learning (-s 1 and -s 3).
    /// </summary>
    ///
    /// <remarks>
    /// <para>
    ///   This class implements a learning algorithm specifically crafted for linear
    ///   machines only. It provides an L2-regularized, L1 or L2-loss coordinate descent
    ///   learning algorithm for optimizing the dual form of learning. The code is based
    ///   on liblinear's solve_l2r_l1l2_svc method, whose original description is
    ///   provided below.</para>
    ///
    /// <para>
    ///   Liblinear's solver -s 1: L2R_L2LOSS_SVC_DUAL and -s 3: L2R_L1LOSS_SVC_DUAL.
    ///   A coordinate descent algorithm for L1-loss and L2-loss SVM problems in the dual:</para>
    ///
    /// <code>
    ///   min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
    ///     s.t.      0 &lt;= \alpha_i &lt;= upper_bound_i,
    /// </code>
    ///
    /// <para>
    ///   where Qij = yi yj xi^T xj and D is a diagonal matrix.</para>
    ///
    /// <para>In the L1-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = Cp if y_i = 1
    ///   upper_bound_i = Cn if y_i = -1
    ///   D_ii = 0
    /// </code>
    ///
    /// <para>In the L2-SVM case:</para>
    ///
    /// <code>
    ///   upper_bound_i = INF
    ///   D_ii = 1/(2*Cp) if y_i = 1
    ///   D_ii = 1/(2*Cn) if y_i = -1
    /// </code>
    ///
    /// <para>
    ///   Given: x, y, Cp, Cn, and eps as the stopping tolerance.</para>
    ///
    /// <para>
    ///   See Algorithm 3 of Hsieh et al., ICML 2008.</para>
    /// </remarks>
    ///
    /// <example>
    /// <para>
    ///   The next example shows how to solve a multi-class problem using a one-vs-one SVM
    ///   where the binary machines are learned using the Linear Dual Coordinate Descent
    ///   algorithm.</para>
    /// </example>
    ///
    public class LinearDualCoordinateDescent<TKernel, TInput> :
        BaseLinearDualCoordinateDescent<SupportVectorMachine<TKernel, TInput>, TKernel, TInput>
        where TKernel : struct, ILinear<TInput>
        where TInput : IList
#if !NETSTANDARD1_4
        , ICloneable
#endif
    {
        /// <summary>
        ///   Creates an instance of the model to be learned. Inheritors
        ///   of this abstract class must define this method so new models
        ///   can be created from the training data.
        /// </summary>
        protected override SupportVectorMachine<TKernel, TInput> Create(int inputs, TKernel kernel)
        {
            return new SupportVectorMachine<TKernel, TInput>(inputs, kernel);
        }
    }
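
    // Illustrative sketch (added; not part of the original file): the generic variants
    // above let the kernel be specified as a value-type parameter. A minimal hypothetical
    // use, assuming the Accord.Statistics.Kernels.Linear struct as the kernel:
    internal static class GenericVariantUsage
    {
        public static void Run(double[][] inputs, bool[] outputs)
        {
            // Same learner as before, but with the kernel made explicit in the type:
            var teacher = new LinearDualCoordinateDescent<Linear>();
            SupportVectorMachine<Linear> svm = teacher.Learn(inputs, outputs);
        }
    }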

    /// <summary>
    ///   Base class for Linear Dual Coordinate Descent.
    /// </summary>
    ///
    public abstract class BaseLinearDualCoordinateDescent<TModel, TKernel, TInput> :
        BaseSupportVectorClassification<TModel, TKernel, TInput>
        where TModel : SupportVectorMachine<TKernel, TInput>
        where TKernel : struct, ILinear<TInput>
        where TInput : IList
#if !NETSTANDARD1_4
        , ICloneable
#endif
    {
        int max_iter = 1000;

        private double eps = 0.1;

        private double[] alpha;
        private double[] weights;
        private double bias;

        private Loss loss = Loss.L2;

        /// <summary>
        ///   Constructs a new coordinate descent algorithm for L1-loss
        ///   and L2-loss SVM dual problems.
        /// </summary>
        ///
        public BaseLinearDualCoordinateDescent()
        {
        }

        /// <summary>
        ///   Gets or sets the cost function that should be
        ///   optimized. Default is <see cref="Loss.L2"/>.
        /// </summary>
        ///
        public Loss Loss
        {
            get { return loss; }
            set { loss = value; }
        }

        /// <summary>
        ///   Gets the value for the Lagrange multipliers
        ///   (alpha) for every observation vector.
        /// </summary>
        ///
        public double[] Lagrange
        {
            get { return alpha; }
        }

        /// <summary>
        ///   Convergence tolerance. Default value is 0.1.
        /// </summary>
        ///
        /// <remarks>
        ///   The criterion for completing the model training process.
        ///   The default is 0.1.
        /// </remarks>
        ///
        public double Tolerance
        {
            get { return this.eps; }
            set
            {
                if (value <= 0)
                    throw new ArgumentOutOfRangeException("value");
                this.eps = value;
            }
        }

        /// <summary>
        ///   Runs the learning algorithm.
        /// </summary>
        ///
        protected override void InnerRun()
        {
            TInput[] x = Inputs;
            int samples = Inputs.Length;
            TKernel kernel = Kernel;
            int dimensions = kernel.GetLength(x);
            this.alpha = new double[samples];
            this.weights = new double[dimensions];
            double[] w = weights;
            double[] c = this.C;
            int[] y = Outputs;

            var random = Accord.Math.Random.Generator.Random;

            // Lagrange multipliers
            Array.Clear(alpha, 0, alpha.Length);

            // Zero the weight vector
            bias = 0;

            int iter = 0;
            double[] QD = new double[x.Length];
            int[] index = new int[x.Length];

            int active_size = x.Length;

            // PG: projected gradient, for shrinking and stopping
            double PG;
            double PGmax_old = Double.PositiveInfinity;
            double PGmin_old = Double.NegativeInfinity;
            double PGmax_new, PGmin_new;

            // default solver_type: L2R_L2LOSS_SVC_DUAL
            // double[] diag = { 0.5 / Cn, 0, 0.5 / Cp };
            // double[] upper_bound = { Double.PositiveInfinity, 0, Double.PositiveInfinity };

            double[] diag = new double[c.Length];
            double[] upper_bound = new double[c.Length];

            if (Loss == Loss.L2)
            {
                // Default: D_ii = 1/(2 C_i) and the alphas are unbounded above
                for (int i = 0; i < diag.Length; i++)
                    diag[i] = 0.5 / c[i];

                for (int i = 0; i < upper_bound.Length; i++)
                    upper_bound[i] = Double.PositiveInfinity;
            }
            else
            {
                // L1-loss case: diag remains zero, and upper_bound = c
                for (int i = 0; i < upper_bound.Length; i++)
                    upper_bound[i] = c[i];
            }

            // Initialize QD (diagonal of Q + D, plus 1 for the bias term),
            // the weight vector w = sum_i y_i alpha_i x_i, and the index permutation
            for (int i = 0; i < x.Length; i++)
            {
                QD[i] = 1 + diag[i] + kernel.Function(x[i], x[i]);

                kernel.Product(y[i] * alpha[i], x[i], accumulate: w);
                bias += y[i] * alpha[i];

                index[i] = i;
            }

            while (iter < max_iter)
            {
                if (Token.IsCancellationRequested)
                    break;

                PGmax_new = double.NegativeInfinity;
                PGmin_new = double.PositiveInfinity;

                // Shuffle the active set (Fisher-Yates)
                for (int i = 0; i < active_size; i++)
                {
                    int j = i + random.Next() % (active_size - i);

                    var old = index[i];
                    index[i] = index[j];
                    index[j] = old;
                }

                for (int s = 0; s < active_size; s++)
                {
                    int i = index[s];
                    int yi = y[i];

                    // Gradient of the dual objective at coordinate i:
                    // G = y_i (w'x_i + bias) - 1 + D_ii alpha_i
                    double G = bias + kernel.Function(w, x[i]);
                    G = G * yi - 1;

                    double C = upper_bound[i];
                    G += alpha[i] * diag[i];

                    // Projected gradient: zero when alpha_i sits at an active bound
                    PG = 0;

                    if (alpha[i] == 0)
                    {
                        if (G > PGmax_old)
                        {
                            // Shrink: remove this coordinate from the active set
                            active_size--;

                            var old = index[s];
                            index[s] = index[active_size];
                            index[active_size] = old;

                            s--;
                            continue;
                        }
                        else if (G < 0)
                        {
                            PG = G;
                        }
                    }
                    else if (alpha[i] == C)
                    {
                        if (G < PGmin_old)
                        {
                            // Shrink: remove this coordinate from the active set
                            active_size--;

                            var old = index[s];
                            index[s] = index[active_size];
                            index[active_size] = old;

                            s--;
                            continue;
                        }
                        else if (G > 0)
                        {
                            PG = G;
                        }
                    }
                    else
                    {
                        PG = G;
                    }

                    PGmax_new = Math.Max(PGmax_new, PG);
                    PGmin_new = Math.Min(PGmin_new, PG);

                    if (Math.Abs(PG) > 1.0e-12)
                    {
                        // Newton step along coordinate i, clipped to the box [0, C]
                        double alpha_old = alpha[i];

                        alpha[i] = Math.Min(Math.Max(alpha[i] - G / QD[i], 0.0), C);

                        // Update w and bias incrementally to reflect the new alpha_i
                        double d = (alpha[i] - alpha_old) * yi;

                        kernel.Product(d, x[i], accumulate: w);
                        bias += d;
                    }
                }

                iter++;

                if (iter % 10 == 0)
                    Debug.WriteLine(".");

                if (PGmax_new - PGmin_new <= eps)
                {
                    // Converged on the active set; restart on the full set once
                    // to confirm convergence before stopping
                    if (active_size == x.Length)
                        break;

                    active_size = x.Length;

                    Debug.WriteLine("*");

                    PGmax_old = Double.PositiveInfinity;
                    PGmin_old = Double.NegativeInfinity;
                    continue;
                }

                PGmax_old = PGmax_new;
                PGmin_old = PGmin_new;

                if (PGmax_old <= 0)
                    PGmax_old = Double.PositiveInfinity;

                if (PGmin_old >= 0)
                    PGmin_old = Double.NegativeInfinity;
            }

            Debug.WriteLine("optimization finished, #iter = " + iter);

            if (iter >= max_iter)
            {
                Debug.WriteLine("WARNING: reaching max number of iterations");
                Debug.WriteLine("Using -s 2 may be faster (also see FAQ)");
            }

            // Store the learned linear machine: a single "support vector" holding w
            Model.Weights = new double[] { 1.0 };
            Model.SupportVectors = new[] { kernel.CreateVector(w) };
            Model.Threshold = bias;
        }
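
        // Illustrative sketch (added; not part of the original file): the core of
        // Algorithm 3 above reduced to a single dense coordinate update, with the bias
        // term and shrinking omitted for clarity. All parameters are hypothetical
        // stand-ins for the corresponding local variables of InnerRun.
        private static void CoordinateUpdateSketch(int i, double[][] x, int[] y,
            double[] alpha, double[] w, double[] QD, double[] diag, double[] upper_bound)
        {
            // G = y_i (w · x_i) - 1 + D_ii alpha_i: gradient of the dual objective
            double dot = 0;
            for (int k = 0; k < w.Length; k++)
                dot += w[k] * x[i][k];

            double G = y[i] * dot - 1 + alpha[i] * diag[i];

            // Newton step G / QD_ii, projected onto the box constraint [0, C]:
            double alpha_old = alpha[i];
            alpha[i] = Math.Min(Math.Max(alpha[i] - G / QD[i], 0.0), upper_bound[i]);

            // Keep w = sum_j y_j alpha_j x_j consistent with the new alpha_i:
            double d = (alpha[i] - alpha_old) * y[i];
            for (int k = 0; k < w.Length; k++)
                w[k] += d * x[i][k];
        }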

        /// <summary>
        ///   Obsolete.
        /// </summary>
        protected BaseLinearDualCoordinateDescent(ISupportVectorMachine model, TInput[] input, int[] output)
            : base(model, input, output)
        {
        }
    }
}