updated
dmeoli committed May 30, 2020
1 parent 2195f35 commit 3ad2093
Showing 9 changed files with 76 additions and 27 deletions.
4 changes: 2 additions & 2 deletions notebooks/ml/MachineLearningReport.ipynb
@@ -45,9 +45,9 @@
"\n",
"The scope of this report excludes the implementation details but is just to show the results over the *Monk's* classification problem and the *Grid Search* results over **ML-CUP19** regression problem. The latter refers to an academic competition within the Machine Learning course for which a **MEE** of **0.75** was achieved using a **Support Vector Regression** with **Laplacian kernel**.\n",
"\n",
"The choice to train a SVM rather than any other models was dictated by my personal fascination about the versatility in the SVM formulation of the core optimization problem in such a differents ways. From the most immediate and simplest i.e, as a *primal formulation* which gives rise to an unconstrained optimization problem, going from more complex and powerful formulation i.e, as a constrained quadratic optimization problem deriving from the *dual* of the primal problem, which allows the kernel trick usage; up to formulations as mathematical artifacts e.g., as a box-constrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of the equality constraints of the Wolfe dual formulation or as an unconstrained quadratic optimization problem deriving from the Lagrangian bi-dual relaxation of the box-constraints.\n",
"The choice to train a SVR rather than any other models was dictated by my personal fascination about the versatility of the SVM formulation in such a differents ways. From the most immediate and simplest i.e, as a *primal formulation* which gives rise to an unconstrained optimization problem, going from more complex and powerful formulation i.e, as a constrained quadratic optimization problem deriving from the *Wolfe dual* of the primal problem; up to formulations as mathematical artifacts e.g., as a box-constrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of the equality constraints in the Wolfe dual formulation or as an unconstrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of both equality and box-constraints.\n",
"\n",
"For performance and efficiency reasons, the training phase over ML-CUP19 in the grid search was done with a custom reimplementation of the Platt's *Sequential Minimization Optimization* algorithm which is the best-known way to train a SVM in its dual formulation, since it breaks up the original large QP problem into a series of smallest possible problems, which are then solved analytically."
"For performance and efficiency reasons, the training phase over ML-CUP19 in the grid search was done with a custom reimplementation of the Platt's *Sequential Minimization Optimization* algorithm which is the best-known way to train a SVM in its Wolfe dual formulation, since it breaks up the original large QP problem into a series of smallest possible problems, which are then solved analytically."
]
},
{
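As an aside, the two-multiplier analytical step that SMO repeats is compact enough to sketch. The snippet below is an illustrative simplification (the working-set selection heuristics, the bias update and the error cache are omitted) and is not the implementation used for the grid search:

```python
import numpy as np

def smo_pair_update(i, j, alphas, y, K, errors, C, tol=1e-12):
    """Analytical solution of the two-variable QP sub-problem of SMO (classification case).
    Returns the updated (alpha_i, alpha_j) pair, or None if this pair makes no progress."""
    if i == j:
        return None
    # feasible segment [L, H] induced by 0 <= alpha <= C and the equality constraint
    if y[i] != y[j]:
        L, H = max(0, alphas[j] - alphas[i]), min(C, C + alphas[j] - alphas[i])
    else:
        L, H = max(0, alphas[i] + alphas[j] - C), min(C, alphas[i] + alphas[j])
    if L >= H:
        return None
    # curvature of the objective along the feasible direction
    eta = K[i, i] + K[j, j] - 2 * K[i, j]
    if eta <= tol:
        return None  # degenerate pair; a full implementation evaluates the objective at L and H
    # unconstrained optimum along the direction, clipped onto [L, H]
    a_j = np.clip(alphas[j] + y[j] * (errors[i] - errors[j]) / eta, L, H)
    # keep the equality constraint sum_i alpha_i y_i = 0 satisfied
    a_i = alphas[i] + y[i] * y[j] * (alphas[j] - a_j)
    return a_i, a_j
```

A complete SMO also maintains the bias term and an error cache, and picks the pair (i, j) with heuristics such as maximal |E_i - E_j|.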
28 changes: 23 additions & 5 deletions notebooks/ml/SupportVectorMachines.ipynb
@@ -119,7 +119,13 @@
" \\textrm{subject to} \\quad & y_{i}(\\langle x_i, w \\rangle +b) \\geq 1 \\ \\forall_{i}\n",
" \\end{aligned} \\tag{1.7}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_{w,b} \\frac{1}{2} \\Vert w \\Vert^2 \\sum_{i=1}^n \\max(0, y_i (\\langle x_i, w \\rangle + b)) $$\n",
"\n",
"where we make use of the Hinge loss."
]
},
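As a concrete illustration of the last remark, here is a hypothetical helper (not part of the notebook or the library) showing that the hinge terms are all zero exactly when every hard-margin constraint holds:

```python
import numpy as np

def hinge_terms(w, b, X, y):
    """Per-sample hinge terms max(0, 1 - y_i (<x_i, w> + b)), with y in {-1, +1}."""
    return np.maximum(0, 1 - y * (X @ w + b))

# toy separable data (illustrative values only)
X = np.array([[2.0, 0.0], [-2.0, 0.0]])
y = np.array([1.0, -1.0])
w, b = np.array([1.0, 0.0]), 0.0
assert np.all(hinge_terms(w, b, X, y) == 0)   # both margins are >= 1, so (1.7) is feasible
```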
{
@@ -318,7 +324,7 @@
"\n",
"$$\n",
"\\begin{equation}\n",
" y'=\\displaystyle \\operatorname{sgn}(\\sum_{i=1}^{n}\\alpha_i y_i\\langle x_{i}, x' \\rangle+b) \\tag{3.11}\n",
" y'=\\displaystyle \\operatorname{sgn}(\\sum_{i=1}^{n}\\alpha_i y_i\\langle x_{i}, x' \\rangle+b) \\tag{1.18}\n",
"\\end{equation}\n",
"$$"
]
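For completeness, the dual decision function in (1.18) can be evaluated directly from the multipliers; the following is a minimal sketch (a hypothetical helper with a linear kernel as default, not the repository's code):

```python
import numpy as np

def dual_predict(alphas, y, X_train, b, x_new, kernel=np.dot):
    """sign(sum_i alpha_i y_i k(x_i, x') + b); only support vectors (alpha_i > 0) contribute."""
    k = np.array([kernel(x_i, x_new) for x_i in X_train])
    return np.sign(alphas @ (y * k) + b)
```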
@@ -439,7 +445,13 @@
" \\textrm{subject to} \\quad & y_{i}(\\langle x_i, w \\rangle +b) \\geq 1 - \\xi_{i} \\ \\forall_{i}\n",
" \\end{aligned} \\tag{2.4}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_{w,b} \\frac{1}{2} \\Vert w \\Vert^2 + C \\sum_{i=1}^n \\max(0, y_i (\\langle x_i, w \\rangle + b)) $$\n",
"\n",
"where we make use of the Hinge loss."
]
},
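A sketch of how the unconstrained soft-margin objective and one of its subgradients can be evaluated (illustrative code under the assumption that y is in {-1, +1}; not the notebook's implementation):

```python
import numpy as np

def soft_margin_objective(w, b, X, y, C):
    """1/2 ||w||^2 + C * sum_i max(0, 1 - y_i (<x_i, w> + b))."""
    hinge = np.maximum(0, 1 - y * (X @ w + b))
    return 0.5 * w @ w + C * hinge.sum()

def soft_margin_subgradient(w, b, X, y, C):
    """A subgradient of the objective above (the hinge loss is not differentiable at margin 1)."""
    violated = y * (X @ w + b) < 1              # samples inside or beyond the margin
    g_w = w - C * (y[violated, None] * X[violated]).sum(axis=0)
    g_b = -C * y[violated].sum()
    return g_w, g_b
```

Plugging these into any subgradient or proximal method gives the unconstrained primal training route mentioned in the report.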
{
@@ -1102,11 +1114,17 @@
"$$\n",
"\\begin{equation}\n",
" \\begin{aligned}\n",
" \\min \\quad & \\frac{1}{2}\\Vert w\\Vert^{2} + C \\sum_{i=1}^{n}(\\xi_{i}^{+}+\\xi_{i}^{-}) \\\\\n",
" \\min_w \\quad & \\frac{1}{2}\\Vert w\\Vert^{2} + C \\sum_{i=1}^{n}(\\xi_{i}^{+}+\\xi_{i}^{-}) \\\\\n",
" \\textrm{subject to} \\quad & y_{i} - \\langle x_i, w \\rangle - b \\leq \\epsilon + \\xi_{i}^{+} \\ \\forall_{i} \\\\ & \\langle x_i, w \\rangle + b - y_{i} \\leq \\epsilon + \\xi_{i}^{-} \\ \\forall_{i}\n",
" \\end{aligned} \\tag{3.3}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_ {w, b} \\frac{1}{2} \\Vert w\\Vert^{2} + C \\sum_{i=1}^n \\max(0, |y_i - (\\langle x_i, w \\rangle + b)| - \\epsilon) $$\n",
"\n",
"where we make use of the epsilon-insensitive loss i.e., errors of less than $\\epsilon$ are ignored."
]
},
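Analogously to the classification case, the ε-insensitive reformulation of (3.3) can be evaluated directly; a hypothetical helper (not the repository's code) could look like:

```python
import numpy as np

def eps_insensitive_objective(w, b, X, y, C, epsilon):
    """1/2 ||w||^2 + C * sum_i max(0, |y_i - (<x_i, w> + b)| - epsilon)."""
    residuals = np.abs(y - (X @ w + b))
    loss = np.maximum(0, residuals - epsilon)   # residuals inside the epsilon-tube cost nothing
    return 0.5 * w @ w + C * loss.sum()
```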
{
11 changes: 7 additions & 4 deletions optiml/ml/svm/_base.py
@@ -12,7 +12,7 @@
from .smo import SMO, SMOClassifier, SMORegression
from ...opti import Optimizer
from ...opti import Quadratic
from ...opti.qp import LagrangianConstrainedQuadratic
from ...opti.qp import LagrangianEqualityConstrainedQuadratic, LagrangianConstrainedQuadratic
from ...opti.qp.bcqp import BoxConstrainedQuadraticOptimizer, LagrangianBoxConstrainedQuadratic
from ...opti.unconstrained import ProximalBundle
from ...opti.unconstrained.line_search import LineSearchOptimizer
@@ -362,9 +362,12 @@ def fit(self, X, y):

if issubclass(self.optimizer, BoxConstrainedQuadraticOptimizer):

self.optimizer = self.optimizer(f=self.obj, ub=ub, max_iter=self.max_iter,
verbose=self.verbose).minimize()
alphas = self.optimizer.x
self.obj = LagrangianEqualityConstrainedQuadratic(Q, q, A)
self.optimizer = self.optimizer(f=self.obj, ub=np.append(ub, 0), max_iter=self.max_iter,
verbose=self.verbose)
self.optimizer.x = np.zeros(self.obj.ndim)
self.optimizer.minimize()
alphas = self.optimizer.x[:-1]

elif issubclass(self.optimizer, Optimizer):

4 changes: 2 additions & 2 deletions optiml/ml/tests/test_svm.py
@@ -70,8 +70,8 @@ def test_solve_svr_as_qp_with_cvxopt():
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, train_size=0.75, random_state=1)
# svr = DualSVR(kernel=linear, optimizer=InteriorPoint).fit(X_train, y_train)
# assert svr.score(X_test, y_test) >= 0.77
#
#


# def test_solve_svr_as_bcqp_lagrangian_relaxation_with_frank_wolfe():
# X, y = load_boston(return_X_y=True)
# X_scaled = StandardScaler().fit_transform(X)
11 changes: 6 additions & 5 deletions optiml/ml/utils.py
@@ -105,9 +105,9 @@ def plot_svm_hyperplane(svm, X, y):
plt.xlabel('$X$', fontsize=9)
plt.ylabel('$y$', fontsize=9)

kernel = ('' if isinstance(svm, LinearClassifierMixin) or isinstance(svm, LinearModel)
else 'using ' + (svm.kernel + ' kernel' if isinstance(svm.kernel, str)
else svm.kernel.__class__.__name__ if isinstance(svm.kernel, Kernel) else svm.kernel.__name__))
kernel = ('' if isinstance(svm, LinearClassifierMixin) or isinstance(svm, LinearModel) else
'using ' + (svm.kernel + ' kernel' if isinstance(svm.kernel, str) else
svm.kernel.__class__.__name__ if isinstance(svm.kernel, Kernel) else svm.kernel.__name__))
plt.title(f'{"custom" if isinstance(svm, SVM) else "sklearn"} {svm.__class__.__name__} {kernel}', fontsize=9)

# set the legend
@@ -136,14 +136,14 @@ def plot_svm_hyperplane(svm, X, y):
['training data', 'decision boundary', '$\epsilon$-insensitive tube', 'support vectors'],
fontsize='7', shadow=True).get_frame().set_facecolor('white')

# training data
# plot training data
if isinstance(svm, ClassifierMixin):
plt.plot(X1[:, 0], X1[:, 1], marker='x', markersize=5, color='lightblue', linestyle='none')
plt.plot(X2[:, 0], X2[:, 1], marker='o', markersize=4, color='darkorange', linestyle='none')
else:
plt.plot(X, y, marker='o', markersize=4, color='darkorange', linestyle='none')

# support vectors
# plot support vectors
if isinstance(svm, ClassifierMixin):
if isinstance(svm, DualSVC) or isinstance(svm, SKLSVC):
plt.scatter(X[svm.support_][:, 0], X[svm.support_][:, 1], s=60, color='navy')
@@ -157,6 +157,7 @@
support_ = np.argwhere((2 * y - 1) * svm.predict(X) <= svm.epsilon).ravel()
plt.scatter(X[support_], y[support_], s=60, color='navy')

# plot boundaries
if isinstance(svm, ClassifierMixin):
_X1, _X2 = np.meshgrid(np.linspace(X1.min(), X1.max(), 50), np.linspace(X1.min(), X1.max(), 50))
X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(_X1), np.ravel(_X2))])
5 changes: 4 additions & 1 deletion optiml/opti/_base.py
@@ -1,5 +1,8 @@
import autograd.numpy as np
from autograd import jacobian, hessian
from scipy.linalg import ldl

from optiml.opti.utils import ldl_solve


class Optimizer:
@@ -124,7 +127,7 @@ def __init__(self, Q, q):
def x_star(self):
if not hasattr(self, 'x_opt'):
try:
self.x_opt = np.linalg.solve(self.Q, -self.q) # complexity O(2n^3/3)
self.x_opt = ldl_solve(ldl(self.Q), -self.q)  # solves Q x = -q via an LDL^T factorization, complexity O(n^3/3) since Q is symmetric
except np.linalg.LinAlgError: # the Hessian matrix is singular
self.x_opt = np.full(fill_value=np.nan, shape=self.ndim)
return self.x_opt
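The `ldl_solve` helper imported from `optiml.opti.utils` is not shown in this diff. Its actual implementation may differ, but a plausible sketch built on `scipy.linalg.ldl` (which returns an outer factor `L`, a block-diagonal `D` and a permutation `perm` such that `A = L D L^T` with `L[perm]` lower triangular) is:

```python
import numpy as np
from scipy.linalg import solve_triangular

def ldl_solve(ldl_factor, b):
    """Solve A x = b given the (L, D, perm) triple returned by scipy.linalg.ldl(A)."""
    L, D, perm = ldl_factor
    # forward substitution on the permuted triangular factor: L z = b
    z = solve_triangular(L[perm], b[perm], lower=True)
    # block-diagonal solve: D w = z (D may contain 2x2 blocks, so use a general solve)
    w = np.linalg.solve(D, z)
    # back substitution L^T x = w, then undo the permutation
    x = np.empty_like(b, dtype=float)
    x[perm] = solve_triangular(L[perm].T, w, lower=False)
    return x
```

Exploiting symmetry this way roughly halves the cost of a generic LU solve, which is presumably the motivation for replacing `np.linalg.solve` here and in `newton.py` below.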
4 changes: 2 additions & 2 deletions optiml/opti/qp/__init__.py
@@ -1,3 +1,3 @@
__all__ = ['LagrangianDual', 'LagrangianConstrainedQuadratic']
__all__ = ['LagrangianDual', 'LagrangianEqualityConstrainedQuadratic', 'LagrangianConstrainedQuadratic']

from .lagrangian_dual import LagrangianDual, LagrangianConstrainedQuadratic
from .lagrangian_dual import LagrangianDual, LagrangianEqualityConstrainedQuadratic, LagrangianConstrainedQuadratic
32 changes: 27 additions & 5 deletions optiml/opti/qp/lagrangian_dual.py
@@ -40,10 +40,7 @@ def _print_dual_info(self, opt):
print(' - pcost: {: 1.4e}'.format(self.f_x), end='')
print(' - gap: {: 1.4e}'.format(gap), end='')

try:
self.callback()
except StopIteration:
raise StopIteration
self.callback()

if gap <= self.eps:
self.status = 'optimal'
@@ -87,6 +84,31 @@ def callback(self, args=()):
self._callback(self, *args, *self.callback_args)


class LagrangianEqualityConstrainedQuadratic(Quadratic):

def __init__(self, Q, q, A):
"""
Construct the Lagrangian relaxation of an equality-constrained quadratic function defined as:
1/2 x^T Q x + q^T x : A x = 0
By using Lagrange multipliers and seeking the extremum of the Lagrangian, it may be readily
shown that the solution to the equality constrained problem is given by the linear system:
| Q A^T | | x | = | -q |
| A 0 | | lambda | | 0 |
where lambda is a set of Lagrange multipliers which come out of the solution alongside x.
:param A: equality constraints matrix to be relaxed
"""
A = np.atleast_2d(np.asarray(A, dtype=np.float))
Q = np.vstack((np.hstack((Q, A.T)),
np.hstack((A, np.zeros((A.shape[0], A.shape[0]))))))
q = np.hstack((-q, np.zeros(A.shape[0])))
super().__init__(Q, q)


class LagrangianConstrainedQuadratic(Quadratic):

def __init__(self, quad, A, ub):
@@ -105,7 +127,7 @@ def __init__(self, quad, A, ub):
# of Q because it is symmetric but could be not positive definite.
# This will be used at each iteration to solve the Lagrangian relaxation.
self.L, self.D, self.P = ldl(self.Q)
self.A = np.asarray(A, dtype=np.float)
self.A = np.atleast_2d(np.asarray(A, dtype=np.float))
if any(u < 0 for u in ub):
raise ValueError('the upper bound must be >= 0')
self.ub = np.asarray(ub, dtype=np.float)
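To make the construction in `LagrangianEqualityConstrainedQuadratic` concrete, here is a small self-contained check of the augmented KKT system described in its docstring (toy numbers, not taken from the repository):

```python
import numpy as np

# min 1/2 x^T Q x + q^T x  subject to  A x = 0
Q = np.array([[4., 1.], [1., 3.]])
q = np.array([1., -2.])
A = np.array([[1., 1.]])

KKT = np.block([[Q, A.T],
                [A, np.zeros((A.shape[0], A.shape[0]))]])
rhs = np.concatenate((-q, np.zeros(A.shape[0])))

z = np.linalg.solve(KKT, rhs)
x, lam = z[:Q.shape[0]], z[Q.shape[0]:]
assert np.allclose(A @ x, 0)                  # primal feasibility
assert np.allclose(Q @ x + q + A.T @ lam, 0)  # stationarity of the Lagrangian
```

Solving this single linear system yields both the constrained minimizer x and the Lagrange multipliers lambda at once.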
4 changes: 3 additions & 1 deletion optiml/opti/unconstrained/line_search/newton.py
@@ -1,6 +1,8 @@
import numpy as np
from scipy.linalg import ldl

from . import LineSearchOptimizer
from ...utils import ldl_solve


class Newton(LineSearchOptimizer):
@@ -167,7 +169,7 @@ def minimize(self):
if self.is_verbose():
print('\t{: 1.4e}'.format(self.delta), end='')

d = -np.linalg.inv(self.H_x).dot(self.g_x) # or np.linalg.solve(self.H_x, self.g_x)
d = ldl_solve(ldl(self.H_x), -self.g_x)  # Newton direction: solve H_x d = -g_x via an LDL^T factorization

phi_p0 = self.g_x.T.dot(d)

