updated
dmeoli committed May 30, 2020
1 parent 2195f35 commit 3ad2093
Showing 9 changed files with 76 additions and 27 deletions.
4 changes: 2 additions & 2 deletions notebooks/ml/MachineLearningReport.ipynb
@@ -45,9 +45,9 @@
"\n",
"The scope of this report excludes the implementation details but is just to show the results over the *Monk's* classification problem and the *Grid Search* results over **ML-CUP19** regression problem. The latter refers to an academic competition within the Machine Learning course for which a **MEE** of **0.75** was achieved using a **Support Vector Regression** with **Laplacian kernel**.\n",
"\n",
"The choice to train a SVM rather than any other models was dictated by my personal fascination about the versatility in the SVM formulation of the core optimization problem in such a differents ways. From the most immediate and simplest i.e, as a *primal formulation* which gives rise to an unconstrained optimization problem, going from more complex and powerful formulation i.e, as a constrained quadratic optimization problem deriving from the *dual* of the primal problem, which allows the kernel trick usage; up to formulations as mathematical artifacts e.g., as a box-constrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of the equality constraints of the Wolfe dual formulation or as an unconstrained quadratic optimization problem deriving from the Lagrangian bi-dual relaxation of the box-constraints.\n",
"The choice to train a SVR rather than any other models was dictated by my personal fascination about the versatility of the SVM formulation in such a differents ways. From the most immediate and simplest i.e, as a *primal formulation* which gives rise to an unconstrained optimization problem, going from more complex and powerful formulation i.e, as a constrained quadratic optimization problem deriving from the *Wolfe dual* of the primal problem; up to formulations as mathematical artifacts e.g., as a box-constrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of the equality constraints in the Wolfe dual formulation or as an unconstrained quadratic optimization problem deriving from the *Lagrangian bi-dual relaxation* of both equality and box-constraints.\n",
"\n",
"For performance and efficiency reasons, the training phase over ML-CUP19 in the grid search was done with a custom reimplementation of the Platt's *Sequential Minimization Optimization* algorithm which is the best-known way to train a SVM in its dual formulation, since it breaks up the original large QP problem into a series of smallest possible problems, which are then solved analytically."
"For performance and efficiency reasons, the training phase over ML-CUP19 in the grid search was done with a custom reimplementation of the Platt's *Sequential Minimization Optimization* algorithm which is the best-known way to train a SVM in its Wolfe dual formulation, since it breaks up the original large QP problem into a series of smallest possible problems, which are then solved analytically."
]
},
{
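As an aside, the two-multiplier analytical step that SMO repeats is compact enough to sketch. The snippet below is an illustrative simplification (the working-set selection heuristics, the bias update and the error cache are omitted) and is not the implementation used for the grid search:

```python
import numpy as np

def smo_pair_update(i, j, alphas, y, K, errors, C, tol=1e-12):
    """Analytical solution of the two-variable QP sub-problem of SMO (classification case).
    Returns the updated (alpha_i, alpha_j) pair, or None if this pair makes no progress."""
    if i == j:
        return None
    # feasible segment [L, H] induced by 0 <= alpha <= C and the equality constraint
    if y[i] != y[j]:
        L, H = max(0, alphas[j] - alphas[i]), min(C, C + alphas[j] - alphas[i])
    else:
        L, H = max(0, alphas[i] + alphas[j] - C), min(C, alphas[i] + alphas[j])
    if L >= H:
        return None
    # curvature of the objective along the feasible direction
    eta = K[i, i] + K[j, j] - 2 * K[i, j]
    if eta <= tol:
        return None  # degenerate pair; a full implementation evaluates the objective at L and H
    # unconstrained optimum along the direction, clipped onto [L, H]
    a_j = np.clip(alphas[j] + y[j] * (errors[i] - errors[j]) / eta, L, H)
    # keep the equality constraint sum_i alpha_i y_i = 0 satisfied
    a_i = alphas[i] + y[i] * y[j] * (alphas[j] - a_j)
    return a_i, a_j
```

A complete SMO also maintains the bias term and an error cache, and picks the pair (i, j) with heuristics such as maximal |E_i - E_j|.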
28 changes: 23 additions & 5 deletions notebooks/ml/SupportVectorMachines.ipynb
@@ -119,7 +119,13 @@
" \\textrm{subject to} \\quad & y_{i}(\\langle x_i, w \\rangle +b) \\geq 1 \\ \\forall_{i}\n",
" \\end{aligned} \\tag{1.7}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_{w,b} \\frac{1}{2} \\Vert w \\Vert^2 \\sum_{i=1}^n \\max(0, y_i (\\langle x_i, w \\rangle + b)) $$\n",
"\n",
"where we make use of the Hinge loss."
]
},
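As a concrete illustration of the last remark, here is a hypothetical helper (not part of the notebook or the library) showing that the hinge terms are all zero exactly when every hard-margin constraint holds:

```python
import numpy as np

def hinge_terms(w, b, X, y):
    """Per-sample hinge terms max(0, 1 - y_i (<x_i, w> + b)), with y in {-1, +1}."""
    return np.maximum(0, 1 - y * (X @ w + b))

# toy separable data (illustrative values only)
X = np.array([[2.0, 0.0], [-2.0, 0.0]])
y = np.array([1.0, -1.0])
w, b = np.array([1.0, 0.0]), 0.0
assert np.all(hinge_terms(w, b, X, y) == 0)   # both margins are >= 1, so (1.7) is feasible
```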
{
@@ -318,7 +324,7 @@
"\n",
"$$\n",
"\\begin{equation}\n",
" y'=\\displaystyle \\operatorname{sgn}(\\sum_{i=1}^{n}\\alpha_i y_i\\langle x_{i}, x' \\rangle+b) \\tag{3.11}\n",
" y'=\\displaystyle \\operatorname{sgn}(\\sum_{i=1}^{n}\\alpha_i y_i\\langle x_{i}, x' \\rangle+b) \\tag{1.18}\n",
"\\end{equation}\n",
"$$"
]
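For completeness, the dual decision function in (1.18) can be evaluated directly from the multipliers; the following is a minimal sketch (a hypothetical helper with a linear kernel as default, not the repository's code):

```python
import numpy as np

def dual_predict(alphas, y, X_train, b, x_new, kernel=np.dot):
    """sign(sum_i alpha_i y_i k(x_i, x') + b); only support vectors (alpha_i > 0) contribute."""
    k = np.array([kernel(x_i, x_new) for x_i in X_train])
    return np.sign(alphas @ (y * k) + b)
```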
@@ -439,7 +445,13 @@
" \\textrm{subject to} \\quad & y_{i}(\\langle x_i, w \\rangle +b) \\geq 1 - \\xi_{i} \\ \\forall_{i}\n",
" \\end{aligned} \\tag{2.4}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_{w,b} \\frac{1}{2} \\Vert w \\Vert^2 + C \\sum_{i=1}^n \\max(0, y_i (\\langle x_i, w \\rangle + b)) $$\n",
"\n",
"where we make use of the Hinge loss."
]
},
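A sketch of how the unconstrained soft-margin objective and one of its subgradients can be evaluated (illustrative code under the assumption that y is in {-1, +1}; not the notebook's implementation):

```python
import numpy as np

def soft_margin_objective(w, b, X, y, C):
    """1/2 ||w||^2 + C * sum_i max(0, 1 - y_i (<x_i, w> + b))."""
    hinge = np.maximum(0, 1 - y * (X @ w + b))
    return 0.5 * w @ w + C * hinge.sum()

def soft_margin_subgradient(w, b, X, y, C):
    """A subgradient of the objective above (the hinge loss is not differentiable at margin 1)."""
    violated = y * (X @ w + b) < 1              # samples inside or beyond the margin
    g_w = w - C * (y[violated, None] * X[violated]).sum(axis=0)
    g_b = -C * y[violated].sum()
    return g_w, g_b
```

Plugging these into any subgradient or proximal method gives the unconstrained primal training route mentioned in the report.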
{
@@ -1102,11 +1114,17 @@
"$$\n",
"\\begin{equation}\n",
" \\begin{aligned}\n",
" \\min \\quad & \\frac{1}{2}\\Vert w\\Vert^{2} + C \\sum_{i=1}^{n}(\\xi_{i}^{+}+\\xi_{i}^{-}) \\\\\n",
" \\min_w \\quad & \\frac{1}{2}\\Vert w\\Vert^{2} + C \\sum_{i=1}^{n}(\\xi_{i}^{+}+\\xi_{i}^{-}) \\\\\n",
" \\textrm{subject to} \\quad & y_{i} - \\langle x_i, w \\rangle - b \\leq \\epsilon + \\xi_{i}^{+} \\ \\forall_{i} \\\\ & \\langle x_i, w \\rangle + b - y_{i} \\leq \\epsilon + \\xi_{i}^{-} \\ \\forall_{i}\n",
" \\end{aligned} \\tag{3.3}\n",
"\\end{equation}\n",
"$$"
"$$\n",
"\n",
"which can be equivalently formulated as: \n",
"\n",
"$$ \\min_ {w, b} \\frac{1}{2} \\Vert w\\Vert^{2} + C \\sum_{i=1}^n \\max(0, |y_i - (\\langle x_i, w \\rangle + b)| - \\epsilon) $$\n",
"\n",
"where we make use of the epsilon-insensitive loss i.e., errors of less than $\\epsilon$ are ignored."
]
},
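Analogously to the classification case, the ε-insensitive reformulation of (3.3) can be evaluated directly; a hypothetical helper (not the repository's code) could look like:

```python
import numpy as np

def eps_insensitive_objective(w, b, X, y, C, epsilon):
    """1/2 ||w||^2 + C * sum_i max(0, |y_i - (<x_i, w> + b)| - epsilon)."""
    residuals = np.abs(y - (X @ w + b))
    loss = np.maximum(0, residuals - epsilon)   # residuals inside the epsilon-tube cost nothing
    return 0.5 * w @ w + C * loss.sum()
```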
{
11 changes: 7 additions & 4 deletions optiml/ml/svm/_base.py
@@ -12,7 +12,7 @@
from .smo import SMO, SMOClassifier, SMORegression
from ...opti import Optimizer
from ...opti import Quadratic
from ...opti.qp import LagrangianConstrainedQuadratic
from ...opti.qp import LagrangianEqualityConstrainedQuadratic, LagrangianConstrainedQuadratic
from ...opti.qp.bcqp import BoxConstrainedQuadraticOptimizer, LagrangianBoxConstrainedQuadratic
from ...opti.unconstrained import ProximalBundle
from ...opti.unconstrained.line_search import LineSearchOptimizer
@@ -362,9 +362,12 @@ def fit(self, X, y):

if issubclass(self.optimizer, BoxConstrainedQuadraticOptimizer):

self.optimizer = self.optimizer(f=self.obj, ub=ub, max_iter=self.max_iter,
verbose=self.verbose).minimize()
alphas = self.optimizer.x
self.obj = LagrangianEqualityConstrainedQuadratic(Q, q, A)
self.optimizer = self.optimizer(f=self.obj, ub=np.append(ub, 0), max_iter=self.max_iter,
verbose=self.verbose)
self.optimizer.x = np.zeros(self.obj.ndim)
self.optimizer.minimize()
alphas = self.optimizer.x[:-1]

elif issubclass(self.optimizer, Optimizer):

4 changes: 2 additions & 2 deletions optiml/ml/tests/test_svm.py
@@ -70,8 +70,8 @@ def test_solve_svr_as_qp_with_cvxopt():
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, train_size=0.75, random_state=1)
# svr = DualSVR(kernel=linear, optimizer=InteriorPoint).fit(X_train, y_train)
# assert svr.score(X_test, y_test) >= 0.77
#
#


# def test_solve_svr_as_bcqp_lagrangian_relaxation_with_frank_wolfe():
# X, y = load_boston(return_X_y=True)
# X_scaled = StandardScaler().fit_transform(X)
11 changes: 6 additions & 5 deletions optiml/ml/utils.py
@@ -105,9 +105,9 @@ def plot_svm_hyperplane(svm, X, y):
plt.xlabel('$X$', fontsize=9)
plt.ylabel('$y$', fontsize=9)

kernel = ('' if isinstance(svm, LinearClassifierMixin) or isinstance(svm, LinearModel)
else 'using ' + (svm.kernel + ' kernel' if isinstance(svm.kernel, str)
else svm.kernel.__class__.__name__ if isinstance(svm.kernel, Kernel) else svm.kernel.__name__))
kernel = ('' if isinstance(svm, LinearClassifierMixin) or isinstance(svm, LinearModel) else
'using ' + (svm.kernel + ' kernel' if isinstance(svm.kernel, str) else
svm.kernel.__class__.__name__ if isinstance(svm.kernel, Kernel) else svm.kernel.__name__))
plt.title(f'{"custom" if isinstance(svm, SVM) else "sklearn"} {svm.__class__.__name__} {kernel}', fontsize=9)

# set the legend
@@ -136,14 +136,14 @@ def plot_svm_hyperplane(svm, X, y):
['training data', 'decision boundary', '$\epsilon$-insensitive tube', 'support vectors'],
fontsize='7', shadow=True).get_frame().set_facecolor('white')

# training data
# plot training data
if isinstance(svm, ClassifierMixin):
plt.plot(X1[:, 0], X1[:, 1], marker='x', markersize=5, color='lightblue', linestyle='none')
plt.plot(X2[:, 0], X2[:, 1], marker='o', markersize=4, color='darkorange', linestyle='none')
else:
plt.plot(X, y, marker='o', markersize=4, color='darkorange', linestyle='none')

# support vectors
# plot support vectors
if isinstance(svm, ClassifierMixin):
if isinstance(svm, DualSVC) or isinstance(svm, SKLSVC):
plt.scatter(X[svm.support_][:, 0], X[svm.support_][:, 1], s=60, color='navy')
@@ -157,6 +157,7 @@
support_ = np.argwhere((2 * y - 1) * svm.predict(X) <= svm.epsilon).ravel()
plt.scatter(X[support_], y[support_], s=60, color='navy')

# plot boundaries
if isinstance(svm, ClassifierMixin):
_X1, _X2 = np.meshgrid(np.linspace(X1.min(), X1.max(), 50), np.linspace(X1.min(), X1.max(), 50))
X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(_X1), np.ravel(_X2))])
5 changes: 4 additions & 1 deletion optiml/opti/_base.py
@@ -1,5 +1,8 @@
import autograd.numpy as np
from autograd import jacobian, hessian
from scipy.linalg import ldl

from optiml.opti.utils import ldl_solve


class Optimizer:
@@ -124,7 +127,7 @@ def __init__(self, Q, q):
def x_star(self):
if not hasattr(self, 'x_opt'):
try:
self.x_opt = np.linalg.solve(self.Q, -self.q) # complexity O(2n^3/3)
self.x_opt = ldl_solve(ldl(self.Q), -self.q)  # solves Q x = -q via an LDL^T factorization, complexity O(n^3/3) since Q is symmetric
except np.linalg.LinAlgError: # the Hessian matrix is singular
self.x_opt = np.full(fill_value=np.nan, shape=self.ndim)
return self.x_opt
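The `ldl_solve` helper imported from `optiml.opti.utils` is not shown in this diff. Its actual implementation may differ, but a plausible sketch built on `scipy.linalg.ldl` (which returns an outer factor `L`, a block-diagonal `D` and a permutation `perm` such that `A = L D L^T` with `L[perm]` lower triangular) is:

```python
import numpy as np
from scipy.linalg import solve_triangular

def ldl_solve(ldl_factor, b):
    """Solve A x = b given the (L, D, perm) triple returned by scipy.linalg.ldl(A)."""
    L, D, perm = ldl_factor
    # forward substitution on the permuted triangular factor: L z = b
    z = solve_triangular(L[perm], b[perm], lower=True)
    # block-diagonal solve: D w = z (D may contain 2x2 blocks, so use a general solve)
    w = np.linalg.solve(D, z)
    # back substitution L^T x = w, then undo the permutation
    x = np.empty_like(b, dtype=float)
    x[perm] = solve_triangular(L[perm].T, w, lower=False)
    return x
```

Exploiting symmetry this way roughly halves the cost of a generic LU solve, which is presumably the motivation for replacing `np.linalg.solve` here and in `newton.py` below.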
4 changes: 2 additions & 2 deletions optiml/opti/qp/__init__.py
@@ -1,3 +1,3 @@
__all__ = ['LagrangianDual', 'LagrangianConstrainedQuadratic']
__all__ = ['LagrangianDual', 'LagrangianEqualityConstrainedQuadratic', 'LagrangianConstrainedQuadratic']

from .lagrangian_dual import LagrangianDual, LagrangianConstrainedQuadratic
from .lagrangian_dual import LagrangianDual, LagrangianEqualityConstrainedQuadratic, LagrangianConstrainedQuadratic
32 changes: 27 additions & 5 deletions optiml/opti/qp/lagrangian_dual.py
@@ -40,10 +40,7 @@ def _print_dual_info(self, opt):
print(' - pcost: {: 1.4e}'.format(self.f_x), end='')
print(' - gap: {: 1.4e}'.format(gap), end='')

try:
self.callback()
except StopIteration:
raise StopIteration
self.callback()

if gap <= self.eps:
self.status = 'optimal'
@@ -87,6 +84,31 @@ def callback(self, args=()):
self._callback(self, *args, *self.callback_args)


class LagrangianEqualityConstrainedQuadratic(Quadratic):

def __init__(self, Q, q, A):
"""
Construct the Lagrangian relaxation of an equality-constrained quadratic function defined as:
1/2 x^T Q x + q^T x : A x = 0
By using Lagrange multipliers and seeking the extremum of the Lagrangian, it may be readily
shown that the solution to the equality constrained problem is given by the linear system:
| Q A^T | | x | = | -q |
| A 0 | | lambda | | 0 |
where lambda is a set of Lagrange multipliers which come out of the solution alongside x.
:param A: equality constraints matrix to be relaxed
"""
A = np.atleast_2d(np.asarray(A, dtype=np.float))
Q = np.vstack((np.hstack((Q, A.T)),
np.hstack((A, np.zeros((A.shape[0], A.shape[0]))))))
q = np.hstack((-q, np.zeros(A.shape[0])))
super().__init__(Q, q)


class LagrangianConstrainedQuadratic(Quadratic):

def __init__(self, quad, A, ub):
@@ -105,7 +127,7 @@ def __init__(self, quad, A, ub):
# of Q because it is symmetric but could be not positive definite.
# This will be used at each iteration to solve the Lagrangian relaxation.
self.L, self.D, self.P = ldl(self.Q)
self.A = np.asarray(A, dtype=np.float)
self.A = np.atleast_2d(np.asarray(A, dtype=np.float))
if any(u < 0 for u in ub):
raise ValueError('the upper bound must be >= 0')
self.ub = np.asarray(ub, dtype=np.float)
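To make the construction in `LagrangianEqualityConstrainedQuadratic` concrete, here is a small self-contained check of the augmented KKT system described in its docstring (toy numbers, not taken from the repository):

```python
import numpy as np

# min 1/2 x^T Q x + q^T x  subject to  A x = 0
Q = np.array([[4., 1.], [1., 3.]])
q = np.array([1., -2.])
A = np.array([[1., 1.]])

KKT = np.block([[Q, A.T],
                [A, np.zeros((A.shape[0], A.shape[0]))]])
rhs = np.concatenate((-q, np.zeros(A.shape[0])))

z = np.linalg.solve(KKT, rhs)
x, lam = z[:Q.shape[0]], z[Q.shape[0]:]
assert np.allclose(A @ x, 0)                  # primal feasibility
assert np.allclose(Q @ x + q + A.T @ lam, 0)  # stationarity of the Lagrangian
```

Solving this single linear system yields both the constrained minimizer x and the Lagrange multipliers lambda at once.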
4 changes: 3 additions & 1 deletion optiml/opti/unconstrained/line_search/newton.py
@@ -1,6 +1,8 @@
import numpy as np
from scipy.linalg import ldl

from . import LineSearchOptimizer
from ...utils import ldl_solve


class Newton(LineSearchOptimizer):
@@ -167,7 +169,7 @@ def minimize(self):
if self.is_verbose():
print('\t{: 1.4e}'.format(self.delta), end='')

d = -np.linalg.inv(self.H_x).dot(self.g_x) # or np.linalg.solve(self.H_x, self.g_x)
d = ldl_solve(ldl(self.H_x), -self.g_x)  # Newton direction: solve H_x d = -g_x via an LDL^T factorization

phi_p0 = self.g_x.T.dot(d)

