In [1]:
% load_ext autoreload
% autoreload 2
import numpy as np
import sys
sys.path.append('/Users/IzmailovPavel/Documents/Education/GPproject/gplib/')

# Optimization
Testing gplib.optim 
## Deterministic methods

In [2]:
# Random symmetric positive-definite 12x12 matrix: a Gram matrix shifted by
# 0.5 * I, plus a random linear term stored as a column vector.
A = np.random.rand(12, 12)
A = A.T.dot(A)
A += 0.5 * np.eye(12)
b = np.random.rand(12, 1)
def f(x):
    """Quadratic objective 0.5 * x^T A x + b^T x, intended for minimization.

    `x` is flattened into a column vector before use; the module-level
    `A` (symmetric, see its construction) and `b` define the objective.

    Returns a pair `(value, gradient)`: the scalar objective value and the
    gradient `A x + b` flattened to a 1-D array.
    """
    col = x.reshape((x.size, 1))
    # A x is shared by the value and the gradient, so compute it once.
    Ax = A.dot(col)
    value = col.T.dot(Ax) / 2 + b.T.dot(col)
    gradient = Ax + b
    return value[0, 0], gradient.reshape(-1)

def f2(x):
    """Negated quadratic -(0.5 * x^T A x + b^T x), intended for maximization.

    `x` is flattened into a column vector before use; the module-level
    `A` and `b` define the objective.

    Returns a triple `(value, gradient, hessian)`: the scalar objective
    value, the gradient `-A x - b` flattened to 1-D, and the constant
    Hessian `-A`.
    """
    col = x.reshape((x.size, 1))
    # A x is shared by the value and the gradient, so compute it once.
    Ax = A.dot(col)
    value = -col.T.dot(Ax) / 2 - b.T.dot(col)
    gradient = -Ax - b
    hessian = -A
    return value[0, 0], gradient.reshape(-1), hessian

In [3]:
# Random starting point (column vector) shared by all deterministic runs below.
x_0 = np.random.rand(12, 1)

### Gradient checking

In [4]:
from gplib.optim.utility import check_gradient

In [5]:
# Sanity-check the gradient returned by `f` at x_0; the recorded output
# reports the difference between the calculated and approximated gradients.
check_gradient(f, x_0)


Difference between calculated and approximated gradients
7.79829121056e-06


### Wrapper for SciPy's L-BFGS-B

In [6]:
from gplib.optim.methods import LBFGS

In [7]:
# maxiter=20 presumably caps the number of iterations; with disp=5 the
# recorded log shows a progress line on every 5th iteration.
optimizer = LBFGS(maxiter=20, disp=5)

In [8]:
# Minimize the quadratic `f` starting from x_0. The result is unpacked into
# the final point and a stats mapping whose 'time' and 'fun' entries are
# displayed in the next cells.
x, stat = optimizer.minimize(f, x_0)

Iteration 0 :
	x [ 0.0672269   0.25084111  0.85310949  0.94455894  0.31832569]
Iteration 5 :
	x [ 0.00888674 -0.07168481  0.22909253 -0.05835525 -0.30975145]
Iteration 10 :
	x [ 0.08214552 -0.0711999   0.29856264 -0.15992151 -0.42189632]
Iteration 15 :
	x [ 0.08337992 -0.0720693   0.29747764 -0.16006448 -0.42249568]


In [9]:
stat['time']

0.002584695816040039

In [10]:
stat['fun']

-0.2755426265523222

### FGD with Armijo step-size rule

In [11]:
from gplib.optim.methods import FGD

In [12]:
# maxiter=100 presumably caps the number of iterations; with disp=10 the
# recorded log shows a report on every 10th iteration.
optimizer = FGD(maxiter=100, disp=10)

In [13]:
# Same objective `f` and starting point x_0 as the L-BFGS run, so the final
# stat['fun'] values can be compared directly.
x, stat = optimizer.minimize(f, x_0)

Iteration  0 :
	Gradient projection norm 73.2433491075
	Function value 71.8867684491
Iteration  10 :
	Gradient projection norm 1.12076127815
	Function value -0.109996487424
Iteration  20 :
	Gradient projection norm 0.834438786714
	Function value -0.220581263237
Iteration  30 :
	Gradient projection norm 0.172473479271
	Function value -0.257858065265
Iteration  40 :
	Gradient projection norm 0.112893224102
	Function value -0.268718037258
Iteration  50 :
	Gradient projection norm 0.152922694631
	Function value -0.272786881311
Iteration  60 :
	Gradient projection norm 0.0384195754519
	Function value -0.274485851026
Iteration  70 :
	Gradient projection norm 0.0255323766013
	Function value -0.27510360217
Iteration  80 :
	Gradient projection norm 0.0173577402903
	Function value -0.275357653722
Iteration  90 :
	Gradient projection norm 0.0224832157868
	Function value -0.27546458113


In [14]:
stat['time']

0.015423059463500977

In [15]:
stat['fun']

-0.27550630389258479

### Projected Newton
#### Without Hessian

In [16]:
from gplib.optim.methods import ProjNewton

In [17]:
# maxiter=20 presumably caps the number of iterations; with disp=5 the
# recorded log shows a report on every 5th iteration. This same instance is
# reused by the later `optimizer.maximize(f2, x_0)` call.
optimizer = ProjNewton(maxiter=20, disp=5)

In [18]:
# `f` returns only (value, gradient), so this run gets no Hessian from the
# objective; same starting point x_0 as the previous optimizers.
x, stat = optimizer.minimize(f, x_0)

Iteration  0 :
	Gradient projection norm 253.722403941
	Function value 71.8867684491
Iteration  5 :
	Gradient projection norm 0.47023128676
	Function value -0.273979961261
Iteration  10 :
	Gradient projection norm 0.0249156073566
	Function value -0.275541680521
Iteration  15 :
	Gradient projection norm 0.000259808616059
	Function value -0.275542629366
Gradient projection reached the stopping criterion


In [19]:
stat['time']

0.024365901947021484

In [20]:
stat['fun']

-0.27554262994756867

#### With Hessian

In [21]:
# NOTE(review): in the recorded run this cell failed with
# "TypeError: 'NoneType' object is not iterable" right after the first log
# entry, so `maximize` presumably returned None instead of a (x, stat) pair
# -- confirm in gplib.optim.methods. Every cell after this one has no
# execution count, i.e. it was not run in the saved session.
x, stat = optimizer.maximize(f2, x_0)

Iteration  0 :
	Gradient projection norm 253.722403941
	Function value 71.8867684491
Gradient projection reached the stopping criterion


TypeError: 'NoneType' object is not iterable

In [None]:
stat['fun']

In [None]:
stat['time']

## Stochastic methods

In [None]:
# Synthetic linear-regression problem: 500 samples with 10 features,
# targets X w perturbed by Gaussian noise with standard deviation 0.1.
w = np.random.rand(10, 1)
X = np.random.rand(500, 10)
y = np.random.normal(scale=0.1, size=(500, 1))  # noise term, drawn after w and X
y += X.dot(w)  # add the noiseless targets

In [None]:
# Random initial weight vector (column) for the stochastic runs.
w_0 = np.random.rand(10, 1)

In [None]:
def f(point, indices=None):
    """Squared-error loss ||y - X w||^2 and its gradient on selected rows.

    NOTE: this rebinds the name `f` used for the quadratic objective earlier
    in the notebook.

    point   -- weight vector; reshaped to a column before use.
    indices -- row indices into the module-level `X` and `y`; when None,
               every row is used.

    Returns `(loss, gradient)` with the gradient flattened to a 1-D array.
    """
    w_col = point.reshape(point.size, 1)
    rows = np.arange(y.size).tolist() if indices is None else indices
    X_rows = X[rows]
    residual = y[rows] - X_rows.dot(w_col)
    loss = np.linalg.norm(residual) ** 2
    gradient = -2 * residual.T.dot(X_rows)
    return loss, gradient.reshape(-1)

In [None]:
# Gradient check for the least-squares objective at w_0. Presumably `f` is
# called without `indices` here, i.e. on all rows -- verify against
# check_gradient.
check_gradient(f, w_0)

### climin AdaDelta

In [None]:
from gplib.optim.methods import AdaDelta
from climin.util import iter_minibatches

In [None]:
# Minibatch source shared by `climin_fun`: X and y are sliced together along
# axis 0 in chunks of `batch_size` rows.
batch_size = 20
batches = iter(iter_minibatches([X, y], batch_size, [0, 0]))
def climin_fun(point):
    """Gradient of the squared-error loss ||y - X w||^2 on the next minibatch.

    Every call consumes one `(X_batch, y_batch)` pair from the module-level
    `batches` iterator.

    point -- weight vector; reshaped to a column before use.

    Returns only the gradient, flattened to a 1-D array.
    """
    X_batch, y_batch = next(batches)
    w_col = point.reshape(point.size, 1)
    # The residual is computed once; the loss value is never returned, so it
    # is not evaluated here.
    residual = y_batch - X_batch.dot(w_col)
    return (-2 * residual.T.dot(X_batch)).reshape(-1)

In [None]:
# Floor division keeps iter_per_epoch an integer on Python 3 as well;
# `y.size / batch_size` would be a float there (500 / 20 -> 25.0).
optimizer = AdaDelta(iter_per_epoch=y.size // batch_size, n_epoch=20, disp=5, step_rate=1.)
# `climin_fun` returns only a minibatch gradient; w_0 is the starting point.
x, stat = optimizer.minimize(climin_fun, w_0)

In [None]:
stat['time']

In [None]:
f(x)[0]