In [3]:
import numpy as np
import matplotlib.pyplot as plt
import loadParametersP1
import loadFittingDataP1
from scipy.stats import multivariate_normal
import pdb
import math
import copy

In [4]:
class GD(object):
    """Plain gradient descent on a scalar objective.

    Attributes:
        x0: current iterate; `step` rebinds it after every update.
        objective: callable mapping a point to a scalar value.
        gradient: optional callable returning the gradient at a point.
            When it is None the gradient is estimated by forward
            differences of `objective`.
        step_size: constant multiplier applied to the gradient.
    """

    def __init__(self, x0, objective,
                 gradient=None,
                 step_size=0.1):
        self.x0 = x0
        self.objective = objective
        self.gradient = gradient
        self.step_size = step_size

    def compute_gradient(self, x, idx=None, eps=1e-6):
        """Return the gradient of the objective at `x`.

        Args:
            x: point at which to evaluate; it is never modified.
            idx: if given, only this coordinate of the finite-difference
                gradient is filled in and the others stay 0. Ignored when
                an analytic gradient was supplied.
            eps: forward-difference step. Ignored when an analytic
                gradient was supplied.

        Returns:
            The value of `self.gradient(x)` when available, otherwise a
            1-D float array of length `len(x)`.
        """
        if self.gradient is not None:
            return self.gradient(x)
        # Perturb a private float copy: the caller's array stays untouched
        # and an integer-typed input cannot truncate the eps perturbation.
        probe = np.array(x, dtype=float)
        grad = np.zeros(len(probe))
        # The base value is the same for every coordinate; evaluate once.
        f_x = self.objective(probe)
        coords = range(len(probe)) if idx is None else [idx]
        for i in coords:
            saved = probe[i]
            probe[i] = saved + eps
            grad[i] = (self.objective(probe) - f_x) / eps
            # Restore the exact value rather than subtracting eps, which
            # would leave floating-point residue for the next coordinate.
            probe[i] = saved
        return grad

    def step(self, stochastic=False, gtol=1e-8, max_iters=None):
        """Iterate `x0 <- x0 - step_size * grad` until the gradient is small.

        Args:
            stochastic: accepted for interface compatibility; not used.
            gtol: stop once the Euclidean norm of the gradient is below this.
            max_iters: optional cap on the number of updates. None (the
                default) means no cap.

        Returns:
            A list of `(x, objective(x))` tuples, one per visited iterate,
            ending with the iterate at which the loop stopped.
        """
        log = []
        n_updates = 0
        while True:
            grad = self.compute_gradient(self.x0)
            log.append((self.x0, self.objective(self.x0)))
            if np.linalg.norm(grad) < gtol:
                break
            # A NaN/inf gradient can never satisfy the tolerance test, so
            # stop instead of looping forever on a diverged run.
            if not np.all(np.isfinite(grad)):
                break
            if max_iters is not None and n_updates >= max_iters:
                break
            self.x0 = self.x0 - self.step_size * grad
            n_updates += 1
        return log

In [5]:
# Unpack the four values returned by loadParametersP1.getData(): the
# Gaussian mean/covariance and the quadratic-bowl A and b.
(gaussMean, gaussCov,
 quadBowlA, quadBowlb) = loadParametersP1.getData()

In [6]:
def test_gauss(x):
    """Negated multivariate-normal density (gaussMean, gaussCov) at x."""
    return -multivariate_normal.pdf(x, gaussMean, gaussCov)


def test_gauss_gradient(x):
    """Gradient of test_gauss at x: -f(x) * inv(gaussCov).dot(x - gaussMean)."""
    scale = -test_gauss(x)
    precision = np.linalg.inv(gaussCov)
    return scale * precision.dot(x - gaussMean)

In [7]:
def test_bowl(x):
    """Quadratic bowl 0.5 * x'Ax - x'b with A = quadBowlA, b = quadBowlb."""
    quadratic_term = 0.5 * x.T.dot(quadBowlA.dot(x))
    linear_term = x.T.dot(quadBowlb)
    return quadratic_term - linear_term


def test_bowl_gradient(x):
    """Return quadBowlA.dot(x) - quadBowlb.

    NOTE(review): this equals the gradient of test_bowl only if quadBowlA
    is symmetric -- confirm against the loaded data.
    """
    return quadBowlA.dot(x) - quadBowlb

In [12]:
# Minimise the negated Gaussian from (6, 16) with a finite-difference
# gradient (gradient=None) and a step size of 10000.
x0 = np.array([6.0, 16.0])
log = GD(x0, objective=test_gauss, gradient=None, step_size=10000).step()

In [13]:
# Last logged (point, objective) pair and the number of logged iterates.
(log[-1], len(log))

((array([  9.9651509 ,  10.05227234]), -0.00015915462901127802), 2987)

In [19]:
# Fitting data for the least-squares problem; the two values returned by
# loadFittingDataP1.getData() are bound to X and y.
X, y = loadFittingDataP1.getData()

In [97]:
class SGD(object):
    """Least-squares fit of `X.dot(theta)` to `y` by (stochastic) gradient descent.

    Attributes:
        X: indexable collection of rows; `X.shape` is read, so it is
            expected to be array-like with a `shape` attribute.
        y: targets, indexed in step with the rows of `X`.
        step_size: constant multiplier applied to the gradient.
    """

    def __init__(self, X, y,
                 step_size=1e-5):
        self.X = X
        self.y = y
        self.step_size = step_size

    def compute_objective(self, theta):
        """Return the sum of squared residuals over the instance's own data."""
        # Use self.X / self.y: the objective must not depend on whatever
        # happens to be bound to the notebook-level names X and y.
        return sum((theta.dot(self.X[i]) - self.y[i]) ** 2
                   for i in range(len(self.y)))

    def compute_numerical_gradient(self, theta, eps=1e-7, idx=None):
        """Forward-difference estimate of the objective's gradient.

        Args:
            theta: parameter vector; it is never modified.
            eps: finite-difference step.
            idx: if given, only this coordinate is estimated; all other
                entries of the result stay 0.

        Returns:
            1-D float array of length `len(theta)`.
        """
        # Perturb a private float copy so the caller's theta keeps its
        # exact values (and an integer theta cannot swallow eps).
        probe = np.array(theta, dtype=float)
        grad = np.zeros(len(probe))
        f = self.compute_objective(probe)
        for i in range(len(probe)):
            if idx is not None and i != idx:
                continue
            saved = probe[i]
            probe[i] = saved + eps
            grad[i] = (self.compute_objective(probe) - f) / eps
            probe[i] = saved
        return grad

    def compute_gradient(self, theta, idx=None):
        """Analytic gradient of the squared error, summed over rows `idx`.

        Args:
            theta: parameter vector.
            idx: iterable of row indices to include; None means every row.
                Repeated indices are counted once per occurrence.

        Returns:
            1-D float array of length `X.shape[1]`.
        """
        # `is None`, not `== None`: idx may be a NumPy array, for which
        # `==` compares element-wise and has no single truth value.
        if idx is None:
            idx = range(len(self.y))
        grad = np.zeros(self.X.shape[1])
        for i in idx:
            grad += 2 * (self.X[i].dot(theta) - self.y[i]) * self.X[i]
        return grad

    def step(self, theta, stochastic=False, minibatch_size=1, ftol=1e-7,
             max_iters=None, verbose=True):
        """Run descent from `theta` until the objective stops changing.

        Args:
            theta: starting parameter vector.
            stochastic: when True, each update uses `minibatch_size` rows
                drawn uniformly (with replacement) via np.random.randint;
                when False, every row is used.
            minibatch_size: number of rows sampled per stochastic update.
            ftol: stop once the full objective changes by less than this
                between consecutive updates.
            max_iters: optional cap on the number of updates. None (the
                default) means no cap.
            verbose: print the objective and gradient after each update
                that does not terminate the loop (the default).

        Returns:
            A list of `(theta, objective)` tuples recorded before each
            update; the iterate produced by the final update is not
            appended.
        """
        log = []
        idx = None
        prev_objective = self.compute_objective(theta)
        t = 0
        while True:
            if stochastic:
                # Sample row indices: shape[0] is the number of rows,
                # shape[1] is the number of columns of X.
                idx = np.random.randint(self.X.shape[0], size=minibatch_size)
            grad = self.compute_gradient(theta, idx)
            log.append((theta, prev_objective))
            # The update uses the constant self.step_size. A decaying rate
            # (t_0 + t) ** -k used to be computed here but was never
            # applied, so that dead computation has been dropped.
            theta = theta - self.step_size * grad
            tmp = self.compute_objective(theta)
            # A NaN/inf objective can never pass the ftol test; stop rather
            # than loop forever on a diverged run.
            if not np.all(np.isfinite(tmp)):
                break
            if abs(tmp - prev_objective) < ftol:
                break
            prev_objective = tmp
            t += 1
            if verbose:
                # Single pre-formatted string: prints identically under the
                # Python 2 print statement and the Python 3 print function.
                print("%s %s" % (log[-1][1], grad))
            if max_iters is not None and t >= max_iters:
                break
        return log

In [98]:
# Least-squares optimizer over the loaded fitting data, default step size.
optimizer = SGD(X=X, y=y)

In [99]:
# Gradient check at a random point: print the element-wise difference
# between the analytic and the finite-difference gradient.
theta_0 = np.random.random(X.shape[1])
# Parenthesised single expression: prints the same under the Python 2
# print statement and the Python 3 print function.
print(optimizer.compute_gradient(theta_0)
      - optimizer.compute_numerical_gradient(theta_0))

[ 0.02228589  0.05137948  0.08752402 -0.08297291  0.12458932  0.07481222
 -0.06388252 -0.04999496  0.01166251  0.02678426]


In [100]:
# Run the descent from a random starting point; the trailing semicolon
# suppresses the notebook's display of the returned log.
optimizer.step(theta=np.random.random(X.shape[1]));

20773371.7734 [   -2451.02243875    85364.25453889   577638.18341395  -280364.55387158
   222822.89750581  -280368.6515151     17711.8464542    107290.23670455
  1194392.33732171 -1177688.07827311]
1426781.1278 [ -47464.54265331  143450.7692434    33924.78356994 -230658.32004675
  -43878.44362921 -149890.63536861 -146200.47028245   18951.84979319
   71075.15210149 -240251.68661893]
215771.750315 [ 47146.71496224  37920.70341357  43496.50073172 -44877.47793889
   2548.86543549 -82041.08907787 -53640.80615243   5423.74939752
  22833.89312737 -80480.57171984]
49731.8569043 [  4089.97902502  13714.81106705  12924.58616518 -29349.85232198
   1369.77920248 -34767.24014152 -36227.84827368 -14952.18338966
   7843.73710467 -18754.79461546]
18203.0890452 [ 11560.90023762   3359.19869084   8812.75270338  -7722.10797995
   1947.93953803 -16673.81101444 -18242.7747863   -4839.88712367
   3124.18157031  -7385.01654175]
10916.5041462 [  3154.50277586    557.86467489   3431.63292228  -5251.68050596
  

In [7]:
# Scratch check: adding eps to an element of a plain Python list of ints
# stores a float in that slot.
x = [5, 10]
eps = 1e-6
i = 0
# Pre-formatted strings print identically under the Python 2 print
# statement and the Python 3 print function.
print("%s %s %s" % (x, x[i] + eps, eps))
x[i] = x[i] + eps
print("%s %s" % (x, x[i]))

[5, 10] 5.000001 1e-06
[5.000001, 10] 5.000001


In [14]:
import sys

# Show which interpreter produced the outputs in this notebook.
print(sys.version)

2.7.6 (default, Jun 22 2015, 17:58:13) 
[GCC 4.8.2]


In [42]:
# Shape of the first four rows of X.
X[0:4].shape

(4, 10)