In [None]:
# This notebook demonstrates stochastic gradient descent and minibatch line search methods.
# It was prepared by Vlad Kobzar (vk283@nyu.edu) for the Machine Learning course at NYU's Center for Data Science
# https://davidrosenberg.github.io/ml2017/
# February 4, 2017

# To install matplotlib, use e.g. pip install matplotlib
# To run matplotlib animations in Jupyter, we need the ffmpeg package. It can be installed using, e.g., Homebrew.
#     brew install ffmpeg
# To install Homebrew on macOS, run
#     /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"

In [83]:
import numpy as np
import sympy as sp
from numpy import linalg

#minibatch extracts a random batch d of b elements from s
def minibatch(s, b):
    indices = np.arange(s.shape[0])
    np.random.shuffle(indices)
    excerpt = indices[0: b]
    d = sample[excerpt,:]
    return d

# data_generating_function returns a point 
# from the linear model y(x) = w_0 +w_1x
def data_generating_function(x, w0, w1):
    return (w0+w1*x)

# gradient_linear_regression evaluates at the point (w_0,w_1) the gradient 
# of the linear regression given by the array data 
def gradient_linear_regression (w0,w1, data):
    w= np.array([w0,w1])
    grad = np.array([0,0])
    n=data.shape[0]
    for i in range(n):
        x_i=np.array([1, data [i,0]])
        y_i=data [i,1]
        grad = grad + (np.dot(w, x_i)-y_i) *x_i
    return (2 / n * grad)

# linear_regression evaluates at the point (w_0,w_1) 
# the linear regression given by the array data  
def linear_regression (w0,w1, data):
    w= np.array([w0,w1])
    r = 0
    n=data.shape[0]
    for i in range(n):
        x_i=np.array([1,data[i,0]])
        y_i=data[i,1]
        r = r +  (np.dot(w, x_i)-y_i)**2
    return ((1 / n) *  r)

# fixed_step_gd returns a list of numpy 2D arrays representing the steps 
# of a gradient descent minimization algorithm with a 1/i step size with
# respect to a linear regression function.  
def fixed_step_gd(data, initial_vector, step_size, 
                              maxstep = 12, precision=0.2):
    x=initial_vector
    steps=[x]
    grad = np.array ([0, 2*precision]) 
    i=0
    while linalg.norm(grad,2)> precision and (i< maxstep):
        grad=-gradient_linear_regression(x[0],x[1], data)
        x = x +step_size/(i+1)*grad
        i+=1
        steps.append(x) 
    return steps

# minibatch_gd returns a list of numpy 2D arrays representing the steps 
# of a minibatch minimization algorithm with
# respect to a linear regression function. 
def minibatch_gd(data, initial_vector, step_size, batchsize,
                              maxstep = 12, precision=0.2):
    x=initial_vector
    steps=[x]
    grad = np.array ([0, 2*precision]) 
    i=0
    while linalg.norm(grad,2)> precision and (i< maxstep):
        batch =minibatch (data, batchsize)
        grad=-gradient_linear_regression(x[0],x[1], batch)
        x = x +step_size/(i+1)*grad
        i+=1
        steps.append(x) 
    return steps 

#plot an animated search
def init():
    global fixed_pts, fixed_sct, pts, sct
    fixed_pts.set_data ([],[])
    fixed_sct.set_data([],[])
    pts.set_data ([],[])
    sct.set_data([],[])
    return pts,sct
                
def animate(i):       
    global fixed_pts, fixed_sct, pts, sct
    fixed_pts.set_data([item[0] for item in fixed_points[0:i]], [item[1] for item in fixed_points[0:i]])
    pts.set_data([item[0] for item in points[0:i]], [item[1] for item in points[0:i]]) 
    fixed_sct.set_data([item[0] for item in fixed_points[0:(i)]],
                       [item[1] for item in fixed_points[0:(i)]])
    sct.set_data([item[0] for item in points[0:(i)]],
                 [item[1] for item in points[0:(i)]])
    return fixed_pts, fixed_sct, pts,sct


In [85]:
# The following parameters are set by the user

# Parameters of the distribution from which 
# the sample is drawn

w0 = 2
w1 = 1
mu=0
sigma=1  

#x-coordinates of the sample from the linear model
xmin = -1.5
xmax = 3
grid_size = 100

#Search parameters
x_init=np.array([0.5,0]) #the initial point
batchsize=10           #batchsize 
step_size = 0.3            #step size
maxstep = 20         #stopping condition maximum number of steps 
precision = 0.2         #gradient l2 norm threshold for the stopping condition


# Number of alternative paths plotted
#alternative_paths= 
#Number of alternative steps plotted
#alternative_steps= 10

import matplotlib.pyplot as plt
from matplotlib import  rc

x = np.linspace(xmin, xmax, grid_size)
y=data_generating_function(x, w0, w1)+np.random.normal(mu, sigma, len(x))
sample=np.dstack((np.array(x.ravel()), np.array(y.ravel())))
sample=sample.reshape(sample.shape[1],sample.shape[2])

fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
ax1.plot (x, data_generating_function(x, w0, w1))
ax1.scatter(x, y)
ax1.set_title('Full batch sampled from y = w_0+w_1 x + epsilon, where\n'
           'w_0=%.1f and w_1=%.1f and epsilon is N(%.0f, %.0f)' % (w0, w1, mu, sigma))
ax1.set_xlabel('x ')
ax1.set_ylabel('y ') 
pyplot.figure(name)
fig1.show()

from matplotlib import animation
from IPython.display import HTML
rc('animation', html='html5')

fixed_points= fixed_step_gd(sample, x_init, step_size, 
                              maxstep, precision)
points=minibatch_gd(sample, x_init, step_size, batchsize,
                              maxstep, precision)

#Boundaries and gridsize of the linear regression
w0min = w0-2
w0max = w0+2
w1min = w1-2
w1max = w1+2
hatgrid_size = 20

#calculate the linear regression for the full batch
hatw0 = np.linspace(w0min, w0max, grid_size)
hatw1 = np.linspace(w1min, w1max, grid_size)
w0mesh, w1mesh = np.meshgrid(hatw0, hatw1)
zz=linear_regression(hatw0[None,:],hatw1[:,None] ,sample)

#set up figure and animation

fig2 = plt.figure(2)
ax2 = fig2.add_subplot(111, xlim=(w0min, w0max), ylim=(w1min, w1max))
ax2.contour(w0mesh, w1mesh, zz)
ax2.scatter([w0], [w1], color='blue')
#plt.clabel(cp, inline=True, fontsize=7)

ax2.set_title('Gradient Descent Methods')
ax2.set_xlabel('w_0 ')
ax2.set_ylabel('w_1 ') 

#pts holds the location of the fixed step points
fixed_pts, = ax2.plot([],[], color='red', label='Fixed step size:  %.2f' % step_size)
fixed_sct,  = ax2.plot([], 'ro', markersize=3)
          
#holds the location of the SGD/minbatch points
pts, = ax2.plot([],[], color='black',  label='SGD: batch size %.d' % batchsize)
sct,  = ax2.plot([], 'ko', markersize=3)

ax2.legend([fixed_pts,pts], [fixed_pts.get_label(), pts.get_label()], loc='best', fontsize=7)

anim = animation.FuncAnimation(fig2, animate, init_func=init,
                              frames= max([len(fixed_points),len (points)]), interval=600, blit=False)
HTML(anim.to_html5_video())



NameError: name 'pyplot' is not defined

In [73]:
  # still images
#alternative_paths = []
#for i in range (n):
#        alternative_path[i]=minibatch_gd(sample, x_init, step_size, batchsize,
#                              maxstep = 12, precision=0.2)
    
#    arr.set_offsets([points[0][0],points[0][1]]) 
#    arr.set_UVC(points[1][0]-points[0][0],points[1][1]-points[0][1])
    
#        for j in range (alternative_steps):
#                targets =minibatch (sample, batchsize)
#                grad=gradient_linear_regression(x_old[0], x_old[1], targets)
#                x_alt = x_old -gamma/(i+1) * grad
#                normalization = linalg.norm(x_old-x_alt,2) 
#                x_alt =   4*(x_alt-x_old) / (normalization )
#                alternative_points.append([x_alt[0],x_alt[1]])  

#arrows holds the location of the arrows
#arr = ax.quiver (points[0][0],points[0][1], points[1][0]-points[0][0],points[1][1]-points[0][1],
#                 width=0.003, color='b')   


#    if i>1:
#        arr.set_offsets([points[i-2][0],points[(i-2)][1]]) 
#        for j in (range (alternative_steps)):
#            q1.append(alternative_points[(i-2)*alternative_steps+j][0])
#            q2.append(alternative_points[(i-2)*alternative_steps+j][1])
#            arr.set_UVC(q1,q2)
