In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.special

In [2]:
#setup regression

coefs = np.asarray([0.4, 3.0, 2.5, 6.0, -0.8])
#coefs = np.asarray([0.4])
intercept = 1.44

num_points = 100
x_min = -20
x_max = 20
x = np.random.uniform(x_min, x_max, (len(coefs), num_points))

x_aug = np.concatenate((x.T,np.ones(num_points).reshape(num_points,1) ), axis=1)
coefs_aug = np.concatenate((coefs, np.array(intercept).reshape(1))).reshape(len(coefs)+1,1)

e_mean = 0
e_var = 0.5
e = np.random.normal(e_mean, e_var, (num_points,1))

y = x_aug @ coefs_aug + e.reshape(num_points,1)

In [3]:
#solve via normal equation
theta_normal = np.linalg.inv( x_aug.T @ x_aug ) @ (x_aug.T @ y)
print("norm of parameter difference (normal equations):", np.linalg.norm(coefs_aug - theta_normal,2))
print("true coefficients:\n", coefs_aug)
print("coefficients from normal equation\n", theta_normal)

norm of parameter difference (normal equations): 0.07505847417043875
true coefficients:
 [[ 0.4 ]
 [ 3.  ]
 [ 2.5 ]
 [ 6.  ]
 [-0.8 ]
 [ 1.44]]
coefficients from normal equation
 [[ 0.40254343]
 [ 3.00869686]
 [ 2.50316382]
 [ 6.0048633 ]
 [-0.80381125]
 [ 1.36581454]]


In [5]:
#solve via gradient descent
epsilon = 1e-4
delta = 100

alpha = 0.001

theta0 = np.random.uniform(-2,2,len(coefs_aug)).reshape(len(coefs_aug),1)
theta_gd = []
theta_gd.append(theta0)

k = 0
while(delta > epsilon):
    #compute error
    e = x_aug @ theta_gd[k] - y
    
    #compute gradients
    gradients = 2/num_points * x_aug.T @ e
    #print(e)
    #compute next theta
    theta_temp = theta_gd[k] - alpha * gradients
    
    #check converge
    delta = np.linalg.norm(gradients,1)

    #store
    theta_gd.append(theta_gd[k] - alpha * gradients)
    k += 1
    
    if k % 1000 == 0:
        print("iteration: ", k)
    
print("total number of iterations: ", format(k))
theta_hat_gd = theta_gd[k-1]
print("estimated coefficients", theta_hat_gd)
print("true coefficients", coefs_aug)

iteration:  1000
iteration:  2000
iteration:  3000
iteration:  4000
total number of iterations:  4439
estimated coefficients [[ 0.40254308]
 [ 3.0086971 ]
 [ 2.50316403]
 [ 6.00486302]
 [-0.8038116 ]
 [ 1.36586468]]
true coefficients [[ 0.4 ]
 [ 3.  ]
 [ 2.5 ]
 [ 6.  ]
 [-0.8 ]
 [ 1.44]]


In [8]:
#Solve via mini-batch gradient descent

#batch size and convergence criteria
batch_size = int(np.floor(num_points/10))
epsilon = 1e-5
delta0 = 100
delta = []
delta.append(delta0)
max_iters = 3000

#learning rate parameters
alpha = 0.01

#initial parameters and parameter lists
theta0 = np.random.uniform(-2,2,len(coefs_aug)).reshape(len(coefs_aug),1)
theta_mbgd = []
theta_mbgd.append(theta0)

k = 0
while(k < max_iters):
    
    #grab a mini-batch
    batch = np.random.choice(np.arange(0,num_points), batch_size)
    x_batch = x_aug[batch,:]
    y_batch = y[batch]
    
    #compute prediction error
    e = y_batch - x_batch @ theta_mbgd[k]
    
    #compute gradients
    gradients = -2/num_points * x_batch.T @ e
    
    #compute next theta
    theta_mbgd.append(theta_mbgd[k] - alpha * gradients)
    
    #update index
    k += 1
    
print("total number of iterations: ", format(k))
theta_hat_mbGD = theta_mbgd[k]
print("MBGD coefficients:\n", theta_hat_mbGD)
print("GD coefficients:\n", theta_hat_gd)

total number of iterations:  3000
MBGD coefficients:
 [[ 0.39817672]
 [ 3.01175424]
 [ 2.50195921]
 [ 6.00535701]
 [-0.79884783]
 [ 1.3694756 ]]
GD coefficients:
 [[ 0.40254308]
 [ 3.0086971 ]
 [ 2.50316403]
 [ 6.00486302]
 [-0.8038116 ]
 [ 1.36586468]]
