In [26]:
#class for function and 1st derivative
import numpy as np

class F(object):
  """Two-variable test function together with its first partial derivatives."""

  def __init__(self):
    """Nothing to initialise: the function carries no state."""

  def func(self, x1, x2):
    """Return 2*x1^2 - 1.05*x1^4 + x1^6/6 + x1*x2 + x2^2 evaluated at (x1, x2)."""
    # terms that depend on x1 only
    x1_only = 2*x1**2 - 1.05*x1**4 + x1**6/6
    # add the coupling term and the x2 quadratic
    return x1_only + x1*x2 + x2**2

  def deriv(self, x1, x2):
    """Return the pair (d/dx1, d/dx2) of func at (x1, x2), as used by gradient descent."""
    grad_x1 = 4*x1 - 4.2*x1**3 + x1**5 + x2
    grad_x2 = x1 + 2*x2
    return grad_x1, grad_x2
  
    
#class for stochastic GD calculations
class grad_desc(object):
  """Fixed-step gradient descent on a two-variable function.

  Parameters
  ----------
  function : object exposing ``func(x1, x2)`` and ``deriv(x1, x2)``;
      ``deriv`` must return the pair of partial derivatives.
  alpha : step size multiplied with each partial derivative.
  n_iter : number of update steps performed by ``optimise``.
  tol : stored on the instance; no method in this class consults it yet.
  """

  def __init__(self, function, alpha = 0.00003, n_iter = 10000, tol = 1e-5):
    self.alpha = alpha
    self.n_iter = n_iter
    self.tol = tol
    self.function = function

  #randomization of picking values for x1 and x2  (stochastic GD specific)
  # These helpers used to be nested inside __init__, so they were never
  # reachable as methods, and they returned the result of np.random.shuffle,
  # which shuffles in place and returns None.
  def x1_shuffled(self, x1):
    """Return a randomly permuted copy of the sequence ``x1`` (input is left untouched)."""
    return np.random.permutation(x1)

  def x2_shuffled(self, x2):
    """Return a randomly permuted copy of the sequence ``x2`` (input is left untouched)."""
    return np.random.permutation(x2)

  def grad_desc(self, x1_shuffled, x2_shuffled):
    """Perform one descent step from the point (x1_shuffled, x2_shuffled).

    Returns ``(new_x1, new_x2, dx1, dx2)``: the updated point and the
    partial derivatives evaluated at the starting point.
    """
    dx1, dx2 = self.function.deriv(x1_shuffled, x2_shuffled)

    new_x1 = x1_shuffled - self.alpha * dx1
    new_x2 = x2_shuffled - self.alpha * dx2

    return new_x1, new_x2, dx1, dx2

  def optimise(self, start_x1 = -5, start_x2 = 6, verbose = True):
    """Run ``n_iter`` descent steps starting from (start_x1, start_x2).

    Returns three lists ``(all_x1, all_x2, all_y)`` of length ``n_iter + 1``
    holding every visited point, start included, and the function value there.
    Set ``verbose`` to False to suppress the per-iteration trace.
    """
    all_x1 = [start_x1]
    all_x2 = [start_x2]
    all_y = [self.function.func(start_x1, start_x2)]

    for i in range(self.n_iter):
      # step from the most recently recorded point; the derivatives are not needed here
      new_x1, new_x2, _, _ = self.grad_desc(all_x1[-1], all_x2[-1])
      y = self.function.func(new_x1, new_x2)

      all_x1.append(new_x1)
      all_x2.append(new_x2)
      all_y.append(y)

      if verbose:
        print("i", i, "x1", new_x1,"x2", new_x2,"y", y)

    return all_x1, all_x2, all_y
#build the objective and hand it to the optimiser
function1 = F()
gd = grad_desc(function1)

#run the descent and keep every visited point and function value
all_x1, all_x2, all_y = gd.optimise()




i 0 x1 -4.92158 x2 5.99979 y 1807.3894183637049
i 1 x1 -4.849564747315373 x2 5.99957766 y 1641.2014382607663
i 2 x1 -4.783063319848055 x2 5.99936317228282 y 1499.149358397817
i 3 x1 -4.721354871663337 x2 5.999146702392078 y 1376.5778724435083
i 4 x1 -4.663848261352155 x2 5.998928394236085 y 1269.9305530686786
i 5 x1 -4.610052950353702 x2 5.998708373980271 y 1176.4474781628135
i 6 x1 -4.55955760044218 x2 5.998486753066343 y 1093.95680292436
i 7 x1 -4.512014052792691 x2 5.998263630589173 y 1020.7280485012983
i 8 x1 -4.467125145566372 x2 5.9980390951929206 y 955.3668880019397
i 9 x1 -4.424635319174249 x2 5.997813226601576 y 896.7384195189098
i 10 x1 -4.384323279024922 x2 5.997586096867555 y 843.9103625342626
i 11 x1 -4.345996199107143 x2 5.997357771400114 y 796.1104248889338
i 12 x1 -4.309485094832469 x2 5.997128309819803 y 752.6939037975035
i 13 x1 -4.274641093908151 x2 5.996897766674058 y 713.1187816915776
i 14 x1 -4.241332404560714 x2 5.996666192040875 y 676.9263813971069
i 15 x1 -4.20

In [27]:
#for comparison of performance with other GD variations:
#find the smallest function value on the path and the point where it first occurred
min_y = min(all_y)
index_min_y = all_y.index(min_y)

print("min_y:", min_y)
print("min(x1, x2):",(all_x1[index_min_y], all_x2[index_min_y]))




min_y: 8.291701109169574
min(x1, x2): (-1.9173599429630568, 3.7458226118603988)


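Experiments with different step sizes (alpha). Each entry gives the step size, followed by the smallest y reached and the point (x1, x2) where it occurred: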
0.00001
min_y: 0.005114320240766271
min(x1, x2) is (-0.0037383778076813924, 0.0732124673240082)


0.000001
min_y: 2.0337874730950265
min(x1, x2) is (0.8034441349745729, 0.7370605773345906)

0.0005
min_y: 1.1601150361756083e-08
min(x1, x2) is (-4.6289062888230705e-05, 0.00011175302435649475)

0.0003
min_y: 6.6313599851671905e-06
min(x1, x2) is (-0.0011034152633559045, 0.0026731911611193497)

0.00009
min_y: 0.008415657175000986
min(x1, x2) is (0.00431621630625691, 0.08940091938547981)
