In [10]:
import numpy as np

In [11]:
def f(w1,w2,x):
  '''
  f = funtion we are trying to estimate the parameters
  w1 = bias
  w2 = slope
  x = a point in a plane
  '''
  yhat = w1 + w2 * x
  return yhat

In [12]:
def dx_w1(w1,w2,x,y):
  '''
  dx_w1 = derivative of the loss function with respect to w1
   w1 = bias
  w2 = slope
  x = a point in a plane
  y = target value
  '''
  yhat = f(w1,w2,x)
  gradient = 2 * (yhat - y)
  return gradient


In [13]:
def dx_w2(w1,w2,x,y):
  '''
  dx_w2 = derivative of the loss function with respect to w2
  w1 = bias
  w2 = slope
  x = a point in a plane
  y = target value
  '''
  yhat = f(w1,w2,x)
  gradient = 2 * x *(yhat - y)
  return gradient

In [14]:
def gradient_w1(w1,w2,xs,ys):
  '''
  w1 = bias
  w2 = slope
  xs = input values on plane
  ys = output values on plane
  '''
  N = len(ys)
  total = 0
  for x,y in zip(xs,ys):
    total = total + dx_w1(w1,w2,x,y)
  gradient = total / N
  return gradient

In [15]:
def gradient_w2(w1,w2,xs,ys):
  '''
  w1 = bias
  w2 = slope
  xs = input values on plane
  ys = output values on plane
  '''
  N = len(ys)
  total = 0
  for x,y in zip(xs,ys):
    total = total + dx_w2(w1,w2,x,y)
  gradient = total / N
  return gradient

In [16]:
def gradient_descent(xs,ys,learning_rate = 0.01,max_run_iteration = 1000):
  '''
  xs = list of x values
  ys = list of y values
  learning_rate = learning rate
  max_run_iteration = maximum number of iterations
  '''
  w1 = np.random.uniform(0,1,1)
  w2 = np.random.uniform(0,1,1)
  for i in range(max_run_iteration):
    w1 = w1 - learning_rate * gradient_w1(w1,w2,xs,ys)
    w2 = w2 - learning_rate * gradient_w2(w1,w2,xs,ys)
    if i % 100 == 0:
      print(f"Iteration {i}: w1 = {w1}, w2 = {w2}")
  return w1,w2


In [17]:
# Here we have a simple line with intercept = 0 and slope = 2
xs = [1,2,3,4,5,6,7]
ys = [2,4,6,8,10,12,14]
w1,w2 = gradient_descent(xs,ys)
print(w1,w2)

Iteration 0: w1 = [0.46319422], w2 = [1.27463625]
Iteration 100: w1 = [0.39747469], w2 = [1.92003479]
Iteration 200: w1 = [0.26876353], w2 = [1.9459293]
Iteration 300: w1 = [0.18173191], w2 = [1.9634386]
Iteration 400: w1 = [0.12288307], w2 = [1.975278]
Iteration 500: w1 = [0.0830908], w2 = [1.98328353]
Iteration 600: w1 = [0.05618415], w2 = [1.9886967]
Iteration 700: w1 = [0.03799047], w2 = [1.99235696]
Iteration 800: w1 = [0.02568831], w2 = [1.99483195]
Iteration 900: w1 = [0.01736986], w2 = [1.99650548]
[0.01179116] [1.99762782]


In [18]:
# Here we have a simple line with intercept = 0 and slope = 1
xs = [1,2,3,4,5,6,7]
ys = [1,2,3,4,5,6,7]
w1,w2 = gradient_descent(xs,ys)
print(w1,w2)

Iteration 0: w1 = [0.16667347], w2 = [0.58940605]
Iteration 100: w1 = [0.16296559], w2 = [0.96721407]
Iteration 200: w1 = [0.1101937], w2 = [0.97783088]
Iteration 300: w1 = [0.07451053], w2 = [0.98500974]
Iteration 400: w1 = [0.05038236], w2 = [0.98986392]
Iteration 500: w1 = [0.03406743], w2 = [0.99314621]
Iteration 600: w1 = [0.02303564], w2 = [0.99536562]
Iteration 700: w1 = [0.01557618], w2 = [0.99686633]
Iteration 800: w1 = [0.01053227], w2 = [0.99788108]
Iteration 900: w1 = [0.00712169], w2 = [0.99856724]
[0.00483441] [0.9990274]


In [19]:
# Here we have a simple line with intercept = 1 and slope = 2 at
xs = [1,2,3,4,5,6,7]
ys = [3,5,7,9,11,13,15]
w1,w2 = gradient_descent(xs,ys)
print(w1,w2)

Iteration 0: w1 = [0.10290539], w2 = [1.45952665]
Iteration 100: w1 = [0.48951218], w2 = [2.10270155]
Iteration 200: w1 = [0.65481951], w2 = [2.0694445]
Iteration 300: w1 = [0.76659664], w2 = [2.04695682]
Iteration 400: w1 = [0.84217785], w2 = [2.03175116]
Iteration 500: w1 = [0.89328418], w2 = [2.02146943]
Iteration 600: w1 = [0.92784114], w2 = [2.01451715]
Iteration 700: w1 = [0.95120778], w2 = [2.00981617]
Iteration 800: w1 = [0.96700779], w2 = [2.00663748]
Iteration 900: w1 = [0.9776914], w2 = [2.00448811]
[0.98485628] [2.00304666]
