# Foundations of AI & ML
## Session 05
### Experiment 2 - Part 1

**Objectives:** Plot a quadratic equation representing an error function and see how gradient descent arrives at the minimum of the plot.

**Expected Time:** This Experiment should take around 20 mins

In [1]:
import matplotlib
matplotlib.use('nbagg')
import numpy as np
import matplotlib.pyplot as plt

### We assume the error function is $w^2 + 2w + 2$

In [2]:
def error(w):
    """Evaluate the quadratic error function w^2 + 2w + 2 at ``w``."""
    quadratic_term = w**2
    linear_term = 2*w
    return quadratic_term + linear_term + 2

### Making the data

In [14]:
# Sample the error curve at the integers -10..9; these points draw the
# background parabola in the plots further down.
x = list(range(-10, 10))
err = [error(w_i) for w_i in x]

### The derivative of the error function is $2w + 2$

In [15]:
def gradient(w):
    """Return 2w + 2, the derivative of the error function, evaluated at ``w``."""
    doubled = 2*w
    return doubled + 2

Let us write a function for gradient descent that can be reused:

In [16]:
def delta(w, eta):
    """Return the update step for ``w``: the gradient at ``w`` scaled by ``eta``."""
    slope = gradient(w)
    return eta*slope

def gradient_descent(eta, w, nb_of_iterations):
    """Run gradient descent on ``error`` starting from ``w``.

    Parameters
    ----------
    eta : learning rate; multiplies the gradient in every update.
    w : initial parameter value.
    nb_of_iterations : number of updates to perform.

    Returns
    -------
    numpy array built from ``nb_of_iterations + 1`` entries of
    ``[w, error(w)]``; entry 0 is the starting point and entry ``k`` is the
    state after ``k`` updates.

    Side effect: prints the one-element history list (the starting point)
    before the first update.
    """
    history = [np.array([w, error(w)])]  # trajectory of [w, error] pairs
    print(history)
    for _ in range(nb_of_iterations):
        # Step against the gradient, then record where we landed.
        w = w - delta(w, eta)
        history.append(np.array([w, error(w)]))
    return np.array(history)

### $\eta = 0.2$

In [17]:
# Settings for this run
eta = 0.2               # learning rate
w = 5                   # initial parameter value
nb_of_iterations = 20   # number of gradient descent updates

# One [w, error] row per step; row 0 is the starting point
w_err_02 = gradient_descent(eta, w, nb_of_iterations)
w_err_02

[array([ 5, 37])]


array([[ 5.        , 37.        ],
       [ 2.6       , 13.96      ],
       [ 1.16      ,  5.6656    ],
       [ 0.296     ,  2.679616  ],
       [-0.2224    ,  1.60466176],
       [-0.53344   ,  1.21767823],
       [-0.720064  ,  1.07836416],
       [-0.8320384 ,  1.0282111 ],
       [-0.89922304,  1.010156  ],
       [-0.93953382,  1.00365616],
       [-0.96372029,  1.00131622],
       [-0.97823218,  1.00047384],
       [-0.98693931,  1.00017058],
       [-0.99216358,  1.00006141],
       [-0.99529815,  1.00002211],
       [-0.99717889,  1.00000796],
       [-0.99830733,  1.00000287],
       [-0.9989844 ,  1.00000103],
       [-0.99939064,  1.00000037],
       [-0.99963438,  1.00000013],
       [-0.99978063,  1.00000005]])

### $\eta = 0.5$

In [18]:
# Settings for this run
eta = 0.5               # learning rate
w = 5                   # initial parameter value
nb_of_iterations = 20   # number of gradient descent updates

# One [w, error] row per step; row 0 is the starting point
w_err_05 = gradient_descent(eta, w, nb_of_iterations)
w_err_05

[array([ 5, 37])]


array([[ 5., 37.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.],
       [-1.,  1.]])

### $\eta = 0.7$

In [19]:
# Settings for this run
eta = 0.7               # learning rate
w = 5                   # initial parameter value
nb_of_iterations = 20   # number of gradient descent updates

# One [w, error] row per step; row 0 is the starting point
w_err_07 = gradient_descent(eta, w, nb_of_iterations)
w_err_07

[array([ 5, 37])]


array([[ 5.        , 37.        ],
       [-3.4       ,  6.76      ],
       [-0.04      ,  1.9216    ],
       [-1.384     ,  1.147456  ],
       [-0.8464    ,  1.02359296],
       [-1.06144   ,  1.00377487],
       [-0.975424  ,  1.00060398],
       [-1.0098304 ,  1.00009664],
       [-0.99606784,  1.00001546],
       [-1.00157286,  1.00000247],
       [-0.99937085,  1.0000004 ],
       [-1.00025166,  1.00000006],
       [-0.99989934,  1.00000001],
       [-1.00004027,  1.        ],
       [-0.99998389,  1.        ],
       [-1.00000644,  1.        ],
       [-0.99999742,  1.        ],
       [-1.00000103,  1.        ],
       [-0.99999959,  1.        ],
       [-1.00000016,  1.        ],
       [-0.99999993,  1.        ]])

In [20]:
# Print w and cost for every step of the eta = 0.7 run
# (index 0 is the starting point, the last index is the final value)
for i, (w_i, cost_i) in enumerate(w_err_07):
    print('w({}): {:.4f} \t cost: {:.4f}'.format(i, w_i, cost_i))

w(0): 5.0000 	 cost: 37.0000
w(1): -3.4000 	 cost: 6.7600
w(2): -0.0400 	 cost: 1.9216
w(3): -1.3840 	 cost: 1.1475
w(4): -0.8464 	 cost: 1.0236
w(5): -1.0614 	 cost: 1.0038
w(6): -0.9754 	 cost: 1.0006
w(7): -1.0098 	 cost: 1.0001
w(8): -0.9961 	 cost: 1.0000
w(9): -1.0016 	 cost: 1.0000
w(10): -0.9994 	 cost: 1.0000
w(11): -1.0003 	 cost: 1.0000
w(12): -0.9999 	 cost: 1.0000
w(13): -1.0000 	 cost: 1.0000
w(14): -1.0000 	 cost: 1.0000
w(15): -1.0000 	 cost: 1.0000
w(16): -1.0000 	 cost: 1.0000
w(17): -1.0000 	 cost: 1.0000
w(18): -1.0000 	 cost: 1.0000
w(19): -1.0000 	 cost: 1.0000
w(20): -1.0000 	 cost: 1.0000


### Let us see how gradient descent arrives at the minimum

In [22]:
# NOTE: `w` is not used by the plotting below; the assignment is kept only so
# the notebook's global state after this cell is unchanged.
w = list(range(-10,10))

# (learning rate, trajectory) for each panel; every trajectory is an array
# of [w, error] rows as returned by gradient_descent.
runs = [
    (0.2, w_err_02),
    (0.5, w_err_05),
    (0.7, w_err_07),
]

# One panel per learning rate, stacked vertically.
fig, axes = plt.subplots(len(runs), 1, figsize=(8, 10))
for ax, (eta_value, trajectory) in zip(axes, runs):
    ax.plot(x, err)                                    # the error curve
    ax.plot(trajectory[:, 0], trajectory[:, 1], "o")   # points visited by descent
    ax.set_title("error vs w (eta = {})".format(eta_value))
    ax.set_xlabel("w")
    ax.set_ylabel("error")
    ax.grid(True)  # enabled per panel so each subplot actually gets a grid

    # Label every visited point in order: 1 is the starting point and the
    # last label is the final value, so no point is left unannotated.
    for label, (w_k, err_k) in enumerate(trajectory, start=1):
        ax.annotate(str(label), (w_k, err_k))

fig.tight_layout()  # keep titles and axis labels of stacked panels from overlapping
plt.show()

<IPython.core.display.Javascript object>

In [23]:
# One integer per gradient-descent update of the eta = 0.2 run:
# 1 .. (number of recorded rows - 1)
range(1, w_err_02.shape[0])

range(1, 21)

**Exercise:** Observe how we reached the minimum for each learning rate.