# Gradient Descent Exercises

## Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from math import log

## Implementing Gradient Descent Algorithm

In [None]:
def gradient_descent(derivative_func, learning_rate=0.02,
                     initial_value=0.5, precision=0.001, max_iter=300):
    """Minimize a one-parameter cost function by gradient descent.

    Args:
        derivative_func: Callable returning the derivative of the cost
            function at a given point.
        learning_rate: Factor the derivative is multiplied by to get a step.
        initial_value: Starting point of the search.
        precision: The search stops as soon as the size of the next step
            (learning_rate * derivative at the new point) is at most this.
        max_iter: Upper bound on the number of updates performed.

    Returns:
        A tuple (t, t_list, runs): the final point, the points visited
        before each update (the final point itself is not included), and
        the number of updates performed (0 when max_iter is less than 1).
    """
    t = initial_value
    t_list = []

    if max_iter < 1:
        #Nothing to iterate: report zero runs instead of failing on an unset loop counter.
        return t, t_list, 0

    #The step for the current point is computed once and reused by the next
    #update, so the derivative is evaluated a single time per visited point.
    step = learning_rate * derivative_func(t)

    for i in range(1, max_iter + 1):
        t_list.append(t)
        t = t - step
        step = learning_rate * derivative_func(t)

        #Exercise variant: stop only on an exactly zero slope,
        #i.e. `if derivative_func(t) == 0:`
        if abs(step) <= precision:
            break

    return t, t_list, i

## Problem 1
### $$ g(t)=t^4-4t^2+5 $$

### $$ \frac{dg(t)}{dt}=4t^3-8t $$

In [None]:
#Cost function for Problem 1
def g(t):
    """Return g(t) = t^4 - 4t^2 + 5 for a scalar or NumPy array t."""
    quartic_term = t**4
    quadratic_term = 4*t**2
    return quartic_term - quadratic_term + 5

#Slope of the Problem 1 cost function
def dg(t):
    """Return dg/dt = 4t^3 - 8t for a scalar or NumPy array t."""
    cubic_term = 4*t**3
    linear_term = 8*t
    return cubic_term - linear_term

In [None]:
#Sample points for plotting g(t).
#np.linspace returns `num` evenly spaced values from `start` to `stop`, both ends included.
t_1 = np.linspace(start=-2, stop=2, num=1000)
type(t_1)
#Uncomment the next line to inspect the values themselves.
#t_1

In [None]:
#Draw the cost curve g(t) over the sampled interval
plt.figure(figsize=(8, 5))
plt.title('Cost function')
plt.xlabel('t', fontsize=14)
plt.ylabel('g(t)', fontsize=14)
plt.plot(t_1, g(t_1))
plt.show()

In [None]:
#Run gradient descent on g(t) with:
#initial_value= 0.5, learning_rate= 0.02, precision= 0.0001, max_iter= 1000
local_minima, t_list, runs = gradient_descent(
    dg,
    learning_rate=0.02,
    initial_value=0.5,
    precision=0.0001,
    max_iter=1000,
)

#Report the point reached, its cost and slope, and how many updates it took
print('Local minima occurs at', local_minima)
print('Cost at this point is', g(local_minima))
print('Slope at this point is', dg(local_minima))
print('Loop runs this many times:', runs)

In [None]:
#Overlay the points visited by gradient descent (red) on the cost curve g(t)
plt.figure(figsize=(8, 5))
plt.title('Cost function')
plt.xlabel('t', fontsize=14)
plt.ylabel('g(t)', fontsize=14)
plt.plot(t_1, g(t_1))
#t_list is a plain Python list; convert it so g() can evaluate every point at once
plt.scatter(t_list, g(np.array(t_list)), alpha=0.4, color='red')
plt.show()

### TASK FOR YOU
Repeat after making the following changes and observe the results:

1. In the implementation of gradient descent, replace the precision-based stopping condition (the `if abs(...)<=precision:` line) with an exact-zero check on the slope, `if derivative_func(t)==0:`. Then run the calling code again and observe the results.
2. Change initial value to -0.5, 1.5, 5, 50, 0.
3. Change learning rate to 0.002, 0.2, 2.

### Max Float Value in Python

In [None]:
#Display the largest finite float value available in this Python build.
import sys
sys.float_info.max
#This is the max value of float that can be handled by the machine.

In [None]:
#Plotting cost function against iterations for several learning rates

#Low learning rate (0.0005)
low_local_minimum,low_t_list,low_runs=gradient_descent(dg,initial_value=3,precision=0.0001,
                                                       max_iter=100,learning_rate=0.0005)

#Medium learning rate (0.001)
mid_local_minimum,mid_t_list,mid_runs=gradient_descent(dg,initial_value=3,precision=0.0001,
                                                       max_iter=100,learning_rate=0.001)

#High learning rate (0.002)
high_local_minimum,high_t_list,high_runs=gradient_descent(dg,initial_value=3,precision=0.0001,
                                                          max_iter=100,learning_rate=0.002)

#Very very high learning rate (0.25)
insane_local_minimum,insane_t_list,insane_runs=gradient_descent(dg,initial_value=1.9,precision=0.0001,
                                                                max_iter=100,learning_rate=0.25)

#(legend label, visited points, color) for every run, in legend order
traces=[
    ('0.0005',low_t_list,'lightgreen'),
    ('0.001',mid_t_list,'hotpink'),
    ('0.002',high_t_list,'steelblue'),
    ('0.25',insane_t_list,'red'),
]

plt.figure(figsize=(15,10))
for label,trace,color in traces:
    #gradient_descent may stop before max_iter is reached, so the iteration
    #axis is sized from the trace itself rather than assumed to be 100 long.
    n=list(range(1,len(trace)+1))
    cost=g(np.array(trace))
    #Only the line carries a label, so each learning rate appears once in the legend.
    plt.plot(n,cost,color=color,label=label)
    plt.scatter(n,cost,color=color,alpha=0.8)

plt.xlabel('Iterations', fontsize=18)
plt.ylabel('Cost function', fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(title='Learning Rate',title_fontsize=14, fontsize=18)
plt.show()

## Problem 2
### $$ h(t)=t^5-2t^4+2 $$
### $$ \frac{dh(t)}{dt}=5t^4-8t^3 $$

In [None]:
#Cost function for Problem 2
def h(t):
    """Return h(t) = t^5 - 2t^4 + 2 for a scalar or NumPy array t."""
    quintic_term = t**5
    quartic_term = 2*t**4
    return quintic_term - quartic_term + 2

#Slope of the Problem 2 cost function
def dh(t):
    """Return dh/dt = 5t^4 - 8t^3 for a scalar or NumPy array t."""
    quartic_term = 5*t**4
    cubic_term = 8*t**3
    return quartic_term - cubic_term

#Sample points for plotting h(t): 1000 evenly spaced values from -2.5 to 2.5
t_2 = np.linspace(start=-2.5, stop=2.5, num=1000)

#Draw the cost curve h(t), with the view restricted to a fixed window
plt.figure(figsize=(8, 5))
plt.xlim(-1.2, 2.5)
plt.ylim(-1, 4)
plt.title('Cost function')
plt.xlabel('t', fontsize=14)
plt.ylabel('h(t)', fontsize=14)
plt.plot(t_2, h(t_2))
plt.show()

In [None]:
#Run gradient descent on h(t), report the outcome, then plot the search trace
local_minima, t_list, runs = gradient_descent(
    dh,
    learning_rate=0.02,
    initial_value=1,
    precision=0.001,
    max_iter=20,
)

#Report the point reached, its cost and slope, and how many updates it took
print('Local minima occures at ', local_minima)
print('Cost at this point is ', h(local_minima))
print('Slope at this point is ', dh(local_minima))
print('Loop runs this many times: ', runs)

#Cost curve with the visited points overlaid in red
plt.figure(figsize=(8, 5))
plt.xlim(-1.2, 2.5)
plt.ylim(-1, 4)
plt.title('Cost function')
plt.xlabel('t', fontsize=14)
plt.ylabel('h(t)', fontsize=14)
plt.plot(t_2, h(t_2))
#t_list is a plain Python list; convert it so h() can evaluate every point at once
plt.scatter(t_list, h(np.array(t_list)), alpha=0.4, color='red')
plt.show()

### TASK FOR YOU
Repeat after making the following changes and observe the results:
1. initial_value= -0.8, max_iter= 8
2. initial_value= -0.8, learning_rate= 0.02, precision= 0.001,max_iter= 9

## Problem 3
The following cost function has 2 parameters.
### $$ f(t1,t2)=\frac{1}{3^{-t1^2-t2^2}+1} $$

In [None]:
#Two-parameter cost function for Problem 3
def f(t1,t2):
    """Return f(t1,t2) = 1 / (3^(-t1^2 - t2^2) + 1) for scalars or NumPy arrays."""
    exponent = -t1**2-t2**2
    denominator = 3**exponent+1
    return 1/denominator

#Build the (t1, t2) grid: 200 evenly spaced values from -2 to 2 along each axis.
#meshgrid expands the two 1D axes into 2D coordinate arrays, as required by
#the plotting function plot_surface.
t1, t2 = np.meshgrid(np.linspace(-2, 2, 200), np.linspace(-2, 2, 200))

In [None]:
#Render the cost surface f(t1,t2) in 3D
fig = plt.figure(figsize=[16, 12])
ax = plt.axes(projection='3d')

#Semi-transparent surface
#Explore other colormaps
#Website for color choices: materialpalette.com
ax.plot_surface(t1, t2, f(t1, t2), cmap='summer', alpha=0.4)

#Axis labels
ax.set_xlabel('t1', fontsize=16)
ax.set_ylabel('t2', fontsize=16)
ax.set_zlabel('Cost - f(t1,t2)', fontsize=16)
plt.show()

### Partial derivatives for $$ f(t1,t2)=\frac{1}{3^{-t1^2-t2^2}+1} $$ w.r.t. t1 and t2

$$ \frac{\partial f(t1,t2)}{\partial t1} = \frac{2 \cdot t1 \cdot \ln(3) \cdot 3^{-t1^2-t2^2}}{(3^{-t1^2-t2^2}+1)^2} $$
$$ \frac{\partial f(t1,t2)}{\partial t2} = \frac{2 \cdot t2 \cdot \ln(3) \cdot 3^{-t1^2-t2^2}}{(3^{-t1^2-t2^2}+1)^2} $$

Use online tools for finding derivatives.
One website is https://www.symbolab.com/solver/derivative-calculator

In [None]:
#Implementing partial derivatives
def pdfx(t1,t2):
    """Return the partial derivative of f(t1,t2) with respect to t1."""
    #3^(-t1^2 - t2^2) appears in both the numerator and the denominator
    power_term = 3**(-t1**2-t2**2)
    return 2*t1*log(3)*power_term/(power_term+1)**2
def pdfy(t1,t2):
    """Return the partial derivative of f(t1,t2) with respect to t2."""
    #3^(-t1^2 - t2^2) appears in both the numerator and the denominator
    power_term = 3**(-t1**2-t2**2)
    return 2*t2*log(3)*power_term/(power_term+1)**2

In [None]:
#Custom implementation of gradient descent for two parameters
learning_rate = 0.1
max_iter = 20000
initial_t1 = 1.8
initial_t2 = 1.5
precision = 0.000001

t1_old = initial_t1
t2_old = initial_t2

#Points visited by the search, one entry per update
t1_list = []
t2_list = []

#Steps for the current point; both partial derivatives are evaluated at the
#same (t1, t2) before either parameter is moved.
step_t1 = learning_rate * pdfx(t1_old, t2_old)
step_t2 = learning_rate * pdfy(t1_old, t2_old)

for i in range(1, max_iter + 1):
    t1_list.append(t1_old)
    t2_list.append(t2_old)

    t1_new = t1_old - step_t1
    t2_new = t2_old - step_t2

    t1_old = t1_new
    t2_old = t2_new

    step_t1 = learning_rate * pdfx(t1_old, t2_old)
    step_t2 = learning_rate * pdfy(t1_old, t2_old)

    #Stop once the next step would be no larger than `precision` along both
    #parameters, instead of always running all max_iter updates.
    if max(abs(step_t1), abs(step_t2)) <= precision:
        break

In [None]:
#Render the cost surface again and overlay the path taken by gradient descent

#Convert the recorded points to arrays so f() can evaluate the whole path at once
t1_list = np.array(t1_list)
t2_list = np.array(t2_list)

fig = plt.figure(figsize=[16, 12])
ax = plt.axes(projection='3d')

#Semi-transparent surface first, then the visited points in red on top of it
ax.plot_surface(t1, t2, f(t1, t2), cmap='summer', alpha=0.4)
ax.scatter(t1_list, t2_list, f(t1_list, t2_list), s=50, color='red', alpha=0.4)

#Axis labels
ax.set_xlabel('t1', fontsize=16)
ax.set_ylabel('t2', fontsize=16)
ax.set_zlabel('Cost - f(t1,t2)', fontsize=16)
plt.show()