### Reference for this code 
https://github.com/mattnedrich/GradientDescentExample

# Gradient descent exercise

Run the next 4 cells once

In [1]:
from numpy import *
from datetime import datetime
import pandas as pd

# Model: y = m*x + b, where m is the slope and b is the y-intercept.
def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line y = m*x + b over ``points``.

    Row ``k`` of ``points`` is read as ``points[k, 0]`` (x) and
    ``points[k, 1]`` (y). The squared residuals are added up row by row
    and divided by the number of rows.
    """
    n_points = len(points)
    squared_error_sum = 0
    for row in range(n_points):
        # Vertical distance between the observed y and the line's prediction.
        residual = points[row, 1] - (m * points[row, 0] + b)
        squared_error_sum = squared_error_sum + residual ** 2
    return squared_error_sum / float(n_points)

In [2]:
def step_gradient(b_current, m_current, points, learningRate):
    """Take one gradient-descent step for the line y = m*x + b.

    Accumulates, row by row, the partial derivatives of the mean squared
    error with respect to b and m, then moves each parameter against its
    gradient by ``learningRate``.

    Returns ``[new_b, new_m]``.
    """
    n_rows = len(points)
    N = float(n_rows)
    grad_b = 0
    grad_m = 0
    for row in range(n_rows):
        x = points[row, 0]
        residual = points[row, 1] - ((m_current * x) + b_current)
        # Evaluated inside the loop so an empty ``points`` never divides by zero.
        scale = -(2 / N)
        grad_b += scale * residual
        grad_m += scale * x * residual
    return [
        b_current - (learningRate * grad_b),
        m_current - (learningRate * grad_m),
    ]




In [3]:
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return ``[b, m]``.

    points: data passed to ``step_gradient`` after conversion with ``array``.
    starting_b, starting_m: initial intercept and slope.
    learning_rate: step size forwarded to ``step_gradient``.
    num_iterations: number of times ``step_gradient`` is applied.
    """
    # The conversion does not depend on the loop, so do it once instead of
    # rebuilding the array on every iteration.
    data = array(points)
    b, m = starting_b, starting_m
    for _ in range(num_iterations):
        b, m = step_gradient(b, m, data, learning_rate)
    return [b, m]


In [4]:
def run(lr, b_v, m_v, i):
    """Run gradient descent on data/gd_data.csv and print a short report.

    lr:  learning rate (the exercise suggests trying 0.00001 and 0.1)
    b_v: initial y-intercept guess
    m_v: initial slope guess
    i:   number of iterations (the exercise suggests trying 10 and 10000)

    Prints the starting parameters and error, the parameters and error after
    the run, and the elapsed wall-clock time. Returns nothing.
    """
    points = genfromtxt("data/gd_data.csv", delimiter=",")

    # Timing covers both error evaluations and the descent itself.
    start_time = datetime.now()

    error_before = compute_error_for_line_given_points(b_v, m_v, points)
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(b_v, m_v, error_before))
    print("Running...")

    b, m = gradient_descent_runner(points, b_v, m_v, lr, i)
    error_after = compute_error_for_line_given_points(b, m, points)
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(i, b, m, error_after))

    elapsed = datetime.now() - start_time
    print('Duration: {}'.format(elapsed))


### Test Different Variables 
- Run the code below with different variables according to the following exercise:

### Exercise : 
    1. What are the results obtained from manipulating the learning rate? - try ( 0.00001 and 0.1 )
    2. What are the results obtained from manipulating num_iterations? - try ( 10 and 10000)
    3. How does manipulating the initial b and m cause change in the results ?
    4. Start the initial_b and initial_m at 10 what are the changes that you see ?
    5. Start the initial_b and initial_m at -10 what are the changes that you see ?
    6. What is the minimum error that you can get ?
    7. How are the values of the m , b and error different from that of the one manually calculated, when using Scikit Learn?
        - Obtain the mean squared error
        - intercept_
        - coef_
  

In [5]:
# Baseline experiment. Some values may produce errors; if so, adjust them slightly.
learning_rate = 0.001    # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 0.5          # initial slope guess
num_iterations = 100     # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)


Starting gradient descent at b = 10, m = 0.5, error = 1637.3790484995645
Running...
After 100 iterations b = -1.6492782424958336e+58, m = -8.391268428965048e+59, error = 1.7553255068785434e+123
Duration: 0:00:00.022160


In [6]:
# Exercise 1: larger learning rate (0.1), everything else as in the baseline.
learning_rate = 0.1      # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 0.5          # initial slope guess
num_iterations = 100     # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = 10, m = 0.5, error = 1637.3790484995645
Running...
After 100 iterations b = -7.182427235712127e+267, m = -3.654306068766448e+269, error = inf
Duration: 0:00:00.022904


In [24]:
# Exercise 1: smaller learning rate (0.00001), everything else as in the baseline.
learning_rate = 0.00001  # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 0.5          # initial slope guess
num_iterations = 100     # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = 10, m = 0.5, error = 1637.3790484995645
Running...
After 100 iterations b = 10.015137183282315, m = 1.277932701051702, error = 110.46745020249041
Duration: 0:00:00.027333


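The learning rate strongly affects the error. Of the values tried, only 0.00001 converges (error ≈ 110.47 after 100 iterations); with 0.001 and 0.1 the parameters diverge and the error blows up.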

In [8]:
# Exercise 2: few iterations (10) at learning rate 0.001.
learning_rate = 0.001    # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 0.5          # initial slope guess
num_iterations = 10      # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = 10, m = 0.5, error = 1637.3790484995645
Running...
After 10 iterations b = -15480.16091917473, m = -788114.4590746979, error = 1548397423449867.2
Duration: 0:00:00.005965


In [10]:
# Exercise 2: many iterations (10000) at learning rate 0.001.
learning_rate = 0.001    # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 0.5          # initial slope guess
num_iterations = 10000   # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = 10, m = 0.5, error = 1637.3790484995645
Running...
After 10000 iterations b = nan, m = nan, error = nan
Duration: 0:00:01.384571


In [11]:
# Exercise 4: start both initial_b and initial_m at 10 (1000 iterations).
learning_rate = 0.001    # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 10           # initial slope guess
num_iterations = 1000    # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)


Starting gradient descent at b = 10, m = 10, error = 189391.1642878167
Running...
After 1000 iterations b = nan, m = nan, error = nan
Duration: 0:00:00.150159


In [None]:
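# Observation: more iterations lower the error only when the learning rate is
# small enough to converge. With learning_rate = 0.001 the runs above diverge,
# so the error grows with the iteration count and eventually becomes nan.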

In [28]:
# Exercise 4: start both initial_b and initial_m at 10 (100 iterations).
learning_rate = 0.001    # gradient-descent step size
initial_b = 10           # initial y-intercept guess
initial_m = 10           # initial slope guess
num_iterations = 100     # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = 10, m = 10, error = 189391.1642878167
Running...
After 100 iterations b = 1.8362523680277467e+59, m = 9.342563387076555e+60, error = 2.175878004013662e+125
Duration: 0:00:00.038271


In [29]:
# Exercise 5: start both initial_b and initial_m at -10 (100 iterations).
learning_rate = 0.001    # gradient-descent step size
initial_b = -10          # initial y-intercept guess
initial_m = -10          # initial slope guess
num_iterations = 100     # number of gradient steps
run(lr=learning_rate, b_v=initial_b, m_v=initial_m, i=num_iterations)

Starting gradient descent at b = -10, m = -10, error = 339714.62578178523
Running...
After 100 iterations b = -2.4595640092780524e+59, m = -1.2513875032293553e+61, error = 3.9037865460519866e+125
Duration: 0:00:00.023112


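Initial b, m

The error is larger when the initial b and m are negative: starting at -10 gives a higher initial and final error than starting at 10 (both runs diverge at this learning rate).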

### Question 7.
- Comparing to Scikit Learn 


In [13]:
# Load the same CSV used for gradient descent. Passing names= supplies the
# column labels, so the first line of the file is read as data, not a header.
data = pd.read_csv('data/gd_data.csv',names =['x','y'])

In [14]:
# Preview the first rows to check that the two columns were parsed as x and y.
data.head()

Unnamed: 0,x,y
0,32.502345,31.707006
1,53.426804,68.777596
2,61.530358,62.562382
3,47.47564,71.546632
4,59.813208,87.230925


In [15]:
# Double brackets keep X as a one-column DataFrame (2-D), which is the shape
# scikit-learn estimators expect for the feature matrix; y is a 1-D Series.
X = data[['x']]
y = data['y']

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [18]:
# Ordinary least-squares fit of y on x. fit() returns the estimator itself,
# so `model` and `lr` refer to the same fitted object.
lr = LinearRegression()
model = lr.fit(X, y)


In [19]:
# Fitted parameters to compare with b and m from gradient descent:
# intercept_ corresponds to b; coef_ is an array with one slope per feature (here just m).
print('intercept:', model.intercept_)

print('slope:', model.coef_)

intercept: 7.991020982270399
slope: [1.32243102]


In [22]:
# Predictions of the fitted line on the same X it was trained on.
y_predict = model.predict(X)


In [23]:
# Mean squared error of the scikit-learn fit on the training data; this is the
# value to compare with the error printed by run().
mean_squared_error(y,y_predict)

110.25738346621316