# Gradient descent

In [10]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import math

We are given two vectors, x and y, and we want to find the best-fit line y = m * x + b, i.e. estimate the slope m and the intercept b.

In [7]:
def gradient_descent(x, y, iterations=1000, learning_rate=0.08, verbose=True):
    """Fit the line ``y = m * x + b`` by batch gradient descent on the MSE.

    Parameters
    ----------
    x, y : array-like of numbers
        One-dimensional inputs of equal, non-zero length (lists, NumPy
        arrays and pandas Series are all accepted).
    iterations : int, default 1000
        Number of gradient steps to take.
    learning_rate : float, default 0.08
        Step size applied to both partial derivatives. Too large a value
        makes the cost diverge; too small a value needs more iterations.
    verbose : bool, default True
        When true, print m, b, the cost and the iteration index after every
        step (the cost shown is the one measured *before* that step's update).

    Returns
    -------
    tuple of float
        ``(m, b)`` after the last step.

    Raises
    ------
    ValueError
        If the inputs are empty, not one-dimensional, or differ in length.
    """
    # Coerce up front: with a plain list, `0 * x` would be an empty list
    # rather than an element-wise product.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be one-dimensional and of equal length")
    if x.size == 0:
        raise ValueError("x and y must contain at least one data point")

    # Start from m = b = 0 and walk downhill towards the minimum of the cost.
    m_curr = 0.0
    b_curr = 0.0
    for i in range(iterations):
        # Prediction and residuals for the current line.
        y_predicted = m_curr * x + b_curr
        residual = y - y_predicted
        # Cost: mean squared error.
        cost = np.mean(residual ** 2)
        # Partial derivatives of the MSE with respect to m and b.
        md = -2 * np.mean(x * residual)
        bd = -2 * np.mean(residual)
        # Step against the gradient.
        m_curr = m_curr - learning_rate * md
        b_curr = b_curr - learning_rate * bd
        if verbose:
            print('m {}, b {}, cost {}, iteration {}'.format(m_curr,b_curr,cost,i))

    return float(m_curr), float(b_curr)

In [8]:
# Toy data lying exactly on the line y = 2x + 3, so the ideal fit is m = 2, b = 3.
x = np.arange(1, 6)
y = 2 * x + 3

gradient_descent(x,y)

m 0.062, b 0.018000000000000002, cost 89.0, iteration 0
m 0.122528, b 0.035592000000000006, cost 84.881304, iteration 1
m 0.181618832, b 0.052785648000000004, cost 80.955185108544, iteration 2
m 0.239306503808, b 0.069590363712, cost 77.21263768455901, iteration 3
m 0.29562421854195203, b 0.086015343961728, cost 73.64507722605434, iteration 4
m 0.35060439367025875, b 0.10206956796255283, cost 70.2443206760065, iteration 5
m 0.40427867960173774, b 0.11776180246460617, cost 67.00256764921804, iteration 6
m 0.4566779778357119, b 0.13310060678206653, cost 63.912382537082294, iteration 7
m 0.5078324586826338, b 0.14809433770148814, cost 60.966677449199324, iteration 8
m 0.5577715785654069, b 0.16275115427398937, cost 58.15869595270883, iteration 9
m 0.606524096911324, b 0.17707902249404894, cost 55.481997572035766, iteration 10
m 0.6541180926443106, b 0.1910857198675929, cost 52.9304430134884, iteration 11
m 0.7005809802869303, b 0.20477883987199186, cost 50.49818008081245, iteration 12
m 0

Exercise - The dataset contains the math and cs scores of a group of students, and the goal is to find the (linear) relationship between them: math is x and cs is y. Find the values of m and b by applying the gradient descent algorithm. At each iteration, compare the current cost with the cost of the previous iteration and stop the loop as soon as the two are close enough. To make that comparison, use the math.isclose function with a relative tolerance of 1e-20. How many iterations are needed to find the values of m and b?

In [13]:
# Load the exam scores: the math column is the feature (x), the cs column the target (y).
df = pd.read_csv("test_scores.csv")
x2 = np.array(df["math"])
y2 = np.array(df["cs"])

In [14]:
def gradient_descent2(x, y, iterations=1000, learning_rate=0.0002, tolerance=1e-20, verbose=True):
    """Fit ``y = m * x + b`` by gradient descent, stopping once the cost settles.

    After every step the current cost (MSE) is compared with the cost of the
    previous iteration using ``math.isclose``; the loop ends as soon as the
    two agree within ``tolerance`` or when ``iterations`` steps have been taken.

    Parameters
    ----------
    x, y : array-like of numbers
        One-dimensional inputs of equal, non-zero length.
    iterations : int, default 1000
        Upper bound on the number of gradient steps. If the returned step
        count equals this bound, the stopping test may never have fired and a
        larger bound is needed to answer "how many iterations until the cost
        stops changing".
    learning_rate : float, default 0.0002
        Step size applied to both partial derivatives.
    tolerance : float, default 1e-20
        Relative tolerance passed to ``math.isclose``. The default is far
        below double-precision resolution, so in practice it demands that two
        consecutive costs be exactly equal.
    verbose : bool, default True
        When true, print m, b, the cost and the iteration index after every
        step that did not trigger the stop.

    Returns
    -------
    tuple
        ``(m, b, steps)`` where ``steps`` is the number of gradient steps
        actually performed.

    Raises
    ------
    ValueError
        If the inputs are empty, not one-dimensional, or differ in length.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be one-dimensional and of equal length")
    if x.size == 0:
        raise ValueError("x and y must contain at least one data point")

    # Start from m = b = 0 and walk downhill towards the minimum of the cost.
    m_curr = 0.0
    b_curr = 0.0
    # Cost seen on the previous iteration; None until one has been computed.
    cost_previous = None
    steps = 0
    for i in range(iterations):
        # Prediction and residuals for the current line.
        y_predicted = m_curr * x + b_curr
        residual = y - y_predicted
        # Cost: mean squared error.
        cost = float(np.mean(residual ** 2))
        # Partial derivatives of the MSE with respect to m and b.
        md = -2 * np.mean(x * residual)
        bd = -2 * np.mean(residual)
        # Step against the gradient.
        m_curr = m_curr - learning_rate * md
        b_curr = b_curr - learning_rate * bd
        steps = i + 1
        if cost_previous is not None and math.isclose(cost, cost_previous, rel_tol=tolerance):
            break
        # Remember this cost so the next iteration has something to compare
        # against; without this the stopping test can never succeed.
        cost_previous = cost
        if verbose:
            print('m {}, b {}, cost {}, iteration {}'.format(m_curr,b_curr,cost,i))

    return float(m_curr), float(b_curr), steps

In [15]:
# Run gradient descent with the cost-based stopping check on the exam scores (math -> cs).
gradient_descent2(x2,y2)

m 1.9783600000000003, b 0.027960000000000002, cost 5199.1, iteration 0
m 0.20975041279999962, b 0.0030470367999999894, cost 4161.482445460163, iteration 1
m 1.7908456142986242, b 0.025401286955264, cost 3332.2237319269248, iteration 2
m 0.37738163667530467, b 0.005499731626422651, cost 2669.4843523161976, iteration 3
m 1.6409848166378898, b 0.023373894401807944, cost 2139.826383775145, iteration 4
m 0.5113514173939655, b 0.0074774305434828076, cost 1716.5264071567592, iteration 5
m 1.5212165764726306, b 0.021771129698498662, cost 1378.2272007804495, iteration 6
m 0.6184191426785134, b 0.009075514323270572, cost 1107.8601808918404, iteration 7
m 1.4254981563597626, b 0.020507724625171385, cost 891.7842215178443, iteration 8
m 0.7039868810749315, b 0.010370210797388455, cost 719.0974036421305, iteration 9
m 1.3490002310389348, b 0.01951553325074733, cost 581.0869686205, iteration 10
m 0.7723719384951477, b 0.01142244086408669, cost 470.7897237271261, iteration 11
m 1.2878632281408475, b 

In [16]:
# Reference fit with scikit-learn's LinearRegression on the same data,
# to compare against the m and b found by gradient descent.
r = LinearRegression()
r.fit(df[['math']],df.cs)

In [17]:
# Slope of the scikit-learn fit (the value m should approach).
r.coef_

array([1.01773624])

In [18]:
# Intercept of the scikit-learn fit (the value b should approach).
r.intercept_

1.9152193111569318