In [1]:
from numpy import *

# y = mx + b
# m is slope, b is y-intercept
def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line y = m*x + b over ``points``.

    Args:
        b: y-intercept of the candidate line.
        m: slope of the candidate line.
        points: 2-D array indexed as ``points[i, 0]`` (x) and
            ``points[i, 1]`` (y).

    Returns:
        The sum of squared vertical residuals divided by the number of points.
    """
    count = len(points)
    squared_residual_sum = 0
    for row in range(count):
        predicted = m * points[row, 0] + b
        residual = points[row, 1] - predicted
        # Accumulate one point at a time so the summation order is fixed.
        squared_residual_sum += residual ** 2
    return squared_residual_sum / float(count)

def step_gradient(b_current, m_current, points, learningRate):
    """Take one gradient-descent step for the line y = m*x + b.

    The gradients are the partial derivatives of the mean squared error
    (1/N) * sum((y - (m*x + b)) ** 2) with respect to b and m, accumulated
    point by point.

    Args:
        b_current: current y-intercept.
        m_current: current slope.
        points: 2-D array indexed as ``points[i, 0]`` (x) and
            ``points[i, 1]`` (y).
        learningRate: factor applied to each gradient before it is
            subtracted from the corresponding parameter.

    Returns:
        ``[new_b, new_m]`` -- the updated intercept and slope.

    Side effects:
        Prints the two accumulated gradients.
    """
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # Vertical distance between the point and the current line.
        residual = y - ((m_current * x) + b_current)
        # -(2 / N) is evaluated inside the loop on purpose: with no points
        # the loop never runs, so an empty input leaves the parameters
        # unchanged instead of dividing by zero.
        b_gradient += -(2 / N) * residual
        m_gradient += -(2 / N) * x * residual
    # Move each parameter against its gradient, scaled by the learning rate.
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("b_gradient is {0}, m_gradient is {1}".format(b_gradient, m_gradient))
    return [new_b, new_m]

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the fitted line.

    Args:
        points: array-like of (x, y) rows; converted to a numpy array once
            before iterating.
        starting_b: initial y-intercept.
        starting_m: initial slope.
        learning_rate: step size forwarded to ``step_gradient``.
        num_iterations: number of update steps to perform.

    Returns:
        ``[b, m]`` -- the intercept and slope after the last step.

    Side effects:
        Prints the parameters and the current error after every step.
    """
    # Convert once up front: the conversion is loop-invariant, and using the
    # same array for both the step and the error computation means list
    # input works for both instead of only for the step.
    points = array(points)
    b = starting_b
    m = starting_m
    for _ in range(num_iterations):
        b, m = step_gradient(b, m, points, learning_rate)
        error = compute_error_for_line_given_points(b, m, points)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Currently b = {0}, m = {1}, error = {2}".format(b, m, error))
    return [b, m]

def run(data_path="data.csv", learning_rate=0.0001, initial_b=0, initial_m=0, num_iterations=10):
    """Fit a line to the points in a CSV file with gradient descent and report it.

    Called with no arguments this behaves as before: it reads ``data.csv``
    and runs 10 iterations with a learning rate of 0.0001 starting from
    b = 0, m = 0.

    Args:
        data_path: comma-delimited file loaded with ``genfromtxt``; column 0
            is used as x and column 1 as y by the fitting functions.
        learning_rate: step size for each gradient-descent update.
        initial_b: initial y-intercept guess.
        initial_m: initial slope guess.
        num_iterations: number of gradient-descent steps to run.

    Side effects:
        Prints the starting state, per-step progress (via the runner), and
        the final parameters and error.
    """
    points = genfromtxt(data_path, delimiter=",")
    initial_error = compute_error_for_line_given_points(initial_b, initial_m, points)
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, initial_error))
    print("Running...")
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    final_error = compute_error_for_line_given_points(b, m, points)
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, final_error))

# Entry point: run the demo when this code is executed directly (the guard
# also passes when the cell is run in the notebook, as the recorded output
# below shows), but not when the code is imported as a module.
if __name__ == '__main__':
    run()


Starting gradient descent at b = 0, m = 0, error = 5565.10783448
Running...
b_gradient is -145.470101107, m_gradient is -7370.70297359
Currently b = 0.0145470101107, m = 0.737070297359, error = 1484.58655741
b_gradient is -73.2695284886, m_gradient is -3697.25156992
Currently b = 0.0218739629596, m = 1.10679545435, error = 457.854257574
b_gradient is -37.052613617, m_gradient is -1854.59210562
Currently b = 0.0255792243213, m = 1.29225466491, error = 199.509985726
b_gradient is -18.8856523785, m_gradient is -930.285907381
Currently b = 0.0274677895591, m = 1.38528325565, error = 134.505910582
b_gradient is -9.77282422595, m_gradient is -466.639767331
Currently b = 0.0284450719817, m = 1.43194723238, error = 118.149693422
b_gradient is -5.2016878474, m_gradient is -234.067765137
Currently b = 0.0289652407665, m = 1.4553540089, error = 114.03414906
b_gradient is -2.90873359568, m_gradient is -117.406088221
Currently b = 0.029256114126, m = 1.46709461772, error = 112.998577317
b_gradient 

In [2]:
# At each point you have an error and an error gradient (which tells you in which direction you would have to move, and roughly by how much).
# Summing the gradients over all points tells you how far each parameter has to move, and in which direction.
# But "how far" and "how much" don't mean I know exactly where the parameters will end up;
# rather, I know how steep the slope is there, so I know whether they need to move "a lot" or "a little".
# How far we adjust the parameters, and in which direction, is determined by the gradient (just described) and
# the learning rate