In [17]:
#source: https://github.com/llSourcell/linear_regression_live/blob/master/demo.py

# Training data: one [wheat price per kg, average bread price] pair per row.
price_wheat_bread = [
    [0.5, 5],
    [0.6, 5.5],
    [0.8, 6],
    [1.1, 6.8],
    [1.4, 7],
]

def step_gradient(b_current,
                  m_current,
                  points,
                  learningRate):
    """
    Perform one gradient-descent update of the line y = m*x + b.

    The error being minimised is the mean squared error over `points`:
        E(b, m) = (1/N) * sum((y - (m*x + b)) ** 2)
    whose partial derivatives are
        dE/db = -(2/N) * sum(y - (m*x + b))
        dE/dm = -(2/N) * sum(x * (y - (m*x + b)))

    Args:
        b_current: Current y-intercept, b.
        m_current: Current slope, m.
        points: Sized sequence of rows; row[0] is x and row[1] is y.
        learningRate: Multiplier applied to each gradient to get the step taken.

    Returns:
        A two-element list [new_b, new_m] holding the updated y-intercept and slope.
        When `points` is empty both gradients are zero, so the coefficients are
        returned numerically unchanged.
    """
    n = float(len(points))  # Number of (x, y) pairs, as a float for true division.
    # The -(2/N) factor is the same for every point, so compute it once.
    # The 2 comes from the power rule and the minus sign from the chain rule
    # (the derivative of (y - (m*x + b)) with respect to b or m is negative).
    # Guarded so that an empty data set does not divide by zero.
    scale = -(2 / n) if n else 0.0

    b_gradient = 0  # Accumulates dE/db, the gradient for the y-intercept.
    m_gradient = 0  # Accumulates dE/dm, the gradient for the slope.
    for point in points:
        x, y = point[0], point[1]
        # Signed prediction error of the current line at this point.
        residual = y - ((m_current * x) + b_current)
        b_gradient += scale * residual
        # The extra x factor comes from differentiating m*x with respect to m.
        m_gradient += scale * x * residual

    # The gradient points in the direction of increasing error, so it is
    # subtracted to move the coefficients towards lower error.
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]

def gradient_descent_runner(points,
                            starting_b,
                            starting_m,
                            learning_rate,
                            num_iterations):
    """
    Run gradient descent for a fixed number of steps and return the final coefficients.

    Starting from `starting_b` (y-intercept) and `starting_m` (slope), call
    `step_gradient` on `points` with `learning_rate` `num_iterations` times,
    feeding each step's output into the next.

    Returns a two-element list [b, m] with the coefficients after the last step.
    If `num_iterations` is zero or negative, the starting values are returned as-is.
    """
    intercept, slope = starting_b, starting_m
    # The loop counter itself is not needed; only the number of repetitions matters.
    for _ in range(num_iterations):
        intercept, slope = step_gradient(intercept, slope, points, learning_rate)
    return [intercept, slope]

# Fit a line to the wheat/bread data, starting from a y-intercept b of 12 and
# a slope m of 42, with a learning rate of 0.01 over 10000 iterations.
# The goal is to end up with coefficients that fit the data with little error;
# the value produced is the [b, m] pair reached after the final iteration.
gradient_descent_runner(
    points=price_wheat_bread,
    starting_b=12,
    starting_m=42,
    learning_rate=0.01,
    num_iterations=10000,
)


[4.107202463019789, 2.2190814997453208]

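After 10000 iterations, gradient descent converges to a y-intercept b of about 4.1 and a slope m of about 2.2, i.e. the fitted line is: bread price ≈ 4.1 + 2.2 × (wheat price per kg).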