In [31]:
def compute_err_for_points(b, m, points):
    total_error = 0
    for i in range(0, len(points)):
        # [x, y] from dataset (csv file)
        x = points[i, 0]
        y = points[i, 1]
        total_error += (y - (m*x + b))**2
        return total_error / float(len(points))

In [212]:
def step_gradient(current_b, current_m, points, learning_rate):
    #gradient descent
    gradient_b = 0
    gradient_m = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        gradient_b += -(2/N) * (y - ((current_m * x) + current_b))
        gradient_m += -(2/N) * x * (y - ((current_m * x) + current_b))
        new_b = current_b - (learning_rate * gradient_b)
        new_m = current_m - (learning_rate * gradient_m)
    
    return [new_b, new_m]

The error (RSS) decreases for each iteration.

In [222]:
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations, acceptable_err):
    b = starting_b
    m = starting_m
    current_err = compute_err_for_points(b, m, points)
    i = 0
    while (acceptable_err <= current_err and i < num_iterations):
        b, m = step_gradient(b, m, array(points), learning_rate)
        gradient_size = (b**2 + m**2)**0.5 #check this
        print(gradient_size)
        if (gradient_size <= acceptable_err): #check this
            break
            
        current_err = compute_err_for_points(b, m, points)
        print("Gradient descent partial with new b = {}, new m = {} and error = {} at iteration {}." \
                                    .format(b, m, current_err, i+1))
        i += 1

    return [b, m]

For b ~ -39 and m ~ 5, we set the `learning_rate = 0.00335` and the `num_iterations = 14000`. With this setup, we get b ~ -38.99 and m ~ 5.57.

With a `learning_rate = 0.0008` and `num_iterations = 14000`, we get b ~ -25.78 and m ~ 4.79 with an error of ~ 0.7. By using a tolerance (`acceptable_err = 0.00005`), we get b ~ -14.37 and m ~ 4.11, with only 6018 iterations.

In [214]:
def run():
    points = genfromtxt('income.csv', delimiter=',')
    #hyperparameters: 
    #too short --> too slow to converge
    #too great --> never converge
    learning_rate = 0.0008
    #y = mx + b (slope formula)
    initial_b = 0
    initial_m = 0
    num_iterations = 14000
    acceptable_err = 0.00005
    initial_err = compute_err_for_points(initial_b, initial_m, points)
    print("Starting gradient descent with b = {}, m = {} and error = {}".format(initial_b, initial_m, initial_err))
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations, acceptable_err)
    print("Gradient descent finished with b = {}, m = {} and error = {}".format(b, m, compute_err_for_points(b, m, points)))
   

In [221]:
from numpy import *

if __name__ == '__main__':
    run()

Starting gradient descent with b = 0, m = 0 and error = 23.689789509379533
1.40095900009
Gradient descent partial with new b = 0.08023275089091605, new m = 1.3986596532450708 and error = 5.285290108351334 at iteration 1.
2.19719720119
Gradient descent partial with new b = 0.12453144225733284, new m = 2.1936653028179314 and error = 0.7046141727289403 at iteration 2.
2.64974347798
Gradient descent partial with new b = 0.14840711108849813, new m = 2.6455842130784433 and error = 9.933393494608196e-05 at iteration 3.
2.90695366774
Gradient descent partial with new b = 0.16067545474686445, new m = 2.9025097802786615 and error = 0.21284659454558857 at iteration 4.
3.0531458512
Gradient descent partial with new b = 0.1663468745350518, new m = 3.0486108813778547 and error = 0.5316325327633656 at iteration 5.
3.13624222346
Gradient descent partial with new b = 0.1682690318634387, new m = 3.131724894867604 and error = 0.7765610690574505 at iteration 6.
3.18347936176
Gradient descent partial with 

3.43722364954
Gradient descent partial with new b = -0.9368195123579302, new m = 3.3070947398293824 and error = 0.9992930251974126 at iteration 380.
3.43819114325
Gradient descent partial with new b = -0.9397494755868736, new m = 3.3072691545555926 and error = 0.998860226386675 at iteration 381.
3.43916079593
Gradient descent partial with new b = -0.9426792158916417, new m = 3.307443556011582 and error = 0.9984275542383941 at iteration 382.
3.44013260518
Gradient descent partial with new b = -0.9456087332891955, new m = 3.30761794419836 and error = 0.9979950087286756 at iteration 383.
3.44110656862
Gradient descent partial with new b = -0.9485380277964948, new m = 3.3077923191169365 and error = 0.9975625898336212 at iteration 384.
3.44208268384
Gradient descent partial with new b = -0.9514670994304979, new m = 3.307966680768321 and error = 0.9971302975293416 at iteration 385.
3.44306094845
Gradient descent partial with new b = -0.9543959482081621, new m = 3.3081410291535223 and error =

5.07242024596
Gradient descent partial with new b = -3.6984089172660317, new m = 3.471486516218723 and error = 0.6329609980205145 at iteration 1358.
5.07451445209
Gradient descent partial with new b = -3.701128766922689, new m = 3.4716484233131575 and error = 0.6326412535751067 at iteration 1359.
5.07660909726
Gradient descent partial with new b = -3.7038484096415134, new m = 3.471810318089006 and error = 0.6323216142289303 at iteration 1360.
5.07870418062
Gradient descent partial with new b = -3.7065678454382494, new m = 3.4719722005472047 and error = 0.632002079961693 at iteration 1361.
5.08079970129
Gradient descent partial with new b = -3.7092870743286404, new m = 3.4721340706886914 and error = 0.6316826507531142 at iteration 1362.
5.08289565842
Gradient descent partial with new b = -3.712006096328429, new m = 3.472295928514403 and error = 0.6313633265829127 at iteration 1363.
5.08499205114
Gradient descent partial with new b = -3.7147249114533563, new m = 3.4724577740252767 and er

7.30288396698
Gradient descent partial with new b = -6.337619785678296, new m = 3.628593348303627 and error = 0.3606881736434296 at iteration 2366.
7.30514465099
Gradient descent partial with new b = -6.3401388328468675, new m = 3.6287433020361095 and error = 0.36046463148640695 at iteration 2367.
7.30740533504
Gradient descent partial with new b = -6.342657688355521, new m = 3.6288932443594684 and error = 0.3602411756228774 at iteration 2368.
7.30966601896
Gradient descent partial with new b = -6.3451763522188385, new m = 3.629043175274572 and error = 0.36001780603573336 at iteration 2369.
7.31192670256
Gradient descent partial with new b = -6.347694824451401, new m = 3.629193094782288 and error = 0.35979452270786977 at iteration 2370.
7.31418738565
Gradient descent partial with new b = -6.350213105067789, new m = 3.629343002883485 and error = 0.35957132562218247 at iteration 2371.
7.31644806804
Gradient descent partial with new b = -6.352731194082582, new m = 3.6294928995790303 and e

9.51022092982
Gradient descent partial with new b = -8.730609139520762, new m = 3.7710431165638125 and error = 0.17956544042647488 at iteration 3352.
9.51242152788
Gradient descent partial with new b = -8.732946117790647, new m = 3.7711822321066544 and error = 0.1794191209814636 at iteration 3353.
9.51462202556
Gradient descent partial with new b = -8.735282918253196, new m = 3.7713213370649887 and error = 0.17927287230098132 at iteration 3354.
9.51682242284
Gradient descent partial with new b = -8.737619540921939, new m = 3.7714604314396207 and error = 0.1791266943705681 at iteration 3355.
9.51902271965
Gradient descent partial with new b = -8.739955985810402, new m = 3.771599515231356 and error = 0.17898058717577137 at iteration 3356.
9.52122291595
Gradient descent partial with new b = -8.742292252932112, new m = 3.7717385884409995 and error = 0.17883455070213652 at iteration 3357.
9.52342301169
Gradient descent partial with new b = -8.744628342300594, new m = 3.771877651069356 and e

Gradient descent partial with new b = -10.97231392760317, new m = 3.90448722364157 and error = 0.06662009654742167 at iteration 4348.
11.6484010257
Gradient descent partial with new b = -10.974480347353312, new m = 3.904616186184118 and error = 0.06653748608926392 at iteration 4349.
11.6504851929
Gradient descent partial with new b = -10.976646602272945, new m = 3.9047451389146426 and error = 0.0664549331618342 at iteration 4350.
11.652569233
Gradient descent partial with new b = -10.978812692374607, new m = 3.9048740818338903 and error = 0.06637243775295824 at iteration 4351.
11.6546531458
Gradient descent partial with new b = -10.98097861767084, new m = 3.905003014942607 and error = 0.0662899998504635 at iteration 4352.
11.6567369314
Gradient descent partial with new b = -10.983144378174181, new m = 3.90513193824154 and error = 0.06620761944217887 at iteration 4353.
11.6588205897
Gradient descent partial with new b = -10.98530997389717, new m = 3.905260851731435 and error = 0.0661252

Gradient descent partial with new b = -13.05442995819356, new m = 4.028431337965426 and error = 0.010869732620448267 at iteration 5346.
13.6638133169
Gradient descent partial with new b = -13.05643796162145, new m = 4.028550870307895 and error = 0.010838816198191123 at iteration 5347.
13.6657671637
Gradient descent partial with new b = -13.058445812271822, new m = 4.028670393555831 and error = 0.010807946153082105 at iteration 5348.
13.6677208785
Gradient descent partial with new b = -13.0604535101563, new m = 4.028789907709925 and error = 0.010777122474893987 at iteration 5349.
13.6696744613
Gradient descent partial with new b = -13.062461055286509, new m = 4.028909412770869 and error = 0.01074634515340092 at iteration 5350.
13.6716279121
Gradient descent partial with new b = -13.064468447674068, new m = 4.029028908739355 and error = 0.010715614178379376 at iteration 5351.
13.673581231
Gradient descent partial with new b = -13.066475687330602, new m = 4.029148395616075 and error = 0.0