# Chapter 3: Walking the Gradient

## Gradient Descent

In [1]:
import numpy as np

In [2]:
def predict(X, w, b):
    """Evaluate the linear model for the given input(s).

    Args:
        X: Input value(s); anything that supports ``*`` and ``+`` with
            numbers (this notebook passes a NumPy array and a plain int).
        w: Weight multiplied with ``X``.
        b: Bias added to the weighted input.

    Returns:
        ``X * w + b``.
    """
    weighted_input = X * w
    return weighted_input + b

In [3]:
def loss(X, Y, w, b):
    """Mean squared error of the linear model against the labels.

    Args:
        X: Input value(s) handed to ``predict``.
        Y: Ground-truth value(s) the predictions are compared with.
        w: Weight of the model.
        b: Bias of the model.

    Returns:
        The average of the squared differences between ``predict(X, w, b)``
        and ``Y``.
    """
    errors = predict(X, w, b) - Y
    squared_errors = errors ** 2
    return np.average(squared_errors)

In [4]:
def gradient(X, Y, w):
    """Derivative of the mean squared error with respect to ``w``, bias fixed at 0.

    Args:
        X: Input value(s).
        Y: Ground-truth value(s).
        w: Weight at which the derivative is evaluated.

    Returns:
        Twice the average of ``X * (predict(X, w, 0) - Y)``.
    """
    errors = predict(X, w, 0) - Y
    weighted_errors = X * errors
    return 2 * np.average(weighted_errors)

In [5]:
def train(X, Y, iterations, lr):
    """Fit the weight ``w`` by gradient descent, keeping the bias at zero.

    The current loss is printed before every update.

    Args:
        X: Input value(s).
        Y: Ground-truth value(s).
        iterations: Number of update steps to run.
        lr: Learning rate; each step changes ``w`` by the gradient times ``lr``.

    Returns:
        The weight after the last step.
    """
    w = 0
    for step in range(iterations):
        current_loss = loss(X, Y, w, 0)
        print("Iteration %4d => Loss: %.10f" % (step, current_loss))
        # Step against the gradient, scaled by the learning rate.
        w = w - gradient(X, Y, w) * lr
    return w

In [6]:
# Read pizza.txt, skipping its first line; unpack=True hands back one array
# per column, which are bound to X and Y.
X, Y = np.loadtxt("pizza.txt", unpack=True, skiprows=1)

# Run 100 gradient descent steps on the weight and report the result.
w = train(X, Y, lr=0.001, iterations=100)
print("\nw=%.10f" % (w,))

Iteration    0 => Loss: 812.8666666667
Iteration    1 => Loss: 304.3630879787
Iteration    2 => Loss: 143.5265791020
Iteration    3 => Loss: 92.6549949641
Iteration    4 => Loss: 76.5646303400
Iteration    5 => Loss: 71.4753484132
Iteration    6 => Loss: 69.8656402969
Iteration    7 => Loss: 69.3564996643
Iteration    8 => Loss: 69.1954616593
Iteration    9 => Loss: 69.1445263431
Iteration   10 => Loss: 69.1284158205
Iteration   11 => Loss: 69.1233201627
Iteration   12 => Loss: 69.1217084379
Iteration   13 => Loss: 69.1211986595
Iteration   14 => Loss: 69.1210374197
Iteration   15 => Loss: 69.1209864206
Iteration   16 => Loss: 69.1209702899
Iteration   17 => Loss: 69.1209651878
Iteration   18 => Loss: 69.1209635741
Iteration   19 => Loss: 69.1209630637
Iteration   20 => Loss: 69.1209629022
Iteration   21 => Loss: 69.1209628512
Iteration   22 => Loss: 69.1209628350
Iteration   23 => Loss: 69.1209628299
Iteration   24 => Loss: 69.1209628283
Iteration   25 => Loss: 69.1209628278
Iteration

## Putting Gradient Descent to the Test

In [7]:
def gradient(X, Y, w, b):
    """Gradient of the mean squared error with respect to ``w`` and ``b``.

    Args:
        X: Input value(s).
        Y: Ground-truth value(s).
        w: Weight at which the gradient is evaluated.
        b: Bias at which the gradient is evaluated.

    Returns:
        A ``(w_gradient, b_gradient)`` tuple: twice the average of
        ``X * errors`` and twice the average of ``errors``, where
        ``errors = predict(X, w, b) - Y``.
    """
    # Evaluate the model once; both partial derivatives use the same errors.
    errors = predict(X, w, b) - Y
    w_gradient = 2 * np.average(X * errors)
    b_gradient = 2 * np.average(errors)
    return (w_gradient, b_gradient)

In [8]:
def train_without_gd(X, Y, iterations, lr):
    """Search for ``w`` and ``b`` by moving one of them by ``lr`` per iteration.

    Each iteration prints the current loss and then tries, in this order,
    ``w + lr``, ``w - lr``, ``b + lr`` and ``b - lr``. The first move that
    lowers the loss is kept; if none does, the current parameters are returned.

    Args:
        X: Input value(s).
        Y: Ground-truth value(s).
        iterations: Maximum number of iterations to run.
        lr: Size of the step applied to ``w`` or ``b``.

    Returns:
        The ``(w, b)`` pair from which no single step of size ``lr`` lowers
        the loss.

    Raises:
        Exception: If an improving step was found in every one of the
            ``iterations`` iterations, so the search never stopped.
    """
    w = b = 0
    for step in range(iterations):
        current_loss = loss(X, Y, w, b)
        print("Iteration %4d => Loss: %.6f" % (step, current_loss))

        # Neighbouring parameter pairs, listed in the order they are tried.
        candidates = (
            (w + lr, b),
            (w - lr, b),
            (w, b + lr),
            (w, b - lr),
        )
        for next_w, next_b in candidates:
            if loss(X, Y, next_w, next_b) < current_loss:
                w, b = next_w, next_b
                break
        else:
            # None of the four moves lowered the loss: stop here.
            return w, b

    raise Exception("Couldn't converge within %d iterations" % iterations)

In [9]:
# Train without gradient descent: step w or b by lr until the loss stops
# improving, then report the parameters and the prediction for x=20.
w, b = train_without_gd(X, Y, lr=0.0001, iterations=10000000)
print("\nw=%.3f, b=%.3f" % (w, b))
print("Prediction: x=%d => y=%.2f" % (20, predict(20, w, b)))

Iteration    0 => Loss: 812.866667
Iteration    1 => Loss: 812.785989
Iteration    2 => Loss: 812.705315
Iteration    3 => Loss: 812.624646
Iteration    4 => Loss: 812.543982
Iteration    5 => Loss: 812.463321
Iteration    6 => Loss: 812.382665
Iteration    7 => Loss: 812.302014
Iteration    8 => Loss: 812.221367
Iteration    9 => Loss: 812.140724
Iteration   10 => Loss: 812.060085
Iteration   11 => Loss: 811.979451
Iteration   12 => Loss: 811.898822
Iteration   13 => Loss: 811.818196
Iteration   14 => Loss: 811.737576
Iteration   15 => Loss: 811.656959
Iteration   16 => Loss: 811.576347
Iteration   17 => Loss: 811.495739
Iteration   18 => Loss: 811.415136
Iteration   19 => Loss: 811.334537
Iteration   20 => Loss: 811.253942
Iteration   21 => Loss: 811.173352
Iteration   22 => Loss: 811.092766
Iteration   23 => Loss: 811.012184
Iteration   24 => Loss: 810.931607
Iteration   25 => Loss: 810.851034
Iteration   26 => Loss: 810.770466
Iteration   27 => Loss: 810.689902
Iteration   28 => Lo

In [10]:
def train(X, Y, iterations, lr):
    """Fit the weight ``w`` and the bias ``b`` by gradient descent.

    The current loss is printed before every update.

    Args:
        X: Input value(s).
        Y: Ground-truth value(s).
        iterations: Number of update steps to run.
        lr: Learning rate; each step changes ``w`` and ``b`` by their
            gradient times ``lr``.

    Returns:
        The ``(w, b)`` pair after the last step.
    """
    w = b = 0
    for step in range(iterations):
        current_loss = loss(X, Y, w, b)
        print("Iteration %4d => Loss: %.10f" % (step, current_loss))
        # Both gradients are taken at the current (w, b) before either changes.
        grad_w, grad_b = gradient(X, Y, w, b)
        w = w - grad_w * lr
        b = b - grad_b * lr
    return w, b

In [11]:
# Train with gradient descent on both w and b, then report the parameters
# and the prediction for x=20.
w, b = train(X, Y, lr=0.001, iterations=20000)
print("\nw=%.10f, b=%.10f" % (w, b))
print("Prediction: x=%d => y=%.2f" % (20, predict(20, w, b)))

Iteration    0 => Loss: 812.8666666667
Iteration    1 => Loss: 302.5769561564
Iteration    2 => Loss: 141.9840903267
Iteration    3 => Loss: 91.4213766211
Iteration    4 => Loss: 75.4790576522
Iteration    5 => Loss: 70.4298834852
Iteration    6 => Loss: 68.8082102709
Iteration    7 => Loss: 68.2650157314
Iteration    8 => Loss: 68.0611933727
Iteration    9 => Loss: 67.9641875162
Iteration   10 => Loss: 67.9008258073
Iteration   11 => Loss: 67.8480849450
Iteration   12 => Loss: 67.7987207420
Iteration   13 => Loss: 67.7504538190
Iteration   14 => Loss: 67.7025669451
Iteration   15 => Loss: 67.6548344110
Iteration   16 => Loss: 67.6071851614
Iteration   17 => Loss: 67.5595968023
Iteration   18 => Loss: 67.5120622506
Iteration   19 => Loss: 67.4645792410
Iteration   20 => Loss: 67.4171470238
Iteration   21 => Loss: 67.3697653264
Iteration   22 => Loss: 67.3224340264
Iteration   23 => Loss: 67.2751530484
Iteration   24 => Loss: 67.2279223323
Iteration   25 => Loss: 67.1807418223
Iteration