In [11]:
# Setup
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sea # Used to make prettier graphs

# Load data
# np.loadtxt skips the first row of the file (skiprows=1) and, because
# unpack=True, hands back one array per column; the two columns are bound to
# X and Y respectively.
# NOTE(review): presumably X holds reservations and Y holds pizzas, judging by
# the disabled axis labels below -- confirm against pizza_data.txt.
X, Y = np.loadtxt("./pizza_data.txt", skiprows=1, unpack=True)


# Switch on seaborn's default styling for the matplotlib figures drawn later.
sea.set()
# Axis limits, tick sizes and labels for the scatter plot -- currently disabled.
# plt.axis([0, 50, 0 , 50])
# plt.xticks(fontsize= 14)
# plt.yticks(fontsize= 14)
# plt.xlabel("Reservations", fontsize=14)
# plt.ylabel("Pizzas", fontsize=14)


# Gradient Descent
## Quick reminder
We want to minimize the loss of our model. Imagine the loss, plotted against the weight, as a parabola. Where we need to increase the weight to decrease the loss, the slope of the curve (its derivative at that point) is __negative__. The opposite holds on the other side of the parabola's valley: there the slope is __positive__, and we need to decrease the weight to reach the minimum loss.

## Math of gradient descent
The following is the squared error loss:

$$L = \frac{1}{m} \sum_{i=1}^{m} \left((w x_i + b) - y_i\right)^2$$

The derivative of the loss with respect to the weight $w$ gives both the size and the direction of the gradient:

$$\frac{\partial L}{\partial w} = \frac{2}{m} \sum_{i=1}^{m} x_i \left((w x_i + b) - y_i\right)$$

In [12]:

def predict(X, w, b):
  """Return the linear model's prediction for X.

  Computes ``X * w + b``: the input scaled by the weight ``w`` and shifted
  by the bias ``b``. Works for anything supporting ``*`` and ``+`` with the
  given operands (plain numbers, NumPy arrays, ...).
  """
  scaled = X * w
  return scaled + b


def loss(X, Y, w, b):
  """Return the mean squared error of the model's predictions.

  The prediction for X (using weight ``w`` and bias ``b``) is compared with
  the labels Y; the squared differences are averaged with ``np.average``.
  """
  residuals = predict(X, w, b) - Y
  return np.average(residuals ** 2)


# Our new gradient function. The bias defaults to 0, so existing calls that
# pass only (X, Y, w) behave exactly as before.
def gradient(X, Y, w, b=0):
  """Return the derivative of the mean squared error with respect to ``w``.

  Implements ``2 * average(X * (predict(X, w, b) - Y))``.

  Args:
    X: input values.
    Y: labels matching X.
    w: current weight.
    b: bias used when predicting; defaults to 0.

  Returns:
    The slope of the loss along ``w``. A negative value means increasing
    ``w`` lowers the loss; a positive value means decreasing it does.
  """
  errors = predict(X, w, b) - Y
  return 2 * np.average(X * errors)

# Gradient-descent training: each step moves the weight against the gradient,
# so no case-specific if statements are needed to pick a direction.
# More iterations generally bring the loss closer to its minimum, but take longer.
def train(X, Y, iterations, lr, verbose = False):
  """Fit the weight of the bias-free model by gradient descent.

  Args:
    X: input values.
    Y: labels matching X.
    iterations: number of descent steps to run.
    lr: learning rate, the factor applied to the gradient at every step.
    verbose: when true, print the current loss before each step.

  Returns:
    The weight after the final step (the initial 0 if ``iterations`` is 0).
  """
  weight = 0
  for step in range(iterations):
    if verbose:
      print("Iteration %4d => Loss: %.10f" % (step, loss(X, Y, weight, 0)))

    # Step downhill: subtract the gradient scaled by the learning rate.
    weight = weight - gradient(X, Y, weight) * lr

  return weight

In [13]:
# Try the gradient-descent trainer: 100 steps at a learning rate of 0.001,
# then report the weight it settled on.
w = train(X, Y, iterations=100, lr=0.001)
print("\nWeight = %.10f" % w)


Weight = 1.8436928702
