# Simple Linear Regression - Gradient Descent

Jeff Smith
<br>10/17/2018

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats

## Notation
`w` holds the model parameters:
- w[0] - intercept
- w[1] - slope

In [None]:
def cost(y, x, w):
    """Return the residual sum of squares (RSS) of the line `w` on the data.

    Parameters
    ----------
    y : NumPy array or pandas Series
        Observed response values.
    x : NumPy array or pandas Series
        Predictor values paired with `y`.
    w : sequence of two numbers
        Model coefficients: w[0] is the intercept, w[1] is the slope.

    Returns
    -------
    The sum over all points of (y - (w[0] + w[1]*x))**2.
    """
    # The arithmetic runs on the inputs as given (so pandas Series keep their
    # index alignment and NaNs propagate); only the finished residuals are
    # converted to an ndarray so the reduction happens inside NumPy instead of
    # element by element in the interpreter.
    residuals = np.asarray(y - (w[0] + w[1]*x))
    return np.sum(residuals**2)

# from https://stackoverflow.com/questions/17784587/gradient-descent-using-python-and-numpy
def gen_problem (n = 10, bias = 10, variance = 5):
    """Build a noisy synthetic straight-line data set.

    The predictor is 0, 1, ..., n-1 and each response is
    (x + bias) plus uniform noise drawn from [0, variance).  Note that
    `variance` is the width of the uniform noise, not a statistical variance.

    Returns the tuple (y, x, w), where w is a zero-initialised pair of
    starting coefficients [intercept, slope].
    """
    x = np.arange(n, dtype=float)
    # One draw per point, taken from NumPy's global random state.
    noise = np.random.uniform(0, 1, size=n) * variance
    y = (x + bias) + noise
    w = np.zeros(shape=2)
    return (y, x, w)

def show(y, x, w, show_reg = 1) :
    """Print the RSS and coefficients of `w`, then scatter-plot the data.

    When `show_reg` is truthy, the line w[0] + w[1]*x is also drawn (dashed)
    across the current x-axis limits of the plot.
    """
    rss = cost(y, x, w)
    print("Solution: RSS={:,.3f}; w = [{:.4f}, {:.4f}]".format(rss, w[0], w[1]))
    plt.scatter(x, y)
    if not show_reg:
        return
    # Evaluate the model at the two ends of the visible x range.
    line_x = np.array(plt.gca().get_xlim())
    line_y = w[0] + w[1] * line_x
    plt.plot(line_x, line_y, '--')

In [None]:
def gradient_descent(y, x, w, eta = 0.01, iters = 100, tol = 0.001, show = 0) :
    """Fit y ~ w[0] + w[1]*x by fixed-step gradient descent on the RSS.

    Every iteration computes the RSS gradient, normalizes it to unit length
    and moves `w` a distance `eta` against it.

    Parameters
    ----------
    y, x : NumPy arrays or pandas Series
        Response and predictor values.
    w : mutable sequence of two numbers
        Starting guess [intercept, slope].  It is updated IN PLACE and also
        returned, so the caller chooses the starting point.
    eta : step length taken along the unit gradient each iteration.
    iters : maximum number of iterations.
    tol : stop as soon as the (un-normalized) gradient magnitude is below this.
    show : when truthy, print the cost and coefficients at every iteration.

    Returns
    -------
    (w, rss, i, mag): the coefficients, their RSS, the zero-based index of the
    last iteration entered (-1 if none ran) and the last gradient magnitude
    evaluated (NaN if none was).
    """
    if show:
        print("Starting Cost: {:,.3f}, w = [{:.3f}, {:.3f}]".format(
            cost(y, x, w), w[0], w[1]))
    # Pre-set so the return statement is valid when iters <= 0 and the loop
    # body never executes.
    i = -1
    mag = float("nan")
    for i in range(iters):
        diff = y - (w[0] + w[1]*x)
        # Partial derivatives of the RSS with respect to w[0] and w[1].
        # np.asarray keeps the reduction inside NumPy while leaving the
        # arithmetic above on the original operands.
        pw0 = -2*np.sum(np.asarray(diff))
        pw1 = -2*np.sum(np.asarray(x*diff))
        # magnitude of the gradient vector
        mag = np.sqrt(pw0**2 + pw1**2)
        # are we close enough?  Checked BEFORE normalizing: a zero gradient
        # would otherwise divide by zero and put NaN into w.
        if mag < tol or mag == 0:
            break
        # normalize the gradients
        pw0 /= mag
        pw1 /= mag
        w[0] = w[0] - eta*pw0
        w[1] = w[1] - eta*pw1
        if show:
            print("Iter {:3d} Cost: {:,.3f}, w = [{:.3f}, {:.3f}], pw = [{:.3f}, {:.3f}], mag = {:,.2f}".format(
                i + 1, cost(y, x, w), w[0], w[1],pw0, pw1,mag))
    return (w, cost(y, x, w), i, mag)

In [None]:
# Synthetic data set: 37 points, bias 50, uniform noise of width 20.
# gen_problem also hands back an all-zero starting w.
y, x, w = gen_problem(37, 50, 20)
# Print the RSS of that zero model and scatter-plot the data only
# (the final 0 is show_reg, which suppresses the regression line).
show(y, x, w, 0)

In [None]:
# Scipy's regression function
# Reference fit for the synthetic data. linregress is unpacked slope first,
# then intercept -- hence w1 before w0.
w1, w0, rval, pval, stderr = scipy.stats.linregress(x, y)
# cost() expects the coefficients as [intercept, slope].
print("Intercept: {:.2f}; Slope: {:.2f}; r-value: {:.2f}; p-value: {:.2f}, RSS: {:.2f}".format(
    w0, w1, rval,pval, cost(y, x, [w0, w1])))

In [None]:
# Our gradient descent function
# Start from intercept = slope = 0.
w = [0,0]
# Positional arguments: eta = 0.01, iters = 10000, tol = 1, show = 0 (silent).
w, rss, iters, mag = gradient_descent(y, x, w, 0.01, 10000, 1, 0)
show(y, x, w)
# gradient_descent returns a zero-based iteration index, so +1 turns it into
# a count; as the cell's last expression the pair is displayed by the notebook.
iters+1, mag

In [None]:
# Poverty dataset from https://newonlinecourses.science.psu.edu/stat462/node/101/
# Read the raw data file into a DataFrame; the path is relative to the
# notebook's working directory.
poverty = pd.read_csv("../data/teen_birthrate_poverty.csv")

In [None]:
# linregress version
# Poverty: regress Brth15to17 (response) on PovPct (predictor).
w = [0,0]
# linregress is unpacked slope first, then intercept, so the slope is stored
# in w[1] and the intercept in w[0] to match the notebook's w convention.
w[1], w[0], rval, pval, stderr = scipy.stats.linregress(poverty.PovPct, poverty.Brth15to17)
print("Intercept: {:.2f}; Slope: {:.2f}; r-Square: {:.2f}; p-value: {:.2f}; se grad.: {:.2f}".
      format(w[0], w[1], rval**2,pval, stderr))
# show() takes the response first, then the predictor.
show(poverty.Brth15to17, poverty.PovPct, w)

In [None]:
# Gradient descent on the poverty data, starting from intercept = slope = 0.
w = [0,0]
# Positional arguments: eta = .001, iters = 5000, tol = 0.5, show = 0 (silent).
w, rss, iters, mag = gradient_descent(poverty.Brth15to17, poverty.PovPct, w, .001, 5000, 0.5, 0)
show(poverty.Brth15to17, poverty.PovPct, w)

In [None]:
# Cereals
# Cereals dataset from Larose and Larose
# Read the raw data file
cereals = pd.read_csv("../data/cereals.csv")
# The Name field has some trailing spaces -- remove them
cereals.Name = cereals.Name.str.strip()
# Get rid of Quaker Oatmeal -- no Sugar values (see the book)
cereals = cereals[cereals.Name != 'Quaker_Oatmeal']
# Create w here instead of relying on whatever an earlier cell left behind,
# so this cell also runs on its own.
w = [0, 0]
# linregress is unpacked slope first, then intercept, so the slope is stored
# in w[1] and the intercept in w[0].
w[1], w[0], rval, pval, stderr = scipy.stats.linregress(cereals.Sugars, cereals.Rating)
print("Intercept: {:.2f}; Slope: {:.2f}; r-Square: {:.2f}; p-value: {:.2f}; se grad.: {:.2f}".
      format(w[0], w[1], rval**2,pval, stderr))
# show() takes the response (Rating) first, then the predictor (Sugars).
show(cereals.Rating, cereals.Sugars, w)

In [None]:
# Gradient descent on the cereals data, starting from intercept = slope = 0.
w = [0,0]
# NOTE: this rebinds the notebook-level x and y that earlier held the
# synthetic data set.
x = cereals.Sugars
y = cereals.Rating
# Positional arguments: eta = .05, iters = 5000, tol = 0.01, show = 0 (silent).
w, rss, iters, mag = gradient_descent(y,x, w, .05, 5000, 0.01, 0)
show(y,x, w)