In [1]:
def predict(alpha, beta, x_i):
    """Return the linear model's prediction for x_i, with intercept alpha and slope beta."""
    slope_term = beta * x_i
    return slope_term + alpha

In [2]:
def error(alpha, beta, x_i, y_i):
    """Return the residual for one point: the actual y_i minus predict(alpha, beta, x_i)."""
    predicted = predict(alpha, beta, x_i)
    return y_i - predicted

In [5]:
def sum_of_squared_errors(alpha, beta, x, y):
    """Return the sum of squared residuals of the line (alpha, beta) over the paired x and y.

    Points are paired with zip, so extra elements of the longer input are ignored.
    """
    total = 0
    for x_i, y_i in zip(x, y):
        residual = error(alpha, beta, x_i, y_i)
        total = total + residual ** 2
    return total

In [15]:
import math

def dot(v, w):
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Elements are paired with zip, so extra elements of the longer input are ignored.
    """
    total = 0
    for v_i, w_i in zip(v, w):
        total = total + v_i * w_i
    return total

def sum_of_squares(v):
    """Return v_1 * v_1 + ... + v_n * v_n, computed as the dot product of v with itself."""
    squared_total = dot(v, v)
    return squared_total

def vector_subtract(v, w):
    """Return the list of element-wise differences v_i - w_i.

    Elements are paired with zip, so the result is as long as the shorter input.
    """
    differences = []
    for v_i, w_i in zip(v, w):
        differences.append(v_i - w_i)
    return differences

def squared_distance(v, w):
    """Return (v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2."""
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def magnitude(v):
    """Return the Euclidean length of v: the square root of its sum of squares."""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)

def distance(v, w):
    """Return the Euclidean distance between vectors v and w.

    This is the square root of squared_distance(v, w), i.e.
    sqrt((v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2).
    """
    # math.sqrt requires exactly one argument; pass it the squared distance.
    return math.sqrt(squared_distance(v, w))

def shape(A):
    """Return (row count, column count) of the list-of-rows matrix A.

    The column count is taken from the first row; an empty A gives (0, 0).
    """
    rows = len(A)
    if A:
        cols = len(A[0])
    else:
        cols = 0
    return rows, cols

def mean(x):
    """Return the arithmetic mean of x: sum(x) divided by len(x)."""
    total = sum(x)
    count = len(x)
    return total / count

def de_mean(x):
    """Return a list of each element of x minus mean(x), so the result has mean 0."""
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted

def variance(x):
    """Return the sample variance of x.

    This is the sum of squared deviations from the mean, divided by len(x) - 1.
    """
    count = len(x)
    centered = de_mean(x)
    squared_spread = sum_of_squares(centered)
    return squared_spread / (count - 1)

def standard_deviation(x):
    """Return the sample standard deviation of x: the square root of variance(x)."""
    spread = variance(x)
    return math.sqrt(spread)

def covariance(x, y):
    """Return the sample covariance of x and y.

    This is the dot product of the two de-meaned inputs, divided by len(x) - 1.
    """
    count = len(x)
    x_centered = de_mean(x)
    y_centered = de_mean(y)
    return dot(x_centered, y_centered) / (count - 1)

def correlation(x, y):
    """Return the correlation of x and y: covariance(x, y) divided by both standard deviations.

    Returns 0 unless both standard deviations are greater than 0.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    # Guard clause: when either input has no variation, report a correlation of 0.
    if not (stdev_x > 0 and stdev_y > 0):
        return 0
    shared_variation = covariance(x, y)
    return shared_variation / stdev_x / stdev_y

In [16]:
def least_squares_fit(x, y):
    """Fit the line y = beta * x + alpha to the training values by least squares.

    Returns the pair (alpha, beta): the intercept and the slope.
    """
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    slope = r * spread_y / spread_x
    # The fitted line passes through the point (mean(x), mean(y)).
    intercept = mean(y) - slope * mean(x)
    return intercept, slope

In [18]:
# alpha, beta = least_squares_fit(num_friends_good, daily_minutes_good)
# # alpha = 22.95 and beta = 0.903, which means a user with n friends is expected to spend (22.95 + n * 0.903) minutes on the site each day.

NameError: name 'num_friends_good' is not defined

In [19]:
def total_sum_of_squares(y):
    """Return the total squared variation of the y_i's from their mean.

    Each element of de_mean(y) is squared and the squares are summed.
    """
    return sum(v ** 2 for v in de_mean(y))


# Backward-compatible alias for the original (singular) spelling of this function's name.
total_sum_of_square = total_sum_of_squares

In [20]:
def r_squared(alpha, beta, x, y):
    """Return the fraction of the variation in y that the line (alpha, beta) captures.

    Computed as 1 minus the fraction of variation the model does not capture:
    the sum of squared errors divided by the total sum of squares of y.
    """
    unexplained = sum_of_squared_errors(alpha, beta, x, y)
    total_variation = total_sum_of_squares(y)
    return 1.0 - unexplained / total_variation

In [21]:
# Goodness of fit of the line (alpha, beta) on the friends / minutes data.
# NOTE(review): alpha, beta, num_friends_good and daily_minutes_good are not defined by any
# cell shown before this one (the least_squares_fit call above is commented out), which
# matches the NameError recorded below; define them before running this cell.
r_squared(alpha, beta, num_friends_good, daily_minutes_good) # 0.329

NameError: name 'alpha' is not defined

## Using gradient descent

In [22]:
def squared_error(x_i, y_i, theta):
    """Return the squared residual at (x_i, y_i) for parameters theta = (alpha, beta)."""
    intercept, slope = theta
    residual = error(intercept, slope, x_i, y_i)
    return residual ** 2

In [23]:
def squared_error_gradient(x_i, y_i, theta):
    """Return the gradient of squared_error at theta = (alpha, beta) for the point (x_i, y_i).

    The result is the two-element list [partial wrt alpha, partial wrt beta] of
    (y_i - (beta * x_i + alpha)) ** 2.
    """
    alpha, beta = theta
    # The residual appears in both partials; evaluate it once rather than twice.
    residual = error(alpha, beta, x_i, y_i)
    return [-2 * residual,        # alpha partial derivative.
            -2 * residual * x_i]  # beta partial derivative.

In [24]:
import random

# Fix the seed so the random starting point is the same on every run.
random.seed(0)
# Initial guess for [alpha, beta]: two pseudo-random floats from random.random().
theta = [random.random(), random.random()]
# NOTE(review): minimize_stochastic is not defined in the cells shown (the recorded output
# below is a NameError), and num_friends_good / daily_minutes_good are not defined either;
# all three must be defined or imported before this cell can run.
# Presumably 0.000_1 is the initial step size -- confirm against minimize_stochastic's signature.
alpha, beta = minimize_stochastic(squared_error, 
                                  squared_error_gradient,
                                  num_friends_good,
                                  daily_minutes_good,
                                  theta,
                                  0.000_1)
# Display the fitted parameters as the cell's output.
alpha, beta

NameError: name 'minimize_stochastic' is not defined