In [1]:
from typing import List
from scratch.linear_algebra import dot, Vector
import random
import tqdm
from scratch.linear_algebra import vector_mean
from scratch.gradient_descent import gradient_step
from scratch.statistics import daily_minutes_good
from scratch.gradient_descent import gradient_step

In [2]:
def predict(x: Vector, beta: Vector) -> float:
    '''assumes that the first element of x is 1'''
    return dot(x, beta)

In [3]:
def error(x: Vector, y: float, beta: Vector) -> float:
        return predict(x, beta) - y

In [4]:
def squared_error(x: Vector, y: float, beta: Vector) -> float:
    return error(x, y, beta) ** 2

In [5]:
x = [1,2,3]
y = 30
beta = [4,4,4]

assert error(x,y,beta) == -6
assert squared_error(x,y,beta) == 36

In [6]:
def sqerror_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    err=error(x,y,beta)
    return [2 * err * x_i for x_i in x]

assert sqerror_gradient(x,y,beta) == [-12,-24,-36]

In [7]:
def least_squares_fit(xs: List[Vector]
                     , ys: List[float]
                     , learning_rate: float = 0.001
                     , num_steps: int = 1000
                     , batch_size: int=1) -> Vector:
    '''
    Find the beta that minimizes the sum of squared errors assuming the model y=dpt(x, beta)
    '''
    #start with a random guess
    guess = [random.random() for _ in xs[0]]
    
    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start+batch_size]
            batch_ys = ys[start:start+batch_size]
            
            gradient = vector_mean([sqerror_gradient(x,y,guess)
                                   for x,y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)
            
    return guess

In [8]:
random.seed(0)
# I used trial and error to choose num_iters and step_size
# This will run for a while

learning_rate = 0.001

beta=least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000, 25)
assert 30.50 < beta[0] < 30.70 #constants
assert 0.96  < beta[1] <  1.00 #num friends
assert -1.89 < beta[2] < -1.85 #work hours per day
assert 0.91  < beta[3] <  0.94 #has PhD

NameError: name 'inputs' is not defined