In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
def compute_cost(x, y, w, b):
    """
    Computes the squared-error cost function for univariate linear regression.

    The model is f_wb(x) = w * x + b and the cost is
        J(w, b) = (1 / (2 * m)) * sum_i (f_wb(x[i]) - y[i]) ** 2

    Args:
      x (array-like (m,)): Data, m examples
      y (array-like (m,)): target values
      w,b (scalar)       : model parameters

    Returns
        total_cost (float): The cost of using w,b as the parameters for linear regression
               to fit the data points in x and y

    Raises
        ValueError: if x and y are not 1-D with the same, non-zero length
    """
    # np.asarray reads the values by position, so a pandas Series whose index
    # is not 0..m-1 (e.g. after filtering rows) works the same as an ndarray.
    # Casting to float keeps large integer inputs from wrapping around in int64.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError(
            f"x and y must be 1-D with the same length, got shapes {x.shape} and {y.shape}"
        )

    # number of training examples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x and y must contain at least one training example")

    # Residual of every prediction at once; dot(r, r) is the sum of squares.
    residuals = w * x + b - y
    total_cost = np.dot(residuals, residuals) / (2 * m)

    return total_cost

In [5]:
def compute_gradient(x, y, w, b):
    """
    Computes the gradient of the squared-error cost for univariate linear regression.

    With f_wb(x) = w * x + b:
        dj_dw = (1 / m) * sum_i (f_wb(x[i]) - y[i]) * x[i]
        dj_db = (1 / m) * sum_i (f_wb(x[i]) - y[i])

    Args:
      x (array-like (m,)): Data, m examples
      y (array-like (m,)): target values
      w,b (scalar)       : model parameters
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameter w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
    Raises
      ValueError: if x and y are not 1-D with the same, non-zero length
    """
    # np.asarray reads the values by position, so a pandas Series whose index
    # is not 0..m-1 (e.g. after filtering rows) works the same as an ndarray.
    # Casting to float keeps large integer inputs from wrapping around in int64.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError(
            f"x and y must be 1-D with the same length, got shapes {x.shape} and {y.shape}"
        )

    # Number of training examples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x and y must contain at least one training example")

    # Prediction error for every example at once.
    errors = w * x + b - y
    dj_dw = np.dot(errors, x) / m
    dj_db = errors.sum() / m

    return dj_dw, dj_db

In [7]:
# Load the SAT-by-year table; the CSV path is relative, so the file is
# expected in the notebook's working directory.
data = pd.read_csv("SAT_by_Year_Gender_1967_2001.csv")
# Preview the first five rows to check the column layout.
print(data.head(n=5))

   Year  M_verbal  F_verbal  M_math  F_math  A_verbal  A_math  M_averages  \
0  1967       540       545     535     495       543     516         538   
1  1968       541       543     533     497       543     516         537   
2  1969       536       543     534     498       540     517         535   
3  1970       536       538     531     493       537     512         534   
4  1971       531       534     529     494       532     513         530   

   F_averages  A_averages  
0         520         529  
1         520         528  
2         520         528  
3         516         524  
4         514         522  


In [9]:
# Feature is the calendar year; target is the "M_averages" column
# (presumably the male average score — confirm against the dataset docs).
x, y = data["Year"], data["M_averages"]

In [12]:
# Copy both Series into plain NumPy arrays so the regression helpers can
# index them by position, then show the values one array per line.
X, Y = np.array(x), np.array(y)
print(X, Y, sep="\n")

[1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980
 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
 1995 1996 1997 1998 1999 2000 2001]
[538 537 535 534 530 529 524 524 516 516 514 514 512 510 512 512 512 514
 518 519 518 516 516 513 512 512 514 512 515 517 518 520 520 520 521]


In [17]:
# Starting parameters for the fit. Here `m` is the slope (it is passed as the
# `w` argument below), not the example count that `m` denotes inside the helpers.
m, b = 0, 0
# Not used in this cell; presumably consumed by a later gradient-descent loop.
# NOTE(review): Year is unscaled (~2000), which makes dj_dw about -1e6 at the
# start; alpha = 0.01 looks far too large for that scale — confirm or normalize X.
alpha, iterations = 0.01, 1000
# v is dj_dw and f is dj_db evaluated at the starting point.
v, f = compute_gradient(X, Y, m, b)
print(v, f, sep="\n")

-1029596.7714285714
-518.9714285714285
