In [1]:
# note: datahub.io has good datasets

In [2]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy, math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# load the housing data from a CSV file into a DataFrame
df = pd.read_csv('data/houses.csv')

# alternative recipe for cars.csv (kept for reference, not executed):
# target = df['price']
# predictors = df.drop('price', axis = 1)
# numeric_predictors = predictors.select_dtypes(exclude=['object'])

# show the dtype of every column
print(df.dtypes)

size(sqft)    float64
bedrooms      float64
floors        float64
age           float64
price         float64
dtype: object


In [4]:
# split the data (70% train / 30% test)
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the
# same split. Note that df[['price']] keeps the targets 2-D, shape (m, 1).
X_train, X_test, y_train, y_test = train_test_split(
    df[['age', 'bedrooms', 'floors', 'size(sqft)']], df[['price']],
    train_size=0.7, test_size=0.3, random_state=42)

In [5]:
# scale/normalize the data
scaler = StandardScaler()

# learn the per-feature mean / standard deviation from the training split only
X_norm = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

# reuse the training statistics for the test split: re-fitting here would put
# the test data on a different scale and overwrite the scaler's fitted state
X_test_norm = pd.DataFrame(
    scaler.transform(X_test), columns=X_test.columns)


In [6]:
# turn the four unscaled splits into numpy arrays for vectorized calculations
# (conversion of the scaled frames is currently disabled, so they stay DataFrames)
# X_norm = X_norm.to_numpy()
# X_test_norm = X_test_norm.to_numpy()
X_train, X_test, y_train, y_test = (
    split.to_numpy() for split in (X_train, X_test, y_train, y_test)
)

In [7]:
# report the dimensions of every split, one per line
shape_report = [
    f'X_train shape: {X_train.shape}',
    f'X_test.shape: {X_test.shape}',
    f'y_train.shape: {y_train.shape}',
    f'y_test.shape: {y_test.shape}',
]
print('\n'.join(shape_report))


X_train shape: (70, 4)
X_test.shape: (30, 4)
y_train.shape: (70, 1)
y_test.shape: (30, 1)


In [8]:
# replace the training split with a tiny hand-written set:
# three examples with four features each, plus their three targets
X_train, y_train = (
    np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]]),
    np.array([460, 232, 178]),
)


In [9]:
# initial parameters
m, n = np.shape(X_train)  # m examples, n features

# zero initialisation, kept for reference:
# w_init = np.zeros(n)
# b_init = 0

# hard-coded starting point: one weight per feature plus the bias
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083

print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")


w_init shape: (4,), b_init type: <class 'float'>


In [10]:
def predict(X, w, b):
  '''
  Evaluate the linear model: dot product of inputs and weights, plus bias

  Args:
  X (ndarray (m,n)) : m examples with n features each
  w (ndarray (n,))  : one weight per feature
  b (scalar)        : bias term

  Returns:
  (ndarray (m,))    : one prediction per row of X
                      (a single value when X is one example of shape (n,))
  '''

  return np.dot(X, w) + b

In [11]:
# sanity check: predictions for X_train using the starting parameters w_init, b_init
predict(X_train, w_init, b_init)

array([459.99999762, 231.99999837, 177.99999899])

In [12]:
def compute_cost(X, y, w, b):
  '''
  Computes the squared-error cost for a model using current parameters:
  the sum of squared prediction errors divided by 2*m

  Args:
  X (ndarray (m,n)) : data, m examples with n features
  y (ndarray (m,))  : target values; a column vector of shape (m,1) is
                      also accepted and is flattened to (m,)
  w (ndarray (n,))  : model parameters
  b (scalar)        : model parameter

  Returns:
  cost (scalar)     : cost
  '''

  m = X.shape[0]

  # flatten the targets: a (m,1) column would otherwise broadcast against the
  # (m,) predictions into an (m,m) matrix and silently produce a wrong cost
  y = np.asarray(y).reshape(-1)

  err = predict(X, w, b) - y
  cost = (err**2).sum() / (2 * m)

  return cost

In [13]:
# sanity check: cost on X_train, y_train with the starting parameters w_init, b_init
compute_cost(X_train, y_train, w_init, b_init)

1.5578904045996674e-12

In [23]:
def compute_gradient(X, y, w, b):
  '''
  Computes the gradient (slope of the cost function) at the given parameters

  Args:
  X (ndarray (m,n))    : data, m examples with n features
  y (ndarray (m,))     : target values; a column vector of shape (m,1) is
                         also accepted and is flattened to (m,)
  w (ndarray (n,))     : model parameters
  b (scalar)           : model parameter

  Returns:
  dj_dw (ndarray (n,)) : gradient of the cost w.r.t. the parameters w
  dj_db (scalar)       : gradient of the cost w.r.t. the parameter b
  '''

  m = X.shape[0]

  # flatten the targets: a (m,1) column would otherwise broadcast the error
  # to (m,m) and the "gradient" below would come out with shape (m,n)
  y = np.asarray(y).reshape(-1)

  # per-example prediction error, shape (m,)
  err = predict(X, w, b) - y

  # error-weighted sum of each feature column, averaged over the examples
  dj_dw = np.dot(err, X) / m
  dj_db = err.sum() / m

  return dj_dw, dj_db


In [24]:
# evaluate the gradient at the starting parameters and show both parts
tmp_dj_dw, tmp_dj_db = compute_gradient(X_train, y_train, w_init, b_init)
print(
    f'dj_db at initial w,b: {tmp_dj_db}',
    f'dj_dw at initial w,b: \n {tmp_dj_dw}',
    sep='\n',
)


dj_db at initial w,b: -1.6739251122999121e-06
dj_dw at initial w,b: 
 [-2.72623574e-03 -6.27197255e-06 -2.21745574e-06 -6.92403377e-05]


In [None]:
def gradient_descent(X, y, w_in, b_in, alpha, num_iters):
  '''
  Performs batch gradient descent to learn the parameters w and b.
  Updates parameters by taking num_iters gradient steps with learning rate alpha

  Args:
  X (ndarray (m,n))   : data, m examples with n features
  y (ndarray (m,))    : target values
  w_in (ndarray (n,)) : initial model parameters (left unmodified)
  b_in (scalar)       : initial model parameter
  alpha (float)       : learning rate
  num_iters (int)     : number of iterations to run gradient descent

  Returns:
  w (ndarray (n,))    : updated values of parameters
  b (scalar)          : updated value of parameter
  J_history (list)    : cost after each update, recorded for the first
                        100000 iterations only
  '''

  # store the cost j over each iteration
  J_history = []

  # work on a private copy so the caller's w_in is never changed
  w = copy.deepcopy(w_in)
  b = b_in

  # progress is printed about 10 times over the whole run
  report_every = math.ceil(num_iters / 10)

  for i in range(num_iters):

    # calculate gradient
    dj_dw, dj_db = compute_gradient(X, y, w, b)

    # update parameters using the gradient at each iteration; rebinding
    # (instead of -=) also works when the initial weights are integers
    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    # record cost at each iteration
    if i < 100000:  # to prevent resource exhaustion
      J_history.append(compute_cost(X, y, w, b))

    # display the most recently recorded cost
    if i % report_every == 0:
      print(f'Iteration: {i}:   Cost: {J_history[-1]:8.2f}')

  return w, b, J_history

In [None]:
# NOTE(review): w_final and b_final are not assigned anywhere in the visible
# notebook; presumably a gradient_descent(...) call belongs before this cell - confirm.
print(f'w, b found by gradient descent: {w_final}, {b_final:0.2f}')

# predict once for the whole scaled test set instead of once per printed row
test_predictions = predict(X_test_norm, w_final, b_final)

# one line per test example (the previous bound, X_train.shape[1], was the
# number of training features, not the number of test examples)
for pred, target in zip(test_predictions, y_test):
    print(f'prediction: {pred:0.2f}, target value: {target}')
