# Description and Details:
1. Load house sales data
2. write a function get_numpy_data to convert into numpy 
3. write a function predict_output to compute predictions
4. write a function feature_derivative to compute derivative
5. Implement gradient descent algorithm from scratch


In [35]:
import pandas as pd
import numpy as np
import math

In [36]:
print(pd.__version__)

1.0.5


## Load in house sales data

In [37]:
sales = pd.read_csv('kc_house_data.csv')
train_data = pd.read_csv('kc_house_train_data.csv')
train_data = pd.read_csv('kc_house_test_data.csv')

## Convert input to numpy array

Now we will write a function that will accept an SFrame, a list of feature names (e.g. ['sqft_living', 'bedrooms']) and an target feature e.g. ('price') and will return two things:
* A numpy matrix whose columns are the desired features plus a constant column (this is how we create an 'intercept')
* A numpy array containing the values of the output

In [38]:
def get_numpy_data(data, features, output):
    data['constant'] = 1 # add a constant column  
    features = ['constant'] + features # combine two lists

    feature_matrix = data[features].to_numpy()
    output_array = data[output].to_numpy()
    
    return(feature_matrix, output_array)

In [39]:
# test the function:
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
print (example_features[0,:]) # this accesses the first row of the data the ':' indicates 'all columns'
print (example_output[0]) # and the corresponding output

[   1 1180]
221900.0


## Predicting output given regression weights

In [40]:
# test:
my_weights = np.array([1., 1.]) # the example weights
my_features = example_features[0,] # we'll use the first data point
predicted_value = np.dot(my_features, my_weights)
print (predicted_value)

1181.0


In [41]:
def predict_output(feature_matrix, weights):
    predictions = np.dot(feature_matrix, weights)
    return(predictions)

In [42]:
# test:
test_predictions = predict_output(example_features, my_weights)
print (test_predictions[0]) # should be 1181.0
print (test_predictions[1]) # should be 2571.0

1181.0
2571.0


## Computing the Derivative

In [43]:
def feature_derivative(errors, feature):
    derivative =  (-2) * np.dot(errors, feature)
    return(derivative)

In [44]:
# test feature_derivative function:

(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
my_weights = np.array([0., 0.]) # this makes all the predictions 0
test_predictions = predict_output(example_features, my_weights) 

errors = example_output - test_predictions
feature = example_features[:,0] 
derivative = feature_derivative(errors, feature) # compute derivative wrt constant
print (derivative)
print (-2 * np.sum(example_output))

-23345850016.0
-23345850016.0


## Gradient Descent Algorithm

In [45]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    converged = False 
    weights = np.array(initial_weights) # make sure it's a numpy array
    while not converged:
        predictions = predict_output(feature_matrix, weights)   
        errors = output - predictions
        gradient_sum_squares = 0 # initialize the gradient sum of squares

        for i in range(len(weights)): 
            derivative = feature_derivative(errors, feature_matrix[:,i])
            weights[i] = weights[i] - step_size * derivative           
            gradient_sum_squares += derivative**2 # get the sum of squared derivatives (for assessing convergence)

        gradient_magnitude = math.sqrt(gradient_sum_squares)                                                                    
        if gradient_magnitude < tolerance:
            converged = True
    return(weights)

## Running the Gradient Descent as Simple Regression

In [46]:
# test the function:
simple_features = ['sqft_living']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

In [47]:
simple_weights = regression_gradient_descent(simple_feature_matrix, output,initial_weights, step_size,tolerance)
print (simple_weights[1])

282.3594538991383


### compare with lm in sklearn

In [48]:
from sklearn import linear_model
lm = linear_model.LinearRegression()
model_1 = lm.fit(train_data['sqft_living'].values.reshape(-1,1), 
                 train_data['price'])
print(model_1.coef_)

[274.93662162]


## Running a multiple regression

In [49]:
model_features = ['sqft_living', 'sqft_living15'] # sqft_living15 is the average squarefeet for the nearest 15 neighbors. 
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features, my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9

In [50]:
multiple_weights = regression_gradient_descent(feature_matrix, output,initial_weights, step_size,tolerance)
multiple_weights

array([-9.99999374e+04,  2.29212241e+02,  8.33261573e+01])