In [1]:
import graphlab

graphlab.canvas.set_target('ipynb')
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1541679759.log
INFO:graphlab.cython.cy_server:GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1541679759.log


This non-commercial license of GraphLab Create for academic use is assigned to gaurav.agrawal@zs.com and will expire on October 05, 2019.


In [2]:
# Load the dataset stored in the local 'kc_house_data.gl/' directory into an SFrame.
sales = graphlab.SFrame('kc_house_data.gl/')

In [3]:
def get_numpy_data(data_sframe, features, output):
    """Extract a design matrix and a target vector from a column table.

    Unlike a version that writes a ``'constant'`` column into the table,
    this function leaves ``data_sframe`` untouched: the intercept column is
    built directly in NumPy, so calling it repeatedly on the same table has
    no side effects.

    Parameters
    ----------
    data_sframe : table
        Any object supporting ``data_sframe[list_of_names].to_numpy()`` and
        ``data_sframe[name].to_numpy()`` (the notebook passes an SFrame).
    features : sequence of str
        Names of the feature columns, in the order they should appear.
    output : str
        Name of the target column.

    Returns
    -------
    (features_matrix, output_array) : tuple
        ``features_matrix`` has a leading column of ones (the intercept)
        followed by the requested feature columns; ``output_array`` holds
        the values of the ``output`` column.
    """
    output_array = data_sframe[output].to_numpy()
    feature_names = list(features)
    n_rows = len(output_array)

    if feature_names:
        feature_values = data_sframe[feature_names].to_numpy()
        # Reuse the features' dtype so the result matches what a table-side
        # constant column would have produced.
        intercept = np.ones(n_rows, dtype=feature_values.dtype)
        features_matrix = np.column_stack((intercept, feature_values))
    else:
        # Intercept-only model: a single column of ones.
        features_matrix = np.ones((n_rows, 1), dtype=int)

    return (features_matrix, output_array)

In [4]:
# Module-level values named after get_numpy_data's parameters.
# NOTE(review): no later cell visible here reads `data_sframe` or `features`,
# and `output` is reassigned before its next use, so this cell looks like a
# leftover scratch cell. Confirm before removing.
data_sframe = sales
features = ['sqft_living']
output = 'price'

In [5]:
def predict_outcome(feature_matrix, weights):
    """Return the linear-model predictions for every row.

    Computes the dot product of ``feature_matrix`` with ``weights`` and
    returns the result unchanged.
    """
    return np.dot(feature_matrix, weights)

In [6]:
def feature_derivative(errors, feature):
    """Return ``2 * dot(feature, errors)``.

    In the gradient-descent loop of this notebook, ``errors`` is
    ``predictions - output`` and ``feature`` is one column of the feature
    matrix, so the result is the derivative used to update that column's
    weight.
    """
    inner_product = np.dot(feature, errors)
    return 2 * inner_product

In [7]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance,
                                max_iterations=None):
    """Fit linear-regression weights by batch gradient descent.

    Each iteration computes the prediction errors once, then for every weight
    computes its derivative from those errors and moves the weight by
    ``-step_size * derivative``. The loop stops when the gradient magnitude
    (square root of the sum of squared derivatives) drops below ``tolerance``.

    Parameters
    ----------
    feature_matrix : numpy.ndarray
        2-D array indexed as ``feature_matrix[:, i]``; column ``i`` pairs with
        ``weights[i]``.
    output : array-like
        Target values, subtracted from the predictions to form the errors.
    initial_weights : array-like
        Starting weights. They are copied and converted to float, so the
        caller's object is never modified and integer inputs are safe.
    step_size : float
        Multiplier applied to each derivative when updating its weight.
    tolerance : float
        Convergence threshold on the gradient magnitude.
    max_iterations : int or None, optional
        Upper bound on the number of iterations. ``None`` (the default) means
        no bound, i.e. run until convergence.

    Returns
    -------
    numpy.ndarray
        The weights after the last iteration performed.

    Raises
    ------
    FloatingPointError
        If the weights become NaN or infinite without the convergence test
        having passed. Such a run can never converge, so it is reported
        instead of looping forever.
    """
    # Force a float copy: with an integer array every in-place update below
    # would be truncated back to an integer and the descent could stall.
    weights = np.array(initial_weights, dtype=float)

    iterations_done = 0
    while max_iterations is None or iterations_done < max_iterations:
        # Errors are computed once per iteration so that all weights are
        # updated from the same set of predictions.
        predictions = predict_outcome(feature_matrix, weights)
        errors = predictions - output

        gradient_sum_squares = 0
        for i in range(len(weights)):
            # feature_matrix[:, i] is the feature column associated with weights[i].
            derivative = feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares = gradient_sum_squares + derivative * derivative
            weights[i] = weights[i] - step_size * derivative
        iterations_done += 1

        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance:
            break
        if not np.all(np.isfinite(weights)):
            raise FloatingPointError(
                "gradient descent diverged: weights became non-finite after "
                "%d iteration(s); try a smaller step_size" % iterations_done
            )
    return weights

In [8]:
# Randomly split the rows into training and test sets (fraction 0.8 passed for
# the first set); the fixed seed makes the split repeatable.
train_data,test_data = sales.random_split(.8,seed=0)

In [9]:
# Single-feature model: predict 'price' from 'sqft_living' on the training data.
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
# Starting weights, ordered as the matrix columns: [constant, sqft_living].
initial_weights = np.array([-47000., 1.])
# Step size and gradient-magnitude tolerance handed to regression_gradient_descent.
step_size = 7e-12
tolerance = 2.5e7

In [10]:
# Fit the single-feature model on the training data by gradient descent.
simple_weights = regression_gradient_descent(simple_feature_matrix, output,initial_weights, step_size,tolerance)

In [11]:
# Show the fitted weights, ordered as [constant, sqft_living].
simple_weights

array([-46999.88716555,    281.91211912])

# Quiz Time

In [12]:
# Build the feature matrix and 'price' vector for the test set (single-feature model).
test_simple_feature_matrix, test_output = get_numpy_data(test_data, simple_features, my_output)

In [13]:
# Predict the test-set prices with the single-feature weights.
simple_test_predictions = predict_outcome(test_simple_feature_matrix,simple_weights)

In [14]:
# Predicted price for the first test row (single-feature model).
simple_test_predictions[0]

356134.44317092974

In [15]:
def residual_sum_of_squares(predictions, actuals):
    """Return the sum of squared differences between predictions and actuals.

    The residuals are ``predictions - actuals``; each is squared by
    multiplying it with itself and the squares are added up with ``.sum()``.
    """
    residuals = predictions - actuals
    squared_residuals = residuals * residuals
    return squared_residuals.sum()

In [16]:
# Residual sum of squares of the single-feature model on the test set.
residual_sum_of_squares(simple_test_predictions, test_output)

275400047593155.94

In [17]:
# Two-feature model: predict 'price' from 'sqft_living' and 'sqft_living15'.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
# Note: this rebinds `feature_matrix`/`output` to the two-feature training data.
(feature_matrix, output) = get_numpy_data(train_data, model_features,my_output)
# Starting weights, ordered as the matrix columns: [constant, sqft_living, sqft_living15].
initial_weights = np.array([-100000., 1., 1.])
# Step size and gradient-magnitude tolerance handed to regression_gradient_descent.
step_size = 4e-12
tolerance = 1e9

In [18]:
# Fit the two-feature model on the training data by gradient descent.
complex_weights = regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance)

In [19]:
# Show the fitted weights, ordered as [constant, sqft_living, sqft_living15].
complex_weights

array([ -9.99999688e+04,   2.45072603e+02,   6.52795277e+01])

In [20]:
# Build the two-feature test matrix; `test_output` is rebuilt from the same
# 'price' column of test_data as before.
test_complex_feature_matrix, test_output = get_numpy_data(test_data, model_features, my_output)

In [21]:
# Predict the test-set prices with the two-feature weights.
complex_test_predictions = predict_outcome(test_complex_feature_matrix, complex_weights)

In [22]:
# Predicted price for the first test row (two-feature model).
complex_test_predictions[0]

366651.41203655908

In [23]:
# Actual price of the first test row, for comparison with the two predictions.
test_data[0]['price']

310000.0

In [24]:
# Test RSS of the two-feature model minus test RSS of the single-feature model;
# a negative value means the two-feature model has the lower test RSS.
residual_sum_of_squares(complex_test_predictions,test_output) - residual_sum_of_squares(simple_test_predictions, test_output)

-5136601127911.875