# Estimate Regression Coefficients (Gradient Descent)

In [2]:
import graphlab

In [3]:
import numpy as np

In [7]:
# Load the dataset stored at 'kc_house_data.gl/' into a GraphLab SFrame named `sales`.
sales = graphlab.SFrame('kc_house_data.gl/')

In [8]:
def get_numpy_data(data_sframe, features, output):
    """Convert selected columns of a tabular frame into numpy arrays.

    A column named 'constant' holding the value 1 is written into
    `data_sframe` itself (the caller's object is modified in place) and is
    placed first in the returned matrix, so it can act as the intercept term.

    Args:
        data_sframe: frame supporting column assignment, column selection
            and `.to_numpy()` (a GraphLab SFrame in this notebook).
        features: list of feature column names.
        output: name of the output (target) column.

    Returns:
        A tuple `(feature_matrix, output_array)`: the 'constant' column
        followed by the requested feature columns, and the output column.
    """
    # Intercept column; note this mutates the frame that was passed in.
    data_sframe['constant'] = 1
    columns = ['constant'] + features
    feature_matrix = data_sframe[columns].to_numpy()
    output_array = data_sframe[output].to_numpy()
    return feature_matrix, output_array

In [9]:
# Build the feature matrix and output array for a one-feature model and inspect the first row.
# Side effect: get_numpy_data() adds a 'constant' column to `sales`.
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') # the [] around 'sqft_living' makes it a list
print type(example_features[0,:][0]) # type of the first entry in the first row; the ':' selects all columns of that row
print example_output[0] # and the corresponding output
#print len(example_features[:,0])

<type 'numpy.float64'>
221900.0


# Predicting Outputs

In [10]:
# Predict the output for one data point as the dot product of its feature row and the weights.
my_weights = np.array([1., 1.]) # the example weights
my_features = example_features[0,] # we'll use the first data point
predicted_value = np.dot(my_features, my_weights)
print predicted_value

#weights = np.ones((len(example_features[:,0]),len(example_features[0,:])))


1181.0


In [11]:
def predict_output(feature_matrix, weights):
    """Return the linear-model predictions for `feature_matrix`.

    Args:
        feature_matrix: numpy matrix with one row per data point and one
            column per feature.
        weights: numpy array with one weight per feature column.

    Returns:
        The result of `np.dot(feature_matrix, weights)`, i.e. one
        prediction per row.
    """
    return np.dot(feature_matrix, weights)

In [12]:

# Sanity-check predict_output() on the whole feature matrix using weights of 1 for every column.
my_weights = np.array([1, 1])
test_predictions = predict_output(example_features, my_weights)
print test_predictions[0] # should be 1181.0
print test_predictions[1] # should be 2571.0

1181.0
2571.0


# compute the derivative

In [13]:
def feature_derivative(errors, feature):
    """Return twice the dot product of `errors` and `feature`.

    With errors = predictions - output, this is the partial derivative of
    the residual sum of squares with respect to the weight of `feature`.

    Args:
        errors: numpy array of prediction errors, one per data point.
        feature: numpy array holding one feature column, same length as
            `errors`.

    Returns:
        The scalar 2 * sum(errors[i] * feature[i]).

    Raises:
        ValueError: raised by `np.dot` if the two arrays differ in length.
    """
    # Vectorised dot product instead of an element-by-element Python loop.
    return 2 * np.dot(errors, feature)

In [14]:
# Check feature_derivative(): with all-zero weights every prediction is 0, so the
# derivative with respect to the 'constant' column must equal -2 * sum(output).
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
my_weights = np.array([0., 0.]) # this makes all the predictions 0
test_predictions = predict_output(example_features, my_weights) 
# just like SFrames 2 numpy arrays can be elementwise subtracted with '-': 
errors = test_predictions - example_output # prediction errors in this case is just the -example_output
feature = example_features[:,0] # let's compute the derivative with respect to 'constant', the ":" indicates "all rows"
derivative = feature_derivative(errors, feature)
print derivative
print -np.sum(example_output)*2 # should be the same as derivative

print(len(my_weights)) # number of weights: one for 'constant' and one for 'sqft_living'

-23345850022.0
-23345850022.0
2


# gradient descent

In [15]:
from math import sqrt

In [16]:
def regression_gradient_descent(feature_matrix, output, initial_weights,
                                step_size, tolerance, max_iterations=None):
    """Fit linear-regression weights by gradient descent on the RSS.

    Each iteration computes errors = predictions - output, forms the
    gradient 2 * (errors . feature_column) for every weight from that same
    error vector, and subtracts step_size * gradient from the weights. The
    loop stops once the magnitude of the gradient used for the latest
    update is below `tolerance`.

    Args:
        feature_matrix: numpy matrix, one row per data point and one column
            per weight.
        output: numpy array of observed outputs, one per row.
        initial_weights: starting weights (array-like); it is copied, never
            modified.
        step_size: multiplier applied to the gradient at every update.
        tolerance: convergence threshold on the gradient magnitude.
        max_iterations: optional cap on the number of updates. The default
            `None` keeps iterating until the tolerance is reached.

    Returns:
        numpy float array of fitted weights.
    """
    # Force a float copy: with integer initial weights the in-place updates
    # would be truncated to integers and the loop could never converge.
    weights = np.array(initial_weights, dtype=float)
    iterations = 0
    while max_iterations is None or iterations < max_iterations:
        # compute the errors as predictions - output
        errors = np.dot(feature_matrix, weights) - output

        # derivative of the RSS with respect to every weight at once
        gradient = 2 * np.dot(errors, feature_matrix)

        # subtract the step size times the derivative from the current weights
        weights -= step_size * gradient
        iterations += 1

        # gradient magnitude: square root of the sum of squared derivatives
        gradient_magnitude = sqrt(np.dot(gradient, gradient))
        if gradient_magnitude < tolerance:
            break
    return weights

In [17]:
# Randomly split `sales` into two SFrames; .8 is the fraction passed to random_split
# (presumably the share of rows going to train_data) and seed=0 fixes the random seed.
train_data,test_data = sales.random_split(.8,seed=0)

In [51]:
# let's test out the gradient descent
# Fit the simple model ('constant' + 'sqft_living') on train_data.
simple_features = ['sqft_living']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.]) # starting weights for 'constant' and 'sqft_living'
step_size = 7e-12
tolerance = 2.5e7 # gradient descent stops once the gradient magnitude drops below this

simple_weights = regression_gradient_descent(simple_feature_matrix, output, initial_weights, step_size, tolerance)
    
print (simple_weights)

[-46999.88716555    281.91211912]


## test data

In [66]:
# Build the test-set matrices and predict test outputs with simple_weights,
# i.e. the weights fitted on train_data by the simple-model cell.
simple_features = ['sqft_living']
my_output = 'price'
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)

predictions = predict_output(test_simple_feature_matrix, simple_weights)

print predictions
print test_output

[ 356134.44317093  784640.86422788  435069.83652353 ...,  663418.65300782
  604217.10799338  240550.4743332 ]
[ 310000.  650000.  233000. ...,  610685.  400000.  402101.]


In [39]:
def get_residual_sum_of_squares(model, weights, outcome):
    """Return the residual sum of squares of a linear model.

    Args:
        model: feature matrix passed to `predict_output` (despite the name,
            this is the data matrix, not a fitted model object).
        weights: weights passed to `predict_output`.
        outcome: observed outputs, one per row of `model`.

    Returns:
        The sum over all data points of (prediction - outcome) squared.
    """
    # Residuals are predictions minus the observed outcome.
    residuals = predict_output(model, weights) - outcome
    return (residuals * residuals).sum()

In [41]:
# Run gradient descent on the TEST split and report the RSS of those weights on the same split.
# NOTE(review): the weights here are fitted on test_data, not train_data, so this RSS is not an
# out-of-sample error for the model stored in simple_weights - confirm this is intended.
simple_features = ['sqft_living']
my_output = 'price'
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

# NOTE(review): this rebinds the global `weights` to values fitted on the test split.
weights = regression_gradient_descent(test_simple_feature_matrix, test_output, initial_weights, step_size, tolerance)
    
print (weights)

RSS = get_residual_sum_of_squares(test_simple_feature_matrix, weights, test_output)

print (RSS)

[-46999.87880043    282.35945337]
2.75395693978e+14


# multiple variables

In [61]:
# Fit the two-feature model ('constant' + 'sqft_living' + 'sqft_living15') on train_data.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features, my_output)
initial_weights = np.array([-100000., 1., 1.]) # one starting weight per column, 'constant' first
step_size = 4e-12
tolerance = 1e9 # gradient descent stops once the gradient magnitude drops below this

# Rebinds the global `weights` to the weights fitted on train_data.
weights = regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance)
    
print (weights)





[ -9.99999688e+04   2.45072603e+02   6.52795277e+01]


### test data below

In [64]:
# Evaluate the two-feature model on test_data: predictions and RSS.
# Note: `feature_matrix` is rebound here to the TEST matrix, replacing the training matrix.
# `weights` is whatever the most recently executed fitting cell assigned - presumably the
# two-feature training fit; verify the cell execution order before trusting the RSS.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, test_output) = get_numpy_data(test_data, model_features, my_output)









predictions = predict_output(feature_matrix, weights)
print predictions

# predicted price for the 1st house in the TEST data set for model 2 

RSS = get_residual_sum_of_squares(feature_matrix, weights,test_output)

print (RSS)

print(test_output)

[ 366651.41203656  762662.39786164  386312.09499712 ...,  682087.39928241
  585579.27865729  216559.20396617]
2.70263446465e+14
[ 310000.  650000.  233000. ...,  610685.  400000.  402101.]


In [67]:
# When evaluating predictions on the test data, we use the weights learned from the training data.