# Week 2_2: Multiple regression (gradient descent)

In the first notebook we explored multiple regression using GraphLab Create. Now we will use GraphLab Create along with NumPy to solve for the regression weights with gradient descent.

In this notebook we will cover estimating multiple regression weights via gradient descent. You will:
* Add a constant column of 1's to a graphlab SFrame to account for the intercept
* Convert an SFrame into a Numpy array
* Write a predict_output() function using Numpy
* Write a Numpy function to compute the derivative of the residual sum of squares (RSS) with respect to a single regression weight
* Write a gradient descent function to compute the regression weights given an initial weight vector, step size and tolerance.
* Use the gradient descent function to estimate regression weights for multiple features

In [3]:
import graphlab
import numpy as np

[INFO] This non-commercial license of GraphLab Create is assigned to chengjun@chem.ku.dk and will expire on January 27, 2017. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-36813 - Server binary: /usr/local/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1455003525.log
[INFO] GraphLab Server Version: 1.8.1


## Load the data

In [4]:
sales = graphlab.SFrame('kc_house_data.gl/')

In [5]:
train_data, test_data = sales.random_split(0.8, seed=0)

## Convert to Numpy array

In [6]:
print train_data[['bedrooms', 'bathrooms']].to_numpy()
print train_data[['price']].to_numpy()

[[ 3.    1.  ]
 [ 3.    2.25]
 [ 2.    1.  ]
 ..., 
 [ 3.    2.5 ]
 [ 3.    2.5 ]
 [ 2.    0.75]]
[[ 221900.]
 [ 538000.]
 [ 180000.]
 ..., 
 [ 360000.]
 [ 400000.]
 [ 325000.]]


In [7]:
def get_numpy_data(data_sframe, features, output):
    """
    Build the (feature matrix, output array) pair used by the regression code.

    features is a list (or any sequence) of column names; a single column
    name given as a string is also accepted.
    output is a list or a string naming the target column(s).

    Returns a tuple (features_matrix, output_array):
      * features_matrix has one row per row of data_sframe; its first column
        is a constant 1 (the intercept term) followed by the requested
        feature columns, in the order given.
      * output_array is whatever data_sframe[output].to_numpy() yields.

    data_sframe itself is NOT modified: the constant column is created in
    NumPy rather than being added to the frame, so calling this function
    repeatedly (or on a frame shared with other cells) has no side effects.
    """
    # A bare string would otherwise be split into characters by list().
    if isinstance(features, str):
        features = [features]
    feature_names = list(features)

    feature_values = data_sframe[feature_names].to_numpy()

    # Intercept column, created with the same dtype as the selected features
    # so that stacking it on does not change the matrix dtype.
    constant_column = np.ones((feature_values.shape[0], 1),
                              dtype=feature_values.dtype)
    features_matrix = np.hstack((constant_column, feature_values))

    output_array = data_sframe[output].to_numpy()
    return (features_matrix, output_array)

In [8]:
def predict_outcome(feature_matrix, weights):
    """
    Return the linear-model predictions: the dot product of feature_matrix
    (one row per observation) with the weights vector.
    """
    return np.dot(feature_matrix, weights)

In [9]:
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price')
my_weights = np.array([1., 1.])

## Predicting output given regression weights

In [10]:
test_predictions = predict_outcome(example_features, my_weights)

In [11]:
print test_predictions.shape

(21613,)


## Computing the Derivative

In [12]:
def feature_derivative(errors, feature):
    """
    Return twice the dot product of errors and feature.

    With errors = predictions - output, this is the derivative of the
    residual sum of squares with respect to the weight attached to `feature`.
    """
    errors_dot_feature = np.dot(errors, feature)
    return 2 * errors_dot_feature

In [13]:
# Sanity check for feature_derivative(): with all-zero weights every prediction
# is 0, so the derivative with respect to the 'constant' weight can be verified
# by hand.
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price') 
my_weights = np.array([0., 0.]) # all-zero weights make every prediction 0
test_predictions = predict_outcome(example_features, my_weights) 
print test_predictions.shape
# Just like SFrames, two numpy arrays can be subtracted element-wise with '-':
errors = test_predictions - example_output # with zero predictions the errors are just -example_output
print errors.shape
feature = example_features[:,0] # column 0 is the 'constant' column; ":" selects all rows
print feature.shape
derivative = feature_derivative(errors, feature)
print type(derivative)
print derivative
print -np.sum(example_output)*2 # hand-computed value; should equal the derivative printed above

(21613,)
(21613,)
(21613,)
<type 'numpy.float64'>
-23345850022.0
-23345850022.0


## Gradient Descent

In [37]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance,
                                max_iterations=None, verbose=True):
    """
    Estimate regression weights by gradient descent on the residual sum of squares.

    Parameters
    ----------
    feature_matrix : 2-D array, one row per observation and one column per weight.
    output : array of observed values, subtracted from the predictions.
    initial_weights : starting weights (any sequence of numbers). It is copied
        as floats, so the caller's object is never modified and integer
        starting values are not truncated when they are updated.
    step_size : multiplier applied to each derivative when updating a weight.
    tolerance : iteration stops once the gradient magnitude drops below this.
    max_iterations : optional cap on the number of passes. None (the default)
        means "iterate until converged", as before.
    verbose : when True (the default) the gradient magnitude is printed on
        every pass, followed by a convergence message.

    Returns
    -------
    numpy array of weights after the last update.

    Raises
    ------
    ValueError if the gradient magnitude becomes inf or NaN, which means the
    iteration is diverging (typically because step_size is too large) and
    would otherwise never terminate.
    """
    # Float copy: avoids integer truncation and protects the caller's array.
    weights = np.array(initial_weights, dtype=float)
    converged = False
    iteration = 0
    while not converged and (max_iterations is None or iteration < max_iterations):
        iteration += 1
        # Errors are predictions - output, computed once per pass so that every
        # weight in this pass is updated from the same set of errors.
        errors = predict_outcome(feature_matrix, weights) - output

        gradient_sum_squares = 0  # accumulates the squared partial derivatives
        for i in range(len(weights)):
            # feature_matrix[:, i] is the feature column associated with weights[i]
            derivative = feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares += derivative ** 2
            weights[i] = weights[i] - step_size * derivative

        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if verbose:
            print('gradient_magnitude: %s' % gradient_magnitude)

        # inf/NaN never compares below the tolerance, so without this check a
        # diverging run would loop forever.
        if not np.isfinite(gradient_magnitude):
            raise ValueError('gradient descent diverged (gradient magnitude is %s); '
                             'try a smaller step_size' % gradient_magnitude)

        if gradient_magnitude < tolerance:
            if verbose:
                print('Congrats, it is converged!')
                print('tolerance: %s' % tolerance)
            converged = True

    if verbose and not converged:
        print('Stopped after %s iterations without reaching the tolerance.' % iteration)
    return(weights)

## Running the Gradient Descent as Simple Regression

In [39]:
# quiz 9
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7

simple_weights = regression_gradient_descent(simple_feature_matrix, 
                                             output,
                                             initial_weights, 
                                             step_size,                                             
                                             tolerance)

gradient_magnitude: 5.05515307744e+13
gradient_magnitude: 1.31274510235e+13
gradient_magnitude: 3.40899608253e+12
gradient_magnitude: 885263580100.0
gradient_magnitude: 229889265720.0
gradient_magnitude: 59698688259.7
gradient_magnitude: 15502826422.1
gradient_magnitude: 4025844401.5
gradient_magnitude: 1045449748.16
gradient_magnitude: 271487891.86
gradient_magnitude: 70504114.6976
gradient_magnitude: 18320016.7508
Congrats, it is converged!
tolerance: 25000000.0


In [40]:
simple_weights

array([-46999.88716555,    281.91211912])

In [41]:
simple_features = ['sqft_living']
my_output= 'price'
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)

In [43]:
test_simple_feature_matrix.shape

(4229, 2)

In [44]:
predicted_test_output = predict_outcome(test_simple_feature_matrix, simple_weights)

In [48]:
predicted_test_output[0]

356134.44317092974

In [49]:
test_output[0]

310000.0

In [50]:
predicted_test_output - test_output

array([  46134.44317093,  134640.86422788,  202069.83652353, ...,
         52733.65300782,  204217.10799338, -161550.5256668 ])

In [55]:
rss = np.sum((predicted_test_output - test_output)**2)
print rss

2.75400047593e+14


## Running a multiple regression

In [56]:
# model 2
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features,my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9

multiple_weights = regression_gradient_descent(feature_matrix, 
                                             output,
                                             initial_weights, 
                                             step_size,                                             
                                             tolerance)


gradient_magnitude: 7.30720205489e+13
gradient_magnitude: 2.26732209651e+13
gradient_magnitude: 7.0607945821e+12
gradient_magnitude: 2.27568239427e+12
gradient_magnitude: 928984105638.0
gradient_magnitude: 656307425178.0
gradient_magnitude: 610615351821.0
gradient_magnitude: 593078765307.0
gradient_magnitude: 578705920128.0
gradient_magnitude: 564945676163.0
gradient_magnitude: 551538681425.0
gradient_magnitude: 538452422879.0
gradient_magnitude: 525676912708.0
gradient_magnitude: 513204543690.0
gradient_magnitude: 501028100319.0
gradient_magnitude: 489140559101.0
gradient_magnitude: 477535065233.0
gradient_magnitude: 466204926754.0
gradient_magnitude: 455143610499.0
gradient_magnitude: 444344738312.0
gradient_magnitude: 433802083366.0
gradient_magnitude: 423509566576.0
gradient_magnitude: 413461253090.0
gradient_magnitude: 403651348867.0
gradient_magnitude: 394074197340.0
gradient_magnitude: 384724276147.0
gradient_magnitude: 375596193956.0
gradient_magnitude: 366684687348.0
gradient_

In [57]:
multiple_weights

array([ -9.99999688e+04,   2.45072603e+02,   6.52795277e+01])

In [60]:
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(test_feature_matrix, test_output) = get_numpy_data(test_data, model_features,my_output)

predicted_test_output = predict_outcome(test_feature_matrix, multiple_weights)
print predicted_test_output[0]

366651.412037


In [61]:
rss = np.sum((predicted_test_output - test_output)**2)
print rss

2.70263446465e+14


In [62]:
test_output[0]

310000.0