In [136]:
dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':float, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}

In [137]:
import pandas as pd

In [138]:
sales = pd.read_csv('kc_house_data.csv', dtype=dtype_dict)
train = pd.read_csv('kc_house_train_data.csv',dtype=dtype_dict)
test = pd.read_csv('kc_house_test_data.csv',dtype=dtype_dict)

In [139]:
import numpy as np

In [140]:
def get_numpy_data(data, features, output):
    data['constant'] = 1 # add a constant column to an SFrame
    # prepend variable 'constant' to the features list
    features = ['constant'] + features

    # select the columns of data_SFrame given by the ‘features’ list into the SFrame ‘features_sframe’
    feature_matrix = data[features].as_matrix(columns=None)

    # this will convert the features_sframe into a numpy matrix with GraphLab Create >= 1.7!!
    # features_matrix = features
    # assign the column of data_sframe associated with the target to the variable ‘output_sarray’

    # this will convert the SArray into a numpy array:
    output_array = data[output].as_matrix(columns=None) # GraphLab Create>= 1.7!!
    return (feature_matrix, output_array)

In [141]:
def predict_output(feature_matrix, weights):
    predictions = np.dot(feature_matrix, weights)
    return(predictions)

In [142]:
def normalize_features(features):
    norms = np.linalg.norm(features, axis=0)
    normalized_features = features / norms
    return (normalized_features, norms)

In [143]:
normalize_features(np.array([[3,6,9],[4,8,12]]))


(array([[ 0.6,  0.6,  0.6],
        [ 0.8,  0.8,  0.8]]), array([  5.,  10.,  15.]))

In [144]:
features = ['sqft_living', 'bedrooms']

In [145]:
(feature_matrix, output_array) = get_numpy_data(sales, features, 'price')

In [146]:
(normalized_features, norms) = normalize_features(feature_matrix)
normalized_features, norms

(array([[ 0.00680209,  0.00353021,  0.00583571],
        [ 0.00680209,  0.00768869,  0.00583571],
        [ 0.00680209,  0.00230361,  0.00389048],
        ..., 
        [ 0.00680209,  0.00305154,  0.00389048],
        [ 0.00680209,  0.00478673,  0.00583571],
        [ 0.00680209,  0.00305154,  0.00389048]]),
 array([  1.47013605e+02,   3.34257264e+05,   5.14075870e+02]))

In [147]:
weights = np.array([1, 4, 1])
predictions = predict_output(normalized_features, weights)

In [148]:
ro_1 = np.dot(normalized_features[:,1],(output_array - predictions + weights[1]*normalized_features[:,1]))

In [149]:
ro_2 = np.dot(normalized_features[:,2],(output_array - predictions + weights[2]*normalized_features[:,2]))

In [150]:
print ro_1, ro_2

87939470.8233 80966698.6662


In [151]:
def lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty):
    # compute prediction
    prediction = predict_output(feature_matrix, weights)
    # compute ro[i] = SUM[ [feature_i]*(output - prediction + weight[i]*[feature_i]) ]
    ro_i = np.dot(feature_matrix[:,i],(output - prediction + weights[i]*feature_matrix[:,i]))
    
    if i == 0: # intercept -- do not regularize
        new_weight_i = ro_i
    elif ro_i < -l1_penalty/2.:
        new_weight_i = ro_i+ l1_penalty/2
    elif ro_i > l1_penalty/2.:
        new_weight_i = ro_i- l1_penalty/2
    else:
        new_weight_i = 0.
    
    return new_weight_i


In [163]:
# should print 0.425558846691
import math
print lasso_coordinate_descent_step(1, np.array([[3./math.sqrt(13),1./math.sqrt(10)],
                   [2./math.sqrt(13),3./math.sqrt(10)]]), np.array([1., 1.]), np.array([1., 4.]), 0.1)

0.425558846691


In [164]:
def lasso_cyclical_coordinate_descent(feature_matrix, output, initial_weights, l1_penalty, tolerance):
    weights = initial_weights
    converged = False
    while not converged:
        max_change = 0
        for i in range(len(weights)):
            old_weight_i = weights[i]
            weights[i] = lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty)
            change = np.abs(weights[i] - old_weight_i)
            if change > max_change:
                max_change = change
        if max_change < tolerance:
            converged = True
    return weights

Using the following parameters, learn the weights on the sales dataset.

* Initial weights = all zeros
* L1 penalty = 1e7
* Tolerance = 1.0

In [165]:
lasso_cyclical_coordinate_descent(normalized_features, output_array, np.array([0., 0., 0.]), 1e7, 1.0)

array([ 21624997.95951911,  63157247.20788954,         0.        ])

In [166]:
lasso_weights = np.array([ 21624997.95951911,  63157247.20788954,         0.        ])
predictions = predict_output(normalized_features, lasso_weights)
RSS = np.sum((output_array - predictions) ** 2)

In [167]:
print RSS

1.63049247672e+15


## Evaluating LASSO fit with more features
17. Let us split the sales dataset into training and test sets. If you are using GraphLab Create, call ‘random_split’ with .8 ratio and seed=0. Otherwise, please down the corresponding csv files from the downloads section.

18. Create a normalized feature matrix from the TRAINING data with the following set of features.

bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view, condition, grade, sqft_above, sqft_basement, yr_built, yr_renovated

Make sure you store the norms for the normalization, since we’ll use them later.



In [168]:
train_features = ['bedrooms','bathrooms','sqft_living','sqft_lot','floors','waterfront','view','condition','grade','sqft_above','sqft_basement','yr_built','yr_renovated']

In [169]:
output = 'price'
(train_feature_matrix, train_output_array) = get_numpy_data(train, train_features, output)

In [170]:
(normalized_train, norms_train) = normalize_features(train_feature_matrix)

19. First, learn the weights with l1_penalty=1e7, on the training data. Initialize weights to all zeros, and set the tolerance=1. Call resulting weights’ weights1e7’, you will need them later.

20. Quiz Question: What features had non-zero weight in this case?

In [172]:
initial_weights = np.zeros(14)
weights1e7 = lasso_cyclical_coordinate_descent(normalized_train, train_output_array, initial_weights, 1e7, 1)

In [173]:
pd.Series(weights1e7,index=['intercept']+ train_features)


intercept        2.442960e+07
bedrooms         0.000000e+00
bathrooms        0.000000e+00
sqft_living      4.838917e+07
sqft_lot         0.000000e+00
floors           0.000000e+00
waterfront       3.317511e+06
view             7.329962e+06
condition        0.000000e+00
grade            0.000000e+00
sqft_above       0.000000e+00
sqft_basement    0.000000e+00
yr_built         0.000000e+00
yr_renovated     0.000000e+00
dtype: float64

21. Next, learn the weights with l1_penalty=1e8, on the training data. Initialize weights to all zeros, and set the tolerance=1. Call resulting weights ‘weights1e8’, you will need them later.

22. Quiz Question: What features had non-zero weight in this case?



In [177]:
initial_weights = np.zeros(14)
weights1e8 = lasso_cyclical_coordinate_descent(normalized_train, train_output_array, initial_weights, 1e8, 1)

In [178]:
pd.Series(weights1e8,index=['intercept']+ train_features)

intercept        7.111463e+07
bedrooms         0.000000e+00
bathrooms        0.000000e+00
sqft_living      0.000000e+00
sqft_lot         0.000000e+00
floors           0.000000e+00
waterfront       0.000000e+00
view             0.000000e+00
condition        0.000000e+00
grade            0.000000e+00
sqft_above       0.000000e+00
sqft_basement    0.000000e+00
yr_built         0.000000e+00
yr_renovated     0.000000e+00
dtype: float64

23. Finally, learn the weights with l1_penalty=1e4, on the training data. Initialize weights to all zeros, and set the tolerance=5e5. Call resulting weights ‘weights1e4’, you will need them later. (This case will take quite a bit longer to converge than the others above.)

24. Quiz Question: What features had non-zero weight in this case?



In [179]:
initial_weights = np.zeros(14)
weights1e4 = lasso_cyclical_coordinate_descent(normalized_train, train_output_array, initial_weights, 1e4, 5e5)

In [180]:
pd.Series(weights1e4,index=['intercept']+ train_features)

intercept        7.856474e+07
bedrooms        -2.209740e+07
bathrooms        1.279107e+07
sqft_living      9.380809e+07
sqft_lot        -2.013173e+06
floors          -4.219185e+06
waterfront       6.482843e+06
view             7.127409e+06
condition        5.001665e+06
grade            1.432752e+07
sqft_above      -1.577096e+07
sqft_basement   -5.159591e+06
yr_built        -8.449534e+07
yr_renovated     2.824439e+06
dtype: float64

## Rescaling learned weights
25. Recall that we normalized our feature matrix, before learning the weights. To use these weights on a test set, we must normalize the test data in the same way. Alternatively, we can rescale the learned weights to include the normalization, so we never have to worry about normalizing the test data:

In this case, we must scale the resulting weights so that we can make predictions with original features:

Store the norms of the original features to a vector called ‘norms’:

features, norms = normalize_features(features)

Run Lasso on the normalized features and obtain a ‘weights’ vector
Compute the weights for the original features by performing element-wise division, i.e.

weights_normalized = weights / norms

Now, we can apply weights_normalized to the test data, without normalizing it!

26. Create a normalized version of each of the weights learned above. (‘weights1e4’, ‘weights1e7’, ‘weights1e8’). To check your results, if you call ‘normalized_weights1e7’ the normalized version of ‘weights1e7’, then

In [183]:
normalized_weights1e4 = weights1e4 / norms_train
normalized_weights1e7 = weights1e7 / norms_train
normalized_weights1e8 = weights1e8 / norms_train

In [184]:
print normalized_weights1e7[3]

161.317457646


Evaluating each of the learned models on the test data
27. Let's now evaluate the three models on the test data. Extract the feature matrix and output array from the TEST set. But this time, do NOT normalize the feature matrix. Instead, use the normalized version of weights to make predictions.

Computemthe RSS of each of the three normalized weights on the (unnormalized) feature matrix.

28. Quiz Question: Which model performed best on the test data?

In [189]:
(test_feature_matrix, test_output_array) = get_numpy_data(test, train_features, output)

In [191]:
predictions1e4 = predict_output(test_feature_matrix, weights1e4)
predictions1e7 = predict_output(test_feature_matrix, weights1e7)
predictions1e8 = predict_output(test_feature_matrix, weights1e8)

In [193]:
RSS1e4 = np.sum((test_output_array - predictions1e4) ** 2)
RSS1e7 = np.sum((test_output_array - predictions1e7) ** 2)
RSS1e8 = np.sum((test_output_array - predictions1e8) ** 2)

In [195]:
print RSS1e4, RSS1e7, RSS1e8

4.55258078738e+25 5.09512950079e+25 2.10624234224e+19
