In [124]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn')

In [156]:
def get_numpy_data(dataset,features,output):
    dataset['constant'] = 1
    features.insert(0,'constant')
    X = np.array(dataset[features])
    Y = np.array(dataset[output]).reshape(-1,1)
    return X,Y
    

In [157]:
def pred_output(X,W):
    Y_pred = np.dot(X,W.T)#(21613,2) * (2,1) --> (21613,1) + (1,1)  -->(21613,1)
    return np.array(Y_pred) #(21613,1)

In [158]:
def normalize_features(features):
    norms = np.linalg.norm(features, axis=0)
    features_normalized = features/norms
    return features_normalized,norms

In [159]:
dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':str, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}
house_data = pd.read_csv('kc_house_data.csv',dtype= dtype_dict)
house_data.shape

(21613, 21)

In [160]:
X,Y = get_numpy_data(dataset = house_data,features = ['sqft_living','bedrooms'],output=['price'])
print(X.shape,Y.shape)

(21613, 3) (21613, 1)


In [161]:
X_norm, norms = normalize_features(features = X)

In [162]:
W = np.array([1,4,1]).reshape(1,-1)
print(W.shape)

(1, 3)


In [163]:
pred_output(X_norm,W)

array([[0.02675867],
       [0.04339256],
       [0.01990703],
       ...,
       [0.02289873],
       [0.03178473],
       [0.02289873]])

In [164]:
X_norm.shape

(21613, 3)

In [165]:
X_norm[:,0].reshape(-1,1)

array([[0.00680209],
       [0.00680209],
       [0.00680209],
       ...,
       [0.00680209],
       [0.00680209],
       [0.00680209]])

In [166]:
Y.shape

(21613, 1)

In [167]:
W[0][0]

1

In [168]:
ro = []
for i in range(X_norm.shape[1]):
    feature_i = X_norm[:,i].reshape(-1,1)
    output = Y
    prediction = pred_output(X_norm,W)
    ro.append( np.sum( feature_i*(output - prediction + W[0][i]*feature_i) ) )

In [169]:
ro

[79400300.0145229, 87939470.82325175, 80966698.66623947]

In [170]:
87939470.82325175 * 2

175878941.6465035

In [171]:
80966698.66623947 > (175878941.6465035/2)

False

In [172]:
def lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty):
    # compute prediction
    
    # compute ro[i] = SUM[ [feature_i]*(output - prediction + weight[i]*[feature_i]) ]
    feature_i = feature_matrix[:,i].reshape(-1,1)
    prediction = pred_output(feature_matrix,weights)
    ro = []
    new_weight_i = 0
    ro.append( np.sum( feature_i*(output - prediction + W[0][i]*feature_i) ) )
    ro_i = ro[0]
    if i == 0: # intercept -- do not regularize
        new_weight_i = ro_i
    elif ro_i < -l1_penalty/2.:
        new_weight_i = ro_i + (l1_penalty/2.)
    elif ro_i > l1_penalty/2.:
        new_weight_i = ro_i - (l1_penalty/2.)
    else:
        new_weight_i = 0.
    
    return new_weight_i

### Testing with dummy example

In [179]:
# should print 0.425558846691
import math
X_try = np.array([[3./math.sqrt(13),1./math.sqrt(10)],[2./math.sqrt(13),3./math.sqrt(10)]])
Y_try = np.array([1., 1.]).reshape(-1,1)
W_try = np.array([1., 4.]).reshape(1,-1)
l1_penalty_try = 0.1
print(X_try.shape,Y_try.shape,W_try.shape)
#print lasso_coordinate_descent_step(1, np.array([[3./math.sqrt(13),1./math.sqrt(10)],
 #                  [2./math.sqrt(13),3./math.sqrt(10)]]), np.array([1., 1.]), np.array([1., 4.]), 0.1)

(2, 2) (2, 1) (1, 2)


In [180]:
lasso_coordinate_descent_step(1,X_try,Y_try,W_try,0.1)

0.4255588466910251

In [207]:
def lasso_cyclical_coordinate_descent(feature_matrix, output, initial_weights, l1_penalty, tolerance):
    diff_w = []
    new_w = {}
    max_diff = 1e10
    while(max_diff > tolerance):
        diff_w = []
        new_ = {}
        for i in range(feature_matrix.shape[1]):
            new_w[str(i)] = lasso_coordinate_descent_step(i,feature_matrix, output, initial_weights, l1_penalty)
            diff_w.append(initial_weights[0][i] - new_w[str(i)])
#             print(f'Old: {initial_weights[0][i]}')
#             print(f'New: {new_w[str(i)]}')
#             print(f'Difference: {diff_w[i]}')
            initial_weights[0][i] = new_w[str(i)]
        max_diff = max(diff_w)
#         print('..........................')
    
    return new_w

In [182]:
X,Y = get_numpy_data(house_data,['sqft_living','bedrooms'],['price'])
print(X.shape,Y.shape)
X_norm,norms = normalize_features(X)
print(X_norm.shape,Y.shape)

(21613, 3) (21613, 1)
(21613, 3) (21613, 1)


array([[1.00e+00, 1.18e+03, 3.00e+00],
       [1.00e+00, 2.57e+03, 3.00e+00],
       [1.00e+00, 7.70e+02, 2.00e+00],
       ...,
       [1.00e+00, 1.02e+03, 2.00e+00],
       [1.00e+00, 1.60e+03, 3.00e+00],
       [1.00e+00, 1.02e+03, 2.00e+00]])

In [183]:
W = np.array([0.,0.,0.]).reshape(1,-1)
l1_penalty = 1e7
tolerance = 1.0
print(W.shape)

(1, 3)


In [184]:
weights_learned = lasso_cyclical_coordinate_descent(X_norm,Y,W,l1_penalty,tolerance)


Old: 0.0
New: 79400304.63764462
Difference: -79400304.63764462
Old: 0.0
New: 10305258.704949208
Difference: -10305258.704949208
Old: 0.0
New: -299724.1696074158
Difference: 299724.1696074158
..........................
Old: 79400304.63764462
New: 70262136.26121683
Difference: 9138168.376427785
Old: 10305258.704949208
New: 18947595.76476732
Difference: -8642337.059818111
Old: -299724.1696074158
New: 0.0
Difference: -299724.1696074158
..........................
Old: 70262136.26121683
New: 62067326.742834166
Difference: 8194809.5183826685
Old: 18947595.76476732
New: 26161208.263501395
Difference: -7213612.498734076
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 62067326.742834166
New: 55468421.66091432
Difference: 6598905.081919849
Old: 26161208.263501395
New: 32197788.172365278
Difference: -6036579.908863883
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 55468421.66091432
New: 49946248.430094436
Difference: 5522173.230819881
Old: 32197788.172365278
Ne

In [185]:
weights_learned

{'0': 21624997.95951909, '1': 63157247.20788956, '2': 0.0}

In [186]:
W_learned = np.array(list(weights_learned.values())).reshape(1,-1)

In [187]:
rss = np.sum(np.square(Y - pred_output(X_norm,W_learned)))

In [188]:
rss

1630492476715386.5

# Train-Test Split

In [217]:
train_data = pd.read_csv('wk3_kc_house_train_data.csv',dtype=dtype_dict)
print(train_data.shape)
test_data = pd.read_csv('wk3_kc_house_test_data.csv',dtype=dtype_dict)
print(test_data.shape)

(9761, 21)
(2217, 21)


In [218]:
feature = 'bedrooms bathrooms sqft_living sqft_lot floors waterfront view condition grade sqft_above sqft_basement yr_built yr_renovated'
feature.split(sep=' ')

['bedrooms',
 'bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above',
 'sqft_basement',
 'yr_built',
 'yr_renovated']

In [224]:
features = ['bedrooms',
 'bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above',
 'sqft_basement',
 'yr_built',
 'yr_renovated']


In [225]:
X_train,Y_train = get_numpy_data(dataset = train_data,features=features,output = ['price'])
print(X_train.shape,Y_train.shape)

(9761, 14) (9761, 1)


In [226]:
X_train

array([[1, 4.0, 3.0, ..., 910, 1965, 0],
       [1, 4.0, 4.5, ..., 1530, 2001, 0],
       [1, 2.0, 1.0, ..., 300, 1942, 0],
       ...,
       [1, 4.0, 3.5, ..., 910, 2009, 0],
       [1, 2.0, 0.75, ..., 0, 2009, 0],
       [1, 2.0, 0.75, ..., 0, 2008, 0]], dtype=object)

In [227]:
X_train_norm,norms_train_1e7 = normalize_features(X_train)
print(X_train_norm.shape)

TypeError: loop of ufunc does not support argument 48805 of type str which has no callable conjugate method

In [202]:
W = np.zeros(14).reshape(1,-1)
l1_penalty = 1e7
tolerance = 1.0
print(W.shape)

(1, 14)


# Questions:
10. Recall that, whenever ro[i] falls between -l1_penalty/2 and l1_penalty/2, the corresponding weight w[i] is sent to zero. Now suppose we were to take one step of coordinate descent on either feature 1 or feature 2. What range of values of l1_penalty would not set w[1] zero, but would set w[2] to zero, if we were to take a step in that coordinate?
##### ANS: 161933397.33247894
11. Quiz Question: What range of values of l1_penalty would set both w[1] and w[2] to zero, if we were to take a step in that coordinate?
##### ANS: 175878941.6465035
15. Quiz Question: What is the RSS of the learned model on the normalized dataset?
##### ANS: 1630492476715386.5
16. Quiz Question: Which features had weight zero at convergence?
#### ANS: bedrooms
20. Quiz Question: What features had non-zero weight in this case?
##### ANS:sqft_living,waterfront,view
22. Quiz Question: What features had non-zero weight in this case?
##### ANS: only bias
24. Quiz Question: What features had non-zero weight in this case?
##### ANS: ALL

In [204]:
weights_learned = lasso_cyclical_coordinate_descent(X_train_norm,Y_train,W,l1_penalty,tolerance)

Old: 0.0
New: 53621004.689714715
Difference: -53621004.689714715
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 1790905.2332449434
Difference: -1790905.2332449434
Old: 0.0
New: 4007093.098883977
Difference: -4007093.098883977
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 4581816.893492026
Difference: -4581816.893492026
Old: 0.0
New: 5259143.094896801
Difference: -5259143.094896801
Old: 0.0
New: -1948652.7334768223
Difference: 1948652.7334768223
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: -407816.87257254217
Difference: 407816.87257254217
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 53621004.689714715
New: 48599830.75796667
Difference: 5021173.9317480475
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1790905.2332449434
New: 2521897.939997458
Difference: -730992.7067525145
Old: 4007093.098883977
New: 7417596.117351072
Difference: -3410503.0184670947
Old: 0.0
New: 0.0


Old: 30225817.677290358
New: 30268285.23762647
Difference: -42467.560336112976
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1899422.8954106793
New: 1899770.960878821
Difference: -348.0654681418091
Old: 5724636.689828876
New: 5721665.449836362
Difference: 2971.2399925142527
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 24105722.893688038
New: 24067773.877180923
Difference: 37949.016507115215
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 30268285.23762647
New: 30304066.632495955
Difference: -35781.39486948401
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1899770.960878821
New: 1900064.2263232563
Difference: -293.26544443517923
Old: 5721665.449836362
New: 5719162.006959433
Difference: 2503.4428769294173
Old: 0.0
New: 0.0


Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1901615.3997977944
New: 1901618.273618415
Difference: -2.8738206205889583
Old: 5705920.507630341
New: 5705895.97543297
Difference: 24.53219737112522
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 23866677.677215192
New: 23866364.349188738
Difference: 313.32802645489573
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 30493676.356278315
New: 30493971.7872241
Difference: -295.43094578385353
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1901618.273618415
New: 1901620.6949797925
Difference: -2.4213613774627447
Old: 5705895.97543297
New: 5705875.305626787
Difference: 20.66980618238449
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.

Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 23864699.221250545
New: 23864697.384721205
Difference: 1.836529340595007
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 30495541.80420342
New: 30495543.535831466
Difference: -1.7316280454397202
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 1901633.5628886288
New: 1901633.5770811085
Difference: -0.01419247966259718
Old: 5705765.459494801
New: 5705765.33834156
Difference: 0.12115324102342129
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
Old: 0.0
New: 0.0
Difference: 0.0
..........................
Old: 23864697.384721205
New: 23864695.8373382
Difference: 1.54738300293684
Old: 0.0
New

In [205]:
weights1e7 = list(weights_learned.values())

In [206]:
weights1e7

[23864692.509538405,
 0.0,
 0.0,
 30495548.132547192,
 0.0,
 0.0,
 1901633.614755936,
 5705765.016732657,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [209]:
W = np.zeros(14).reshape(1,-1)
l1_penalty = 1e8
tolerance = 1.0
print(W.shape)

(1, 14)


In [210]:
weights_learned = lasso_cyclical_coordinate_descent(X_train_norm,Y_train,W,l1_penalty,tolerance)
weights1e8 = list(weights_learned.values())
weights1e8

[53621004.689714715,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [211]:
W = np.zeros(14).reshape(1,-1)
l1_penalty = 1e4
tolerance = 5e5
print(W.shape)

(1, 14)


In [212]:
weights_learned = lasso_cyclical_coordinate_descent(X_train_norm,Y_train,W,l1_penalty,tolerance)
weights1e4 = list(weights_learned.values())
weights1e4

[54923038.22319205,
 -6742423.417440407,
 20350831.48712548,
 35945935.91404693,
 -1398167.9691648737,
 -10600644.365969915,
 5453748.952995855,
 6048183.427693555,
 -12285595.592441916,
 3689082.067837797,
 14460326.592993274,
 2090324.7998789153,
 -44983755.13326684,
 2036178.7331405978]