In [1]:
import pandas as pd
import numpy as np
from haversine import haversine
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
import numpy as np

# Display up to 300 columns when a DataFrame is rendered.
pd.options.display.max_columns = 300

## Step 1: Read in the holdout data, the scaler, and the best models

In [2]:
# Load the holdout feature set; the first CSV column is used as the row index.
holdout = pd.read_csv('kc_house_data_test_features.csv', index_col=0)
# Preview the first rows to confirm the load.
holdout.head()

Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,1974300020,20140827T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
1,1974300020,20150218T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
2,3630020380,20141107T000000,3,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,98029,47.5472,-121.998,1470,1576
3,1771000290,20141203T000000,3,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,98077,47.7427,-122.071,1160,10565
4,5126310470,20150115T000000,4,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,98059,47.4863,-122.14,2830,7916


In [29]:
# Load the pickled scaler and the two pickled models used for the predictions below.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
# NOTE(review): presumably these objects were fitted in the training notebook; confirm that they
# were fitted on the same columns, in the same order, as the features built below.
final_scaler = pd.read_pickle('scaler.pickle')
final_model = pd.read_pickle('model.pickle')
final_model_ridge = pd.read_pickle('ridge_model.pickle')

## Step 2: Feature Engineering for holdout set

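Remember that we have to apply to the holdout data the same transformations (extreme-value handling, feature engineering, and scaling) that we applied to the original training data. Anything that was fitted on the training data, such as the scaler, must be reused as-is: call `transform` on the holdout set and never re-fit it there.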

In [4]:
# Extreme values

# Homes with more than 1,400 sqft of living space that report zero bedrooms / bathrooms
# are given 4 bedrooms / 3 bathrooms; every other row keeps its value.
large_home = holdout.sqft_living > 1400
holdout['bedrooms'] = holdout.bedrooms.mask(large_home & (holdout.bedrooms == 0), 4)
holdout['bathrooms'] = holdout.bathrooms.mask(large_home & (holdout.bathrooms == 0), 3)

# Bedroom counts of 33 and 11 become 3 and 4 respectively (checked on the properties' Zillow pages).
holdout['bedrooms'] = holdout.bedrooms.replace({33: 3, 11: 4})

# Cap sqft_lot at 650,000.
holdout['sqft_lot'] = holdout.sqft_lot.clip(upper=650000)

In [5]:
# Collapse grade into 7 buckets:
# grades 1-5 -> 1, 6 -> 2, 7 -> 3, 8 -> 4, 9 -> 5, 10 -> 6, grades 11-13 -> 7.
grade = holdout.grade
conditions = (
    [grade.between(1, 5)]
    + [grade == level for level in range(6, 11)]
    + [grade.between(11, 13)]
)
choices = list(range(1, 8))

# np.select falls back to its default of 0 for any row that matches none of the conditions.
holdout['grade_new'] = np.select(conditions, choices)

In [6]:
# Feature engineering
# Helper column holding each home's (lat, long) pair.
holdout['lat_long'] = tuple(zip(holdout.lat, holdout.long))
# Reference points as [lat, long].
seattle = [47.6092, -122.3363]
bellevue = [47.61555, -122.20392]


def _miles_to(reference_point, points):
    """Return the haversine distance in miles from reference_point to each (lat, long) in points."""
    return [haversine(reference_point, point, unit='mi') for point in points]


seattle_distances = _miles_to(seattle, holdout['lat_long'])
bellevue_distances = _miles_to(bellevue, holdout['lat_long'])

# Assign the plain lists so values are matched to rows by position. Wrapping them in
# pd.Series (default 0..n-1 index) would align on index labels instead and silently
# produce NaN for every row whose label in holdout falls outside that range.
holdout['distance_from_seattle'] = seattle_distances
holdout['distance_from_bellevue'] = bellevue_distances

# Distance to whichever of the two reference points is closer.
holdout['distance_from_epicenter'] = holdout[['distance_from_bellevue', 'distance_from_seattle']].min(axis=1)

# Age column, measured against the fixed reference year 2016.
holdout['age'] = 2016 - holdout.yr_built

# Renovation flag.
# NOTE(review): as written this is 1 when yr_renovated == 0 and 0 otherwise, which looks
# inverted relative to the column name. Left unchanged because the holdout features must
# match whatever encoding the models were trained on - confirm against the training notebook.
holdout['renovated_flg'] = np.where(holdout.yr_renovated == 0, 1, 0)

In [7]:
# Zip code dummies and droped 1 column 98103
# One indicator column per zip code, with the 98103 column dropped.
# dtype is pinned to uint8 (the default before pandas 2.0) so the indicators stay numeric 0/1
# instead of the boolean columns that pandas >= 2.0 produces by default.
# NOTE(review): the indicator columns come from the zip codes present in holdout; the models
# need the same columns, in the same order, as at training time - confirm.
dummies = pd.get_dummies(holdout.zipcode, dtype=np.uint8).drop(columns = 98103)
# axis is passed by keyword: the positional form was deprecated in pandas 1.3 and is
# rejected by pandas 2.0.
holdout = pd.concat([holdout, dummies], axis=1)

In [8]:
# Remove identifier, date, helper and raw-location columns; names in the list that are
# not present in holdout are simply skipped.
not_features = ['id', 'date', 'price', 'distance_from_seattle', 'distance_from_bellevue', 'lat_long', 'zipcode']
holdout = holdout.drop(columns=not_features, errors='ignore')

In [9]:
# Inspect the current column labels.
holdout.columns

Index([               'bedrooms',               'bathrooms',
                   'sqft_living',                'sqft_lot',
                        'floors',              'waterfront',
                          'view',               'condition',
                         'grade',              'sqft_above',
                 'sqft_basement',                'yr_built',
                  'yr_renovated',                     'lat',
                          'long',           'sqft_living15',
                    'sqft_lot15',               'grade_new',
       'distance_from_epicenter',                     'age',
                 'renovated_flg',                     98001,
                           98002,                     98003,
                           98004,                     98005,
                           98006,                     98007,
                           98008,                     98010,
                           98011,                     98014,
                        

In [10]:
# Expand the features with all degree-2 terms (squares and pairwise products), no bias column.
poly = PolynomialFeatures(degree=2, include_bias=False)
# Pass a plain float array rather than the DataFrame: at this point the column labels mix
# strings with integer zip codes, and scikit-learn >= 1.2 raises a TypeError when a frame
# with mixed-type column names is passed. The numeric result is the same.
transformed_holdout = poly.fit_transform(holdout.to_numpy(dtype=float))

In [11]:
# Check the (rows, columns) shape of the expanded feature matrix.
transformed_holdout.shape

(4323, 4185)

In [12]:
# Make every column label a string (the zip code indicator columns carry integer labels).
holdout.columns = [str(label) for label in holdout.columns]

In [13]:
# Names of the polynomial output columns, built from the input column names.
# PolynomialFeatures.get_feature_names was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out and fall back to the old method on older releases.
if hasattr(poly, 'get_feature_names_out'):
    poly_columns = list(poly.get_feature_names_out(holdout.columns))
else:
    poly_columns = poly.get_feature_names(holdout.columns)

In [14]:
# Wrap the expanded array in a DataFrame labelled with the polynomial feature names.
# No index is passed, so the frame gets a default RangeIndex.
transformed_holdout = pd.DataFrame(transformed_holdout, columns=poly_columns)

In [15]:
# Preview the first rows of the expanded feature frame.
transformed_holdout.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,grade_new,distance_from_epicenter,age,renovated_flg,98001,98002,98003,98004,98005,98006,98007,98008,98010,98011,98014,98019,98022,98023,98024,98027,98028,98029,98030,98031,98032,98033,98034,98038,98039,98040,98042,98045,98052,98053,98055,98056,98058,98059,98065,98070,98072,98074,98075,98077,98092,98102,98105,98106,98107,98108,98109,98112,98115,98116,98117,98118,98119,98122,98125,98126,98133,98136,98144,98146,98148,98155,98166,98168,98177,98178,98188,98198,98199,bedrooms^2,bedrooms bathrooms,bedrooms sqft_living,bedrooms sqft_lot,bedrooms floors,bedrooms waterfront,bedrooms view,bedrooms condition,bedrooms grade,bedrooms sqft_above,bedrooms sqft_basement,bedrooms yr_built,bedrooms yr_renovated,bedrooms lat,bedrooms long,bedrooms sqft_living15,bedrooms sqft_lot15,bedrooms grade_new,bedrooms distance_from_epicenter,bedrooms age,bedrooms renovated_flg,bedrooms 98001,bedrooms 98002,bedrooms 98003,bedrooms 98004,bedrooms 98005,bedrooms 98006,bedrooms 98007,bedrooms 98008,bedrooms 98010,bedrooms 98011,bedrooms 98014,bedrooms 98019,bedrooms 98022,bedrooms 98023,bedrooms 98024,bedrooms 98027,bedrooms 98028,bedrooms 98029,bedrooms 98030,bedrooms 98031,bedrooms 98032,bedrooms 98033,bedrooms 98034,bedrooms 98038,bedrooms 98039,bedrooms 98040,bedrooms 98042,bedrooms 98045,bedrooms 98052,bedrooms 98053,bedrooms 98055,bedrooms 98056,bedrooms 98058,bedrooms 98059,bedrooms 98065,bedrooms 98070,bedrooms 98072,bedrooms 98074,bedrooms 98075,...,98119 98126,98119 98133,98119 98136,98119 98144,98119 98146,98119 98148,98119 98155,98119 98166,98119 98168,98119 98177,98119 98178,98119 98188,98119 98198,98119 98199,98122^2,98122 98125,98122 98126,98122 98133,98122 98136,98122 98144,98122 98146,98122 98148,98122 98155,98122 98166,98122 98168,98122 98177,98122 98178,98122 98188,98122 98198,98122 98199,98125^2,98125 
98126,98125 98133,98125 98136,98125 98144,98125 98146,98125 98148,98125 98155,98125 98166,98125 98168,98125 98177,98125 98178,98125 98188,98125 98198,98125 98199,98126^2,98126 98133,98126 98136,98126 98144,98126 98146,98126 98148,98126 98155,98126 98166,98126 98168,98126 98177,98126 98178,98126 98188,98126 98198,98126 98199,98133^2,98133 98136,98133 98144,98133 98146,98133 98148,98133 98155,98133 98166,98133 98168,98133 98177,98133 98178,98133 98188,98133 98198,98133 98199,98136^2,98136 98144,98136 98146,98136 98148,98136 98155,98136 98166,98136 98168,98136 98177,98136 98178,98136 98188,98136 98198,98136 98199,98144^2,98144 98146,98144 98148,98144 98155,98144 98166,98144 98168,98144 98177,98144 98178,98144 98188,98144 98198,98144 98199,98146^2,98146 98148,98146 98155,98146 98166,98146 98168,98146 98177,98146 98178,98146 98188,98146 98198,98146 98199,98148^2,98148 98155,98148 98166,98148 98168,98148 98177,98148 98178,98148 98188,98148 98198,98148 98199,98155^2,98155 98166,98155 98168,98155 98177,98155 98178,98155 98188,98155 98198,98155 98199,98166^2,98166 98168,98166 98177,98166 98178,98166 98188,98166 98198,98166 98199,98168^2,98168 98177,98168 98178,98168 98188,98168 98198,98168 98199,98177^2,98177 98178,98177 98188,98177 98198,98177 98199,98178^2,98178 98188,98178 98198,98178 98199,98188^2,98188 98198,98188 98199,98198^2,98198 98199,98199^2
0,4.0,2.5,2270.0,11500.0,1.0,0.0,0.0,3.0,8.0,1540.0,730.0,1967.0,0.0,47.7089,-122.241,2020.0,10918.0,4.0,6.676688,49.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,10.0,9080.0,46000.0,4.0,0.0,0.0,12.0,32.0,6160.0,2920.0,7868.0,0.0,190.8356,-488.964,8080.0,43672.0,16.0,26.706753,196.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,2.5,2270.0,11500.0,1.0,0.0,0.0,3.0,8.0,1540.0,730.0,1967.0,0.0,47.7089,-122.241,2020.0,10918.0,4.0,6.676688,49.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,10.0,9080.0,46000.0,4.0,0.0,0.0,12.0,32.0,6160.0,2920.0,7868.0,0.0,190.8356,-488.964,8080.0,43672.0,16.0,26.706753,196.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,2.5,1470.0,1779.0,2.0,0.0,0.0,3.0,8.0,1160.0,310.0,2005.0,0.0,47.5472,-121.998,1470.0,1576.0,4.0,10.696189,11.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,7.5,4410.0,5337.0,6.0,0.0,0.0,9.0,24.0,3480.0,930.0,6015.0,0.0,142.6416,-365.994,4410.0,4728.0,12.0,32.088566,33.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,1.75,1280.0,16200.0,1.0,0.0,0.0,3.0,8.0,1030.0,250.0,1976.0,0.0,47.7427,-122.071,1160.0,10565.0,4.0,10.743091,40.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,5.25,3840.0,48600.0,3.0,0.0,0.0,9.0,24.0,3090.0,750.0,5928.0,0.0,143.2281,-366.213,3480.0,31695.0,12.0,32.229272,120.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,2.75,2830.0,8126.0,2.0,0.0,0.0,3.0,8.0,2830.0,0.0,2005.0,0.0,47.4863,-122.14,2830.0,7916.0,4.0,9.414666,11.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,11.0,11320.0,32504.0,8.0,0.0,0.0,12.0,32.0,11320.0,0.0,8020.0,0.0,189.9452,-488.56,11320.0,31664.0,16.0,37.658665,44.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# NOTE(review): leftover commented-out experiment - nothing in this cell runs; consider deleting it.
#transformed_holdout = final_model(holdout)

In [17]:
# Column names used to build the input for the ridge model.
# NOTE(review): presumably the features chosen by recursive feature elimination (RFE) during
# training - confirm the list and its order against the training notebook.
selected_rfe = ['sqft_living', 'grade', 'sqft_above', 'sqft_living15', 'grade_new',
       'bedrooms sqft_living', 'bedrooms sqft_above', 'bedrooms sqft_living15',
       'bedrooms grade_new', 'bathrooms^2', 'bathrooms sqft_living',
       'bathrooms grade', 'bathrooms sqft_above', 'bathrooms sqft_living15',
       'bathrooms grade_new', 'sqft_living^2', 'sqft_living floors',
       'sqft_living condition', 'sqft_living grade', 'sqft_living sqft_above',
       'sqft_living yr_built', 'sqft_living lat', 'sqft_living long',
       'sqft_living grade_new', 'condition sqft_above',
       'condition sqft_living15', 'condition grade_new', 'grade^2',
       'grade sqft_above', 'grade yr_built', 'grade lat', 'grade long',
       'grade sqft_living15', 'grade grade_new', 'sqft_above^2',
       'sqft_above yr_built', 'sqft_above lat', 'sqft_above long',
       'sqft_above sqft_living15', 'sqft_above grade_new',
       'yr_built sqft_living15', 'yr_built grade_new', 'lat sqft_living15',
       'lat grade_new', 'long sqft_living15', 'long grade_new',
       'sqft_living15^2', 'sqft_living15 grade_new', 'grade_new^2']

In [18]:
# Keep only the selected columns, in the order given by selected_rfe.
transformed_holdout_ridge = transformed_holdout.loc[:, selected_rfe]

In [19]:
# Scale with the parameters already stored in the loaded scaler. Use transform, not
# fit_transform: re-fitting on the holdout rows would overwrite the stored parameters with
# holdout statistics, so the model would receive inputs scaled differently from the data
# the scaler was originally fitted on.
# NOTE(review): this requires final_scaler to have been fitted on exactly these columns -
# confirm against the training notebook.
transformed_holdout_ridge = final_scaler.transform(transformed_holdout_ridge)

## Step 3: Predict the holdout set

In [20]:
# Predict with the primary model from the full polynomial feature frame.
# NOTE(review): transformed_holdout is passed unscaled here, and the recorded predictions are
# on the order of 1e9-1e10 - check whether final_model expects features scaled the same way
# as during training before trusting these values.
final_answers = final_model.predict(transformed_holdout)

In [21]:
# Display the primary model's predictions.
final_answers

array([2.64718607e+10, 2.64718607e+10, 4.41869115e+09, ...,
       2.80151643e+09, 4.88142513e+09, 3.22934177e+09])

In [22]:
# Pair every feature name with the model coefficient at the same position.
[(feature, weight) for feature, weight in zip(transformed_holdout.columns, final_model.coef_)]

[('bedrooms', 2056.7266742614993),
 ('bathrooms', 29304.350272651773),
 ('sqft_living', 281087.83561115596),
 ('sqft_lot', -176197.6649117701),
 ('floors', -260782.4292571274),
 ('waterfront', -3927.6411913119473),
 ('view', 61891.343102306084),
 ('condition', -30635.01465429808),
 ('grade', -20558.646056750964),
 ('sqft_above', -94411.31858575341),
 ('sqft_basement', 11337.197164801075),
 ('yr_built', -22422.5149398025),
 ('yr_renovated', 113108.13526326824),
 ('lat', -7302.828972425195),
 ('long', 60184.79282581086),
 ('sqft_living15', -109017.32944715161),
 ('sqft_lot15', -13460.832908751887),
 ('grade_new', 154.39324169353554),
 ('distance_from_epicenter', 140.66366776004327),
 ('age', -3298.1813657017547),
 ('renovated_flg', -2022.0010964388796),
 ('98001', 1304.772104807868),
 ('98002', -2750.782015973679),
 ('98003', -2249.5279687344687),
 ('98004', -2070.9148348942217),
 ('98005', -2469.8808145538183),
 ('98006', -2142.2451653906464),
 ('98007', -4159.3490222022765),
 ('98008',

In [23]:
# Predict with the ridge model from the scaled, selected features.
final_answers_ridge = final_model_ridge.predict(transformed_holdout_ridge)

In [24]:
# Back-transform the ridge predictions with exp.
# The value is recomputed from the model instead of overwriting final_answers_ridge with the
# exp of itself, so re-running this cell cannot exponentiate the predictions a second time.
# NOTE(review): applying exp implies the ridge model predicts a log-transformed target -
# confirm against the training notebook.
final_answers_ridge = np.exp(final_model_ridge.predict(transformed_holdout_ridge))

## Step 4: Export your predictions

In [25]:
# Write the primary model's predictions to CSV (default to_csv settings, so the row index is written too).
pd.DataFrame(final_answers).to_csv('housing_preds_Edward_De_Jesus.csv')

In [26]:
# Write the ridge model's predictions to a second CSV (default to_csv settings, so the row index is written too).
pd.DataFrame(final_answers_ridge).to_csv('housing_preds_Edward_De_Jesus01.csv')