In [1]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
import pickle
# Allow up to 300 columns to be rendered when a DataFrame is displayed, so the
# wide (252-column) polynomial feature frame built below is not truncated.
pd.options.display.max_columns = 300

## Step 1: Read in holdout data, scalers, and best model

In [2]:
# Load the holdout feature table; the first CSV column becomes the row index.
holdout = pd.read_csv(
    filepath_or_buffer='kc_house_data_test_features.csv',
    index_col=0,
)

In [3]:
# `id` is not one of the model features, so remove it.
# Rebind instead of mutating in place, and tolerate the column already being
# gone, so this cell can be re-run without raising KeyError.
holdout = holdout.drop(columns='id', errors='ignore')

In [4]:
# Restore the estimator that was serialized to model.pickle.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load pickles that were produced by a trusted training run.
with open('model.pickle', 'rb') as model_file:
    final_model = pickle.load(model_file)

# NOTE(review): the scaler/transform load below is disabled and nothing later in
# this notebook applies a scaler to the holdout data. Confirm the pickled model
# was trained on unscaled features; otherwise re-enable this and apply it.
# with open('transform.pickle', 'rb') as transform:
#     transformation = pickle.load(transform)

## Step 2: Feature Engineering for holdout set

Remember that we must apply to the holdout data the same transformations (feature engineering, extreme-value handling, and scaling) that we applied to the training data.

In [5]:
# Derive sale-timing features from the raw `date` column.
# The slices rely on each `date` value being a string whose first four
# characters are the year and whose next two are the month (YYYYMM...).
# Vectorised `.str` slicing replaces the per-row Python lambda; the explicit
# 'int64' cast keeps the dtype the same on every platform.
holdout['yr_sold'] = holdout['date'].str[:4].astype('int64')
# NOTE(review): if `yr_renovated` uses 0 to mean "never renovated", this value
# equals the sale year for those rows. Left unchanged because the feature must
# match whatever the model was trained on — confirm against the training notebook.
holdout['yr_since_reno'] = holdout['yr_sold'] - holdout['yr_renovated']
holdout['month_sold'] = holdout['date'].str[4:6].astype('int64')

In [6]:
# The raw `date` string is no longer needed once the year/month features exist.
# Rebind instead of mutating in place, and tolerate the column already being
# gone, so this cell can be re-run without raising KeyError.
holdout = holdout.drop(columns='date', errors='ignore')

In [7]:
# Columns fed to the model, in the exact order used to build the polynomial
# terms below. Do not reorder: the generated column order depends on it.
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'waterfront',
    'view',
    'condition',
    'grade',
    'sqft_above',
    'sqft_basement',
    'yr_built',
    'yr_renovated',
    'zipcode',
    'lat',
    'long',
    'sqft_living15',
    'sqft_lot15',
    'yr_sold',
    'yr_since_reno',
    'month_sold',
]

In [8]:
# Keep only the model input columns, in the order given by `features`.
df_features = holdout.loc[:, features]

In [9]:
# Expand the selected features into all degree-2 terms (the original columns,
# their squares and pairwise products; no constant/bias column).
# NOTE(review): this fits a fresh PolynomialFeatures on the holdout frame. The
# generated columns depend only on the number and order of the input columns,
# so they should match training as long as `features` is identical there —
# confirm, or reuse the transformer object fitted during training.
poly_2 = PolynomialFeatures(degree=2, include_bias=False)
poly2_data = poly_2.fit_transform(df_features)
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer `get_feature_names_out` and fall back only on older installs.
if hasattr(poly_2, 'get_feature_names_out'):
    poly2_columns = poly_2.get_feature_names_out(df_features.columns)
else:
    poly2_columns = poly_2.get_feature_names(df_features.columns)
# Carry over the holdout row labels so the expanded rows stay aligned with
# `df_features` instead of being silently renumbered from 0.
transformed_holdout = pd.DataFrame(
    poly2_data, columns=poly2_columns, index=df_features.index
)
transformed_holdout.shape

(4323, 252)

## Step 3: Predict the holdout set

In [10]:
# Score every holdout row with the loaded model, using the polynomial-expanded
# feature frame `transformed_holdout` as input.
final_answers = final_model.predict(transformed_holdout)

In [11]:
# Wrap the raw predictions in a DataFrame (default integer index and column
# labels) so they can be written out with to_csv.
price_predictions = pd.DataFrame(data=final_answers)

## Step 4: Export your predictions

In [12]:
# Write the predictions to disk; with to_csv defaults the index is written as
# the first column and the column labels as the header row.
price_predictions.to_csv(path_or_buf='housing_preds_david_bruce.csv')