In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#### Prerequisites
1.	Download data file from https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data
2.	Upload the downloaded Kaggle data set to Snowflake.
a.	You can use Snowflake Snowsight (the web UI) to upload a data set and create a table in a single step.
b.	In this solution, the table is created with the name HOUSE_PRICES_RAW_DATA for the train data set and HOUSE_PRICES_TEST_DATA for the test data set.
3. Create a Kaggle or Google Colab account, or set up Jupyter to run locally.
4. If you have followed the previous chapters, you already have a database called "RAW" and a schema called "RETAIL". If not, note that this example uses RAW.RETAIL; change the database and schema in the connection parameters as appropriate.

#### Create session and load data

In [4]:
from snowflake.snowpark import Session

# Snowflake connection settings. Fill in the blank values for your own account
# before running; "database" and "schema" must point at the location of the
# HOUSE_PRICES_* tables.
# NOTE(review): do not save real credentials in the notebook -- consider
# reading them from environment variables or getpass instead.
connection_parameters = {
    "account": "",
    "user": "",
    "password": "",
    "warehouse": "",
    "database":"RAW",
    "schema":"RETAIL"
}  

session = Session.builder.configs(connection_parameters).create()  

# Pull the whole training table into a pandas DataFrame.
raw = session.table('HOUSE_PRICES_RAW_DATA').to_pandas()

# Summarise the columns by dtype family. np.integer / np.floating match every
# integer / float width, so int64 columns are reported as integers (they were
# previously listed under "Float variables") and no width is silently skipped.
print("Categorical variables:",raw.select_dtypes(include=['object']).columns)
print("Integer variables:", raw.select_dtypes(include=[np.integer]).columns)
print("Float variables:",raw.select_dtypes(include=[np.floating]).columns)

Categorical variables: Index(['MSZONING', 'LOTFRONTAGE', 'STREET', 'ALLEY', 'LOTSHAPE', 'LANDCONTOUR',
       'UTILITIES', 'LOTCONFIG', 'LANDSLOPE', 'NEIGHBORHOOD', 'CONDITION1',
       'CONDITION2', 'BLDGTYPE', 'HOUSESTYLE', 'ROOFSTYLE', 'ROOFMATL',
       'EXTERIOR1ST', 'EXTERIOR2ND', 'MASVNRTYPE', 'MASVNRAREA', 'EXTERQUAL',
       'EXTERCOND', 'FOUNDATION', 'BSMTQUAL', 'BSMTCOND', 'BSMTEXPOSURE',
       'BSMTFINTYPE1', 'BSMTFINTYPE2', 'HEATING', 'HEATINGQC', 'ELECTRICAL',
       'KITCHENQUAL', 'FUNCTIONAL', 'FIREPLACEQU', 'GARAGETYPE', 'GARAGEYRBLT',
       'GARAGEFINISH', 'GARAGEQUAL', 'GARAGECOND', 'PAVEDDRIVE', 'POOLQC',
       'FENCE', 'MISCFEATURE', 'SALETYPE', 'SALECONDITION'],
      dtype='object')
Integer variables: Index(['ID', 'MSSUBCLASS', 'LOTAREA', 'OVERALLQUAL', 'OVERALLCOND',
       'YEARBUILT', 'YEARREMODADD', 'BSMTFINSF1', 'BSMTFINSF2', 'BSMTUNFSF',
       'TOTALBSMTSF', '1stFlrSF', '2ndFlrSF', 'LOWQUALFINSF', 'GRLIVAREA',
       'BSMTFULLBATH', 'BSMTHALFBATH', 'FUL

In [5]:
# Preview the first rows of the raw training data.
raw.head()

Unnamed: 0,ID,MSSUBCLASS,MSZONING,LOTFRONTAGE,LOTAREA,STREET,ALLEY,LOTSHAPE,LANDCONTOUR,UTILITIES,...,POOLAREA,POOLQC,FENCE,MISCFEATURE,MISCVAL,MOSOLD,YRSOLD,SALETYPE,SALECONDITION,SALEPRICE
0,1,60,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [6]:
# List every column name available in the raw training table.
raw.columns

Index(['ID', 'MSSUBCLASS', 'MSZONING', 'LOTFRONTAGE', 'LOTAREA', 'STREET',
       'ALLEY', 'LOTSHAPE', 'LANDCONTOUR', 'UTILITIES', 'LOTCONFIG',
       'LANDSLOPE', 'NEIGHBORHOOD', 'CONDITION1', 'CONDITION2', 'BLDGTYPE',
       'HOUSESTYLE', 'OVERALLQUAL', 'OVERALLCOND', 'YEARBUILT', 'YEARREMODADD',
       'ROOFSTYLE', 'ROOFMATL', 'EXTERIOR1ST', 'EXTERIOR2ND', 'MASVNRTYPE',
       'MASVNRAREA', 'EXTERQUAL', 'EXTERCOND', 'FOUNDATION', 'BSMTQUAL',
       'BSMTCOND', 'BSMTEXPOSURE', 'BSMTFINTYPE1', 'BSMTFINSF1',
       'BSMTFINTYPE2', 'BSMTFINSF2', 'BSMTUNFSF', 'TOTALBSMTSF', 'HEATING',
       'HEATINGQC', 'CENTRALAIR', 'ELECTRICAL', '1stFlrSF', '2ndFlrSF',
       'LOWQUALFINSF', 'GRLIVAREA', 'BSMTFULLBATH', 'BSMTHALFBATH', 'FULLBATH',
       'HALFBATH', 'BEDROOMABVGR', 'KITCHENABVGR', 'KITCHENQUAL',
       'TOTRMSABVGRD', 'FUNCTIONAL', 'FIREPLACES', 'FIREPLACEQU', 'GARAGETYPE',
       'GARAGEYRBLT', 'GARAGEFINISH', 'GARAGECARS', 'GARAGEAREA', 'GARAGEQUAL',
       'GARAGECOND', 'PAVEDDRIVE

In [7]:
# Keep SALEPRICE (the value to predict) plus a small hand-picked set of
# feature columns from the raw table.
train_dataset = raw[
    [
        "SALEPRICE",
        "BLDGTYPE",
        "OVERALLCOND",
        "MSSUBCLASS",
        "MSZONING",
        "LOTAREA",
        "LOTCONFIG",
        "YEARBUILT",
        "EXTERIOR1ST",
        "FOUNDATION",
    ]
]

#### Look for missing data 

In [8]:
# Missing-value report: per-column null count and null fraction, with the most
# affected columns listed first.
total = train_dataset.isnull().sum().sort_values(ascending=False)
percent = (
    train_dataset.isnull().sum()
    .div(train_dataset.isnull().count())
    .sort_values(ascending=False)
)
missing_data = pd.concat(
    [total, percent],
    axis=1,
    keys=['Total', 'Percent'],
)
missing_data.head(20)

Unnamed: 0,Total,Percent
SALEPRICE,0,0.0
BLDGTYPE,0,0.0
OVERALLCOND,0,0.0
MSSUBCLASS,0,0.0
MSZONING,0,0.0
LOTAREA,0,0.0
LOTCONFIG,0,0.0
YEARBUILT,0,0.0
EXTERIOR1ST,0,0.0
FOUNDATION,0,0.0


#### Convert Categorical data to Numeric

We will use the OneHotEncoder available in sklearn.

In [9]:
from sklearn.preprocessing import OneHotEncoder

# Flag the columns whose dtype is object; these are the ones that get
# one-hot encoded below.
s = train_dataset.dtypes == 'object'
object_cols = [column for column, is_object in s.items() if is_object]
print("Categorical variables:", object_cols, sep="\n")
print('No. of. categorical features: ', len(object_cols))

Categorical variables:
['BLDGTYPE', 'MSZONING', 'LOTCONFIG', 'EXTERIOR1ST', 'FOUNDATION']
No. of. categorical features:  5


In [10]:
# One-hot encode the categorical columns. Dense output is requested as a
# DataFrame so the generated <COLUMN>_<category> column names are kept.
OH_encoder = OneHotEncoder(sparse_output=False)
OH_encoder.set_output(transform='pandas')
OH_cols = pd.DataFrame(
    OH_encoder.fit_transform(train_dataset[object_cols])
)
OH_cols

Unnamed: 0,BLDGTYPE_1Fam,BLDGTYPE_2fmCon,BLDGTYPE_Duplex,BLDGTYPE_Twnhs,BLDGTYPE_TwnhsE,MSZONING_C (all),MSZONING_FV,MSZONING_RH,MSZONING_RL,MSZONING_RM,...,EXTERIOR1ST_Stucco,EXTERIOR1ST_VinylSd,EXTERIOR1ST_Wd Sdng,EXTERIOR1ST_WdShing,FOUNDATION_BrkTil,FOUNDATION_CBlock,FOUNDATION_PConc,FOUNDATION_Slab,FOUNDATION_Stone,FOUNDATION_Wood
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1456,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1457,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1458,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


Append the one-hot encoded columns and drop the original categorical columns

In [11]:
# Swap the raw categorical columns for their one-hot encoded counterparts:
# numeric columns first, encoded columns appended on the right.
train_df = pd.concat(
    [train_dataset.drop(columns=object_cols), OH_cols],
    axis=1,
)

In [12]:
# Preview the fully numeric training frame.
train_df.head()

Unnamed: 0,SALEPRICE,OVERALLCOND,MSSUBCLASS,LOTAREA,YEARBUILT,BLDGTYPE_1Fam,BLDGTYPE_2fmCon,BLDGTYPE_Duplex,BLDGTYPE_Twnhs,BLDGTYPE_TwnhsE,...,EXTERIOR1ST_Stucco,EXTERIOR1ST_VinylSd,EXTERIOR1ST_Wd Sdng,EXTERIOR1ST_WdShing,FOUNDATION_BrkTil,FOUNDATION_CBlock,FOUNDATION_PConc,FOUNDATION_Slab,FOUNDATION_Stone,FOUNDATION_Wood
0,208500,5,60,8450,2003,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,181500,8,20,9600,1976,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,223500,5,60,11250,2001,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,140000,5,70,9550,1915,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,250000,5,60,14260,2000,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [13]:
# List the columns of the training frame after one-hot encoding.
train_df.columns

Index(['SALEPRICE', 'OVERALLCOND', 'MSSUBCLASS', 'LOTAREA', 'YEARBUILT',
       'BLDGTYPE_1Fam', 'BLDGTYPE_2fmCon', 'BLDGTYPE_Duplex', 'BLDGTYPE_Twnhs',
       'BLDGTYPE_TwnhsE', 'MSZONING_C (all)', 'MSZONING_FV', 'MSZONING_RH',
       'MSZONING_RL', 'MSZONING_RM', 'LOTCONFIG_Corner', 'LOTCONFIG_CulDSac',
       'LOTCONFIG_FR2', 'LOTCONFIG_FR3', 'LOTCONFIG_Inside',
       'EXTERIOR1ST_AsbShng', 'EXTERIOR1ST_AsphShn', 'EXTERIOR1ST_BrkComm',
       'EXTERIOR1ST_BrkFace', 'EXTERIOR1ST_CBlock', 'EXTERIOR1ST_CemntBd',
       'EXTERIOR1ST_HdBoard', 'EXTERIOR1ST_ImStucc', 'EXTERIOR1ST_MetalSd',
       'EXTERIOR1ST_Plywood', 'EXTERIOR1ST_Stone', 'EXTERIOR1ST_Stucco',
       'EXTERIOR1ST_VinylSd', 'EXTERIOR1ST_Wd Sdng', 'EXTERIOR1ST_WdShing',
       'FOUNDATION_BrkTil', 'FOUNDATION_CBlock', 'FOUNDATION_PConc',
       'FOUNDATION_Slab', 'FOUNDATION_Stone', 'FOUNDATION_Wood'],
      dtype='object')

#### Train - Test Split

In [14]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

# Features are every column except SALEPRICE; SALEPRICE is the target.
X = train_df.drop(columns=['SALEPRICE'])
Y = train_df['SALEPRICE']

# Hold out 20% of the rows for validation. random_state is fixed so the same
# split is produced on every run.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X,
    Y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

#### Fitting Model - SVR

In [15]:
from sklearn import svm
from sklearn.svm import SVC  # NOTE(review): imported but not used in this cell
from sklearn.metrics import mean_absolute_percentage_error

# Support-vector regressor with default settings, scored on the validation
# split using mean absolute percentage error (lower is better).
model_SVR = svm.SVR().fit(X_train, Y_train)
Y_pred = model_SVR.predict(X_valid)

print(mean_absolute_percentage_error(Y_valid, Y_pred))

0.3009689116780821


#### Fitting Model - Linear Regression

In [16]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline, scored on the validation split using mean
# absolute percentage error (lower is better).
model_LR = LinearRegression().fit(X_train, Y_train)
Y_pred = model_LR.predict(X_valid)

print(mean_absolute_percentage_error(Y_valid, Y_pred))

0.23140877599791285


#### Fitting Model - Random Forest

In [17]:
from sklearn.ensemble import RandomForestRegressor

# Random forests are stochastic, so pin random_state (same seed as the
# train/validation split). Without it the score below, the feature
# importances and the final test predictions change on every run.
model_RFR = RandomForestRegressor(n_estimators=10, random_state=0)
model_RFR.fit(X_train, Y_train)
Y_pred = model_RFR.predict(X_valid)

# Validation error as mean absolute percentage error (lower is better).
mean_absolute_percentage_error(Y_valid, Y_pred)

0.19572672114792675

#### Check feature importance

In [18]:
# Map each feature name to the importance assigned by the fitted random
# forest, nested under a single "FeatImportance" key.
feat_importance = (
    pd.Series(
        model_RFR.feature_importances_,
        index=X.columns,
        name="FeatImportance",
    )
    .to_frame()
    .to_dict()
)
print(feat_importance)

{'FeatImportance': {'OVERALLCOND': 0.04965346805397171, 'MSSUBCLASS': 0.03320094667157202, 'LOTAREA': 0.3520717889504505, 'YEARBUILT': 0.4404561249033006, 'BLDGTYPE_1Fam': 0.0009190169642007608, 'BLDGTYPE_2fmCon': 0.00012701046040707124, 'BLDGTYPE_Duplex': 0.00019818319193320652, 'BLDGTYPE_Twnhs': 9.343170928715887e-05, 'BLDGTYPE_TwnhsE': 0.0006409680251017423, 'MSZONING_C (all)': 0.0017661066394095795, 'MSZONING_FV': 0.0017808690999354545, 'MSZONING_RH': 0.00025082422427059116, 'MSZONING_RL': 0.010593956636397613, 'MSZONING_RM': 0.0038158204769389937, 'LOTCONFIG_Corner': 0.010767137040818133, 'LOTCONFIG_CulDSac': 0.005833230158896206, 'LOTCONFIG_FR2': 0.0009273299321719143, 'LOTCONFIG_FR3': 0.0, 'LOTCONFIG_Inside': 0.01339044642823457, 'EXTERIOR1ST_AsbShng': 0.0006082449727599602, 'EXTERIOR1ST_AsphShn': 7.945407719322192e-05, 'EXTERIOR1ST_BrkComm': 0.00015750058928675265, 'EXTERIOR1ST_BrkFace': 0.014925645148378488, 'EXTERIOR1ST_CBlock': 9.411012265735979e-08, 'EXTERIOR1ST_CemntBd': 0

#### Testing it on the test data set

In [19]:
# Pull the Kaggle test table (uploaded as HOUSE_PRICES_TEST_DATA) into pandas.
test_raw = (
    session
    .table('HOUSE_PRICES_TEST_DATA')
    .to_pandas()
)

In [20]:
# Same feature subset as the training data, minus SALEPRICE (not selected
# from the test table).
test_dataset = test_raw[
    [
        "BLDGTYPE",
        "OVERALLCOND",
        "MSSUBCLASS",
        "MSZONING",
        "LOTAREA",
        "LOTCONFIG",
        "YEARBUILT",
        "EXTERIOR1ST",
        "FOUNDATION",
    ]
]

In [21]:
from sklearn.preprocessing import OneHotEncoder

# Flag the object-dtype columns of the test frame. Note that this overwrites
# the `s` and `object_cols` computed earlier from the training frame.
s = test_dataset.dtypes == 'object'
object_cols = [column for column, is_object in s.items() if is_object]
print("Categorical variables:", object_cols, sep="\n")
print('No. of. categorical features: ', len(object_cols))

Categorical variables:
['BLDGTYPE', 'MSZONING', 'LOTCONFIG', 'EXTERIOR1ST', 'FOUNDATION']
No. of. categorical features:  5


In [22]:
# One-hot encode the categorical columns of the test frame.
# NOTE(review): this fits a *new* encoder on the test data (replacing the one
# fitted on the training data), so the generated columns follow the test
# categories rather than the training ones; the mismatch is reconciled against
# X_train.columns in the cells below.
OH_encoder = OneHotEncoder(sparse_output=False)
OH_encoder.set_output(transform='pandas')
OH_cols = pd.DataFrame(
    OH_encoder.fit_transform(test_dataset[object_cols])
)
OH_cols

Unnamed: 0,BLDGTYPE_1Fam,BLDGTYPE_2fmCon,BLDGTYPE_Duplex,BLDGTYPE_Twnhs,BLDGTYPE_TwnhsE,MSZONING_C (all),MSZONING_FV,MSZONING_NA,MSZONING_RH,MSZONING_RL,...,EXTERIOR1ST_Stucco,EXTERIOR1ST_VinylSd,EXTERIOR1ST_Wd Sdng,EXTERIOR1ST_WdShing,FOUNDATION_BrkTil,FOUNDATION_CBlock,FOUNDATION_PConc,FOUNDATION_Slab,FOUNDATION_Stone,FOUNDATION_Wood
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1455,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1456,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1457,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [23]:
# Swap the raw categorical columns for their one-hot encoded counterparts.
# NOTE(review): this rebinds test_dataset, so the cell cannot be re-run on its
# own -- re-run from the cell that selects the test columns.
test_dataset = pd.concat(
    [test_dataset.drop(columns=object_cols), OH_cols],
    axis=1,
)

In [24]:
# Keep only the test columns the model was trained on; this drops one-hot
# columns for categories that never occurred in training (e.g. MSZONING_NA).
# .copy() makes the result an independent frame, so the column assignments in
# the following cells do not write into a slice of the previous frame (which
# raises pandas' SettingWithCopyWarning).
test_dataset = test_dataset[test_dataset.columns.intersection(X_train.columns)].copy()

In [25]:
# Preview the test frame after restricting it to the training columns.
test_dataset.head()

Unnamed: 0,OVERALLCOND,MSSUBCLASS,LOTAREA,YEARBUILT,BLDGTYPE_1Fam,BLDGTYPE_2fmCon,BLDGTYPE_Duplex,BLDGTYPE_Twnhs,BLDGTYPE_TwnhsE,MSZONING_C (all),...,EXTERIOR1ST_Stucco,EXTERIOR1ST_VinylSd,EXTERIOR1ST_Wd Sdng,EXTERIOR1ST_WdShing,FOUNDATION_BrkTil,FOUNDATION_CBlock,FOUNDATION_PConc,FOUNDATION_Slab,FOUNDATION_Stone,FOUNDATION_Wood
0,6,20,11622,1961,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,6,20,14267,1958,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,5,60,13830,1997,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,6,60,9978,1998,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,5,120,5005,1992,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [26]:
# Training feature columns that are absent from the test frame.
set(X_train.columns) - set(test_dataset.columns)

{'EXTERIOR1ST_ImStucc', 'EXTERIOR1ST_Stone'}

Initializing these features to 0, as these categories do not occur in the test data and the features had low importance in the trained model

In [27]:
# Add the one-hot column that exists in the training features but not in the
# test frame, set to 0 for every row.
test_dataset['EXTERIOR1ST_ImStucc'] = 0

In [28]:
# Add the one-hot column that exists in the training features but not in the
# test frame, set to 0 for every row.
test_dataset['EXTERIOR1ST_Stone'] = 0

In [29]:
# Preview the test frame; the newly added columns appear at the far right.
test_dataset.head()

Unnamed: 0,OVERALLCOND,MSSUBCLASS,LOTAREA,YEARBUILT,BLDGTYPE_1Fam,BLDGTYPE_2fmCon,BLDGTYPE_Duplex,BLDGTYPE_Twnhs,BLDGTYPE_TwnhsE,MSZONING_C (all),...,EXTERIOR1ST_Wd Sdng,EXTERIOR1ST_WdShing,FOUNDATION_BrkTil,FOUNDATION_CBlock,FOUNDATION_PConc,FOUNDATION_Slab,FOUNDATION_Stone,FOUNDATION_Wood,EXTERIOR1ST_ImStucc,EXTERIOR1ST_Stone
0,6,20,11622,1961,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0
1,6,20,14267,1958,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0
2,5,60,13830,1997,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0
3,6,60,9978,1998,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0
4,5,120,5005,1992,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0


In [30]:
# Put the test columns in exactly the order the model was fitted with.
test_dataset = test_dataset.loc[:, X_train.columns]

In [31]:
# Show the feature columns (and their order) used to fit the models.
X_train.columns

Index(['OVERALLCOND', 'MSSUBCLASS', 'LOTAREA', 'YEARBUILT', 'BLDGTYPE_1Fam',
       'BLDGTYPE_2fmCon', 'BLDGTYPE_Duplex', 'BLDGTYPE_Twnhs',
       'BLDGTYPE_TwnhsE', 'MSZONING_C (all)', 'MSZONING_FV', 'MSZONING_RH',
       'MSZONING_RL', 'MSZONING_RM', 'LOTCONFIG_Corner', 'LOTCONFIG_CulDSac',
       'LOTCONFIG_FR2', 'LOTCONFIG_FR3', 'LOTCONFIG_Inside',
       'EXTERIOR1ST_AsbShng', 'EXTERIOR1ST_AsphShn', 'EXTERIOR1ST_BrkComm',
       'EXTERIOR1ST_BrkFace', 'EXTERIOR1ST_CBlock', 'EXTERIOR1ST_CemntBd',
       'EXTERIOR1ST_HdBoard', 'EXTERIOR1ST_ImStucc', 'EXTERIOR1ST_MetalSd',
       'EXTERIOR1ST_Plywood', 'EXTERIOR1ST_Stone', 'EXTERIOR1ST_Stucco',
       'EXTERIOR1ST_VinylSd', 'EXTERIOR1ST_Wd Sdng', 'EXTERIOR1ST_WdShing',
       'FOUNDATION_BrkTil', 'FOUNDATION_CBlock', 'FOUNDATION_PConc',
       'FOUNDATION_Slab', 'FOUNDATION_Stone', 'FOUNDATION_Wood'],
      dtype='object')

In [32]:
# Predict with the random forest on the prepared test frame.
# Note: this rebinds Y_pred, replacing the validation predictions from earlier.
Y_pred = model_RFR.predict(test_dataset)

In [33]:
# Display the predictions for the test set.
Y_pred

array([141485., 198930., 214810., ..., 204250., 197428., 254010.])

In [34]:
# Close the Snowpark session created at the start of the notebook.
session.close()