In [31]:
# Imports

import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, Ridge,Lasso
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import pickle


In [32]:
# Load the test data CSV into a DataFrame

test = pd.read_csv(filepath_or_buffer='./datasets/test_eng.csv')

## Feature Removal from Testing Data

In [33]:
# Baseline features: start from every non-object-dtype column of the test frame

features = test.select_dtypes(exclude=['object']).columns.tolist()

# Drop the unwanted columns one at a time, in a fixed order.
# list.remove raises ValueError when a name is absent, so a column that is
# missing from the test frame fails loudly instead of being skipped.
for unwanted in (
    # Removed before Poly
    'Id',
    'PID',
    'MS SubClass',
    'Alley',
    'BsmtFin Type 2',
    'Garage Type',
    'Garage Finish',
    'Garage Cars',
    'Fence',
    'Misc Feature',
    'Misc Val',
    'Mo Sold',
    'Yr Sold',
    'Year Built',
    'Year Remod/Add',
    'Functional',
    'Pool Area',
    'Pool QC',
    '3Ssn Porch',
    'Bsmt Half Bath',
    'Garage Yr Blt',
    'Garage Qual',
    'Exter Cond',
    'Bsmt Cond',
    'Amenities',
    'Garage',
    'Kitchen AbvGr',
    'Pool Overall',
    'Central Air',

    # Not Removed before Poly
    'Mas Vnr Area',
    'TotRms AbvGrd',
    'Garage Area',
    'Total Bsmt SF',
    'Overall Cond',
    'Bsmt Unf SF',
    'Bsmt Full Bath',
    'Half Bath',
    'Fireplace Qu',
    'Bsmt Qual',
    'Garage Cond',
    'Exter Qual',
    'Low Qual Fin SF',
    'BsmtFin SF 2',
    '1st Flr SF',
    '2nd Flr SF',
    'Bsmt Exposure',
    'BsmtFin Type 1',
    'BsmtFin SF 1',

    # Removed for Poly
    'Wood Deck SF',
    'Open Porch SF',
    'Enclosed Porch',
    'Screen Porch',
    'Heating QC',
    'Lot Frontage',
    'Fireplaces',
):
    features.remove(unwanted)

# Keep only the surviving feature columns as the model input frame
X_test = test[features]

## Transforming Testing Data

In [34]:
# Expand the selected features into polynomial terms (bias column omitted)
poly = PolynomialFeatures(include_bias=False)
poly.fit(X_test)
X_test_poly = poly.transform(X_test)

# Standardize the expanded feature matrix.
# NOTE(review): the scaler is fitted on the test matrix itself. If the pickled
# model was trained on features scaled with training-set statistics, the fitted
# training scaler should be reused here via transform() only — confirm against
# the training notebook.
ss = StandardScaler()
ss.fit(X_test_poly)
Z_test = ss.transform(X_test_poly)

In [35]:
# Reload the model pickled in 'Lasso_model.pkl' so it can be used in this notebook.
# pickle.load can execute arbitrary code, so only point this at a trusted file.

with open('Lasso_model.pkl', mode='rb') as model_file:
    Pickled_LAS_Model = pickle.load(model_file)

# Bare last expression so the cell displays the loaded estimator
Pickled_LAS_Model

LassoCV(alphas=array([1.00000000e+01, 1.23284674e+01, 1.51991108e+01, 1.87381742e+01,
       2.31012970e+01, 2.84803587e+01, 3.51119173e+01, 4.32876128e+01,
       5.33669923e+01, 6.57933225e+01, 8.11130831e+01, 1.00000000e+02,
       1.23284674e+02, 1.51991108e+02, 1.87381742e+02, 2.31012970e+02,
       2.84803587e+02, 3.51119173e+02, 4.32876128e+02, 5.33669923e+02,
       6.57933225e+02, 8.11130831e+0...
       8.11130831e+07, 1.00000000e+08, 1.23284674e+08, 1.51991108e+08,
       1.87381742e+08, 2.31012970e+08, 2.84803587e+08, 3.51119173e+08,
       4.32876128e+08, 5.33669923e+08, 6.57933225e+08, 8.11130831e+08,
       1.00000000e+09, 1.23284674e+09, 1.51991108e+09, 1.87381742e+09,
       2.31012970e+09, 2.84803587e+09, 3.51119173e+09, 4.32876128e+09,
       5.33669923e+09, 6.57933225e+09, 8.11130831e+09, 1.00000000e+10]),
        max_iter=500000, n_jobs=-1)

## Predictions with Trained Model and Test Data

In [41]:
# Run the reloaded model on the scaled test matrix to predict the housing prices
Ypredict = Pickled_LAS_Model.predict(Z_test)

# Round the predictions to two decimals, then wrap them in a Series
np_array = np.array(np.round(Ypredict, decimals=2))
pred_series = pd.Series(data=np_array)

In [37]:
# Formatting the predicted data into a DataFrame: the test Ids and the
# predictions are joined column-wise, labelled 'Id' and 'SalePrice'

df = pd.concat([test['Id'], pred_series], axis=1, keys=['Id', 'SalePrice'])

In [38]:
# Write the frame to CSV (row index omitted) for Kaggle submission

df.to_csv(path_or_buf='./datasets/submission_lasso.csv', index=False)