### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# modeling imports
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV, ElasticNetCV, ElasticNet
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn import decomposition

#set_style
plt.style.use('fivethirtyeight')

%config InlineBackend.figure_format = 'retina'

### Import raw training data 

In [2]:
ames = pd.read_csv('../datasets/test.csv')

In [3]:
ames.shape

(878, 80)

#### Apply same transformations as to model training data

In [4]:
#want to change columns names to lower/snake_case so a little easier to work with
ames.columns = ames.columns.str.lower().str.replace(' ', '_')

In [5]:
#increase pandas default max_rows so I can view all columns at once
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

### Nulls 

#### Lot Frontage 

In [6]:
#replace np.nan with average for lot_config of that type
ames.loc[(ames['lot_frontage'].isna()) & (ames['lot_config'] == 'Corner'), 'lot_frontage'] = 83.25
ames.loc[(ames['lot_frontage'].isna()) & (ames['lot_config'] == 'CulDSac'), 'lot_frontage'] = 54.734375        
ames.loc[(ames['lot_frontage'].isna()) & (ames['lot_config'] == 'FR2'), 'lot_frontage'] = 60.836735        
ames.loc[(ames['lot_frontage'].isna()) & (ames['lot_config'] == 'FR3'), 'lot_frontage'] = 87.000000        
ames.loc[(ames['lot_frontage'].isna()) & (ames['lot_config'] == 'Inside'), 'lot_frontage'] = 66.759571        

#### Alley

In [7]:
ames['alley'] = ames['alley'].replace(np.nan, 'NA') 

#### Masonry veneer type and area

In [8]:
ames['mas_vnr_type'] = ames['mas_vnr_type'].replace(np.nan, 'None') #replace masonry np.nan with None
ames['mas_vnr_area'] = ames['mas_vnr_area'].replace(np.nan, 0) #replace masonry np.nan with 0

#### Basement related:  
* bsmt_qual (Evaluates the height of the basement), 
* bsmt_cond (Evaluates the general condition of the basement), 
* bsmt_exposure (Refers to walkout or garden level walls)
* bsmtfin_type_1
* bsmtfin_sf_1
* bsmtfin_type_2
* bsmtfin_sf_2
* bsmt_unf_sf
* total_bsmt_sf
* bsmt_full_bath
* bsmt_half_bath

In [9]:
#these all receive an NA if no basement
basement_to_na = ['bsmt_qual', 'bsmt_cond', 'bsmt_exposure', 'bsmtfin_type_1', 
       'bsmtfin_type_2']

#these receive 0sqft if basement is NA
basement_to_zero = ['bsmtfin_sf_2', 'bsmt_unf_sf', 'total_bsmt_sf', 'bsmtfin_sf_1', 'bsmt_full_bath',
       'bsmt_half_bath']

for i in basement_to_na:
    ames[i] = ames[i].replace(np.nan, 'NA') 

In [10]:
#set basement sqft to 0 IF basement is NA, else drop as incomplete basement data and only 1 null record for each    
for i in basement_to_zero:
    ames[i] = ames[i].replace(np.nan, 0) 

#### fireplace_qu

In [11]:
ames['fireplace_qu'] = ames['fireplace_qu'].replace(np.nan, 'NA') #NA for 'No Fireplace'

#### Garage related:
* garage_type
* garage_yr_blt
* garage_finish
* garage_cars
* garage_area
* garage_qual
* garage_cond


In [12]:
#create two lists, one for catagorical and one for numeric

garage_to_na = ['garage_type','garage_finish','garage_qual','garage_cond']
garage_to_0 = ['garage_yr_blt','garage_cars', 'garage_area']

for i in garage_to_na:
    ames[i] = ames[i].replace(np.nan, 'NA') 

In [13]:
for i in garage_to_0:
    ames[i] = ames[i].replace(np.nan, 0) 

#### pool_qc

In [14]:
for i in ames[(ames['pool_qc'].isna()) & (ames['pool_area'] == 0)]:
    ames['pool_qc'] = ames['pool_qc'].replace(np.nan, 'NA')     

#### fence

In [15]:
ames['fence'] = ames['fence'].replace(np.nan, 'NA') 

#### misc_feature

In [16]:
for i in ames[(ames['misc_feature'].isna()) & (ames['misc_val'] == 0)]:
    ames['misc_feature'] = ames['misc_feature'].replace(np.nan, 'NA')  

### Final Null Check

In [17]:
ames.isnull().sum()

id                 0
pid                0
ms_subclass        0
ms_zoning          0
lot_frontage       0
lot_area           0
street             0
alley              0
lot_shape          0
land_contour       0
utilities          0
lot_config         0
land_slope         0
neighborhood       0
condition_1        0
condition_2        0
bldg_type          0
house_style        0
overall_qual       0
overall_cond       0
year_built         0
year_remod/add     0
roof_style         0
roof_matl          0
exterior_1st       0
exterior_2nd       0
mas_vnr_type       0
mas_vnr_area       0
exter_qual         0
exter_cond         0
foundation         0
bsmt_qual          0
bsmt_cond          0
bsmt_exposure      0
bsmtfin_type_1     0
bsmtfin_sf_1       0
bsmtfin_type_2     0
bsmtfin_sf_2       0
bsmt_unf_sf        0
total_bsmt_sf      0
heating            0
heating_qc         0
central_air        0
electrical         1
1st_flr_sf         0
2nd_flr_sf         0
low_qual_fin_sf    0
gr_liv_area  

In [18]:
#catch anything that was missed, 
ames.fillna(0,inplace= True)

## Data Transformations to get model features

In [19]:
#columns I want to replace values in
ames_ordinal_str_columns = ames[['lot_shape','utilities','land_slope','exter_qual', 
                 'exter_cond','bsmt_qual',
       'bsmt_cond', 'bsmt_exposure', 'bsmtfin_type_1','bsmtfin_type_2','heating_qc','electrical','kitchen_qual',
                'functional','fireplace_qu','garage_finish','garage_qual','garage_cond','paved_drive','pool_qc','fence']]

dict_ordinal = {
    #overall_qual, overall_cond,exter_qual,exter_cond,bsmt_qual,bsmt_cond,bsmt_exposure,heatingqc,kitchenqual
    #firplacequ,garage_qual,garage_cond,pool_qc,fence
    'Ex': 5,
    'Gd': 4,
    'TA': 3,
    'Fa': 2,
    'Po': 1,
    'NA': 0,
     #lot_shape
    'Reg':4,
    'IR1':3,
    'IR2':2,
    'IR3':1,
    #utilities
    'AllPub':4,
    'NoSewr':3,
    'NoSeWa':2,
    'ELO':1,
    #land_slope
    'Gtl':3,
    'Mod':2,
    'Sev':1,
    #bsmt_exposure
    'Av': 3,
    'Mn': 2,
    'No': 1,
    #bsmtfin_type_1, bsmtfin_type_1
    'GLQ':6,
    'ALQ':5,
    'BLQ':4,
    'Rec':3,
    'LwQ':2,
    'Unf':1,
    #Electrical
    'SBrkr':1,
    'FuseA':0,
    'FuseF':0,
    'FuseP':0,
    'Mix':0,
    #Functional
    'Typ':1,
    'Min1':0,
    'Min2':0,
    'Mod':0,
    'Maj1':0,
    'Maj2':0,
    'Sev':0,
    'Sal':0,
    #garage_finish    
    'Fin': 3,
    'RFn': 2,
    'Unf':1,
    #paved_drive
    'Y' : 2,
    'P' : 1,
    'N' : 0,
    #fence
    'GdPrv':4,
    'MnPrv':3,
    'GdWo':2,
    'MnWw':1   
}

#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [20]:
#Create neighborhood ranking
ames['neighborhood_order'] = ames['neighborhood']
ames_ordinal_str_columns = ames[['neighborhood_order']]

dict_ordinal = {
    'MeadowV': 1,
    'IDOTRR' : 1,
    'BrDale' : 1,
    'OldTown': 1,
    'BrkSide': 1,
    'Edwards': 1,
    'SWISU': 1,
    'Landmrk':2,
    'Sawyer':2,
    'NPkVill':2,
    'Blueste':2,
    'NAmes':2,
    'Mitchel':2,
    'SawyerW':2,
    'Greens':3,
    'Gilbert':3,
    'NWAmes':3,
    'Blmngtn':3,
    'CollgCr':3,
    'Crawfor':3,
    'ClearCr':3,
    'Somerst':4,
    'Timber':4,
    'Veenker':4,
    'GrnHill':4,
    'StoneBr':4,
    'NoRidge':4,
    'NridgHt':4,
        }

for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [21]:
#Type of sale
ames['sale_type_order'] = ames['sale_type']
ames_ordinal_str_columns = ames[['sale_type_order']]

dict_ordinal = {
   'ConLw':1,
    'Oth':1,
    'ConLD':1,
    'COD':1,
    'WD':2,
    'WD ':2,
    'CWD':3,
    'ConLI':3,
    'Con':3,
    'New':3
        }

#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [22]:
ames['yr*rem'] = ames['year_remod/add'] * ames['year_built']

In [23]:
ames['basement_overall'] = ames['bsmt_qual'] *ames['bsmt_exposure']

In [24]:
ames['bsmt_type*sf_all'] = (ames['bsmtfin_type_1']*ames['bsmtfin_sf_1']) + (ames['bsmtfin_type_2']*ames['bsmtfin_sf_2']) #quality*sf interaction variable

In [25]:
ames['garage_qual*cond'] = ames['garage_qual']*ames['garage_cond']

In [26]:
 #this is the shop variable
ames['garage_fin*sqft'] = ames['garage_area']*ames['garage_finish']

In [27]:
#total finished sqft above ground
ames['quality_above_sqft'] = (ames['1st_flr_sf']+ames['2nd_flr_sf']) - ames['low_qual_fin_sf']

In [28]:
#combine basment baths
ames['bsmt_baths'] = ames['bsmt_full_bath']+(ames['bsmt_half_bath']/2) 

In [29]:
#combine upstairs baths
ames['above_baths'] = ames['full_bath']+(ames['half_bath']/2) 

In [30]:
#I like this all bath feature, more how I think about baths in a home.
ames['all_baths'] = ames['above_baths'] + ames['bsmt_baths'] 

In [31]:
#sqft per room w/o baths. This performs better than with baths.
ames['room_size'] = ames['gr_liv_area'] / (ames['totrms_abvgrd']) 

In [32]:
#combine different porches as a measure of developed outside space for houses. 
ames['developed_outside_sf'] = ames['open_porch_sf']+ ames['screen_porch'] + ames['enclosed_porch'] +ames['3ssn_porch'] +ames['wood_deck_sf'] 

In [33]:
#dummy for if house has masonry or not
ames['mas_vnr_type_dummy'] = ames['mas_vnr_type']
ames_ordinal_str_columns = ames[['mas_vnr_type_dummy']]

dict_ordinal = {
   'BrkCmn':1,
    'None':0,
    'CBlock':1,
    'BrkFace':1,
    'Stone':1,   
        }

#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [34]:
#age of property at time of sale
ames['age'] = ames['yr_sold'] - ames['year_built'] 

In [35]:
#dummy for if property was remodeled
ames['was_remodeled'] =  ames['year_remod/add'] - ames['year_built'] 
ames['was_remodeled'] = [1 if i> 0 else 0 for i in ames['was_remodeled']];

In [36]:
#dummy for if property is residential
ames['is_residential'] = ames['ms_zoning']
ames_ordinal_str_columns = ames[['is_residential']]

dict_ordinal = {
   'A (agr)':0,
    'I (all)':0,
    'C (all)':0,
    'RM':1,
    'RH':1,
    'RL':1,
    'FV':1,
        }

#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [37]:
#2 if by positive feature, 1 if normal, 0 if negative feature. 
ames['positive_condition1'] = ames['condition_1']
ames['positive_condition2'] = ames['condition_2']
ames_ordinal_str_columns = ames[['positive_condition2', 'positive_condition1']]


dict_ordinal = {
   'Artery':0,
    'RRNe':0,
    'RRAe':0,
    'Feedr':0,
    'RRAn':0,
    'Norm':1,
    'RRNn':0,
    'PosN':2,
    'PosA':2,
        }

#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [38]:
#combine conditions for a local_condition column
ames['local_conditions'] = ames['positive_condition1']+ames['positive_condition2']

In [39]:
#Order Exterior house features 
ames['exterior_ordinal1'] = ames['exterior_1st']
ames['exterior_ordinal2'] = ames['exterior_2nd']
ames_ordinal_str_columns = ames[['exterior_ordinal2', 'exterior_ordinal1']]

dict_ordinal = {
    'AsphShn':1,
    'CBlock':1,
    'AsbShng':1,
    'Brk Cmn':2,
    'BrkComm':2,
     'Stucco':2,
    'PreCast':2,
    'Other':3,
    'Wd Shng':3,
    'Wd Sdng':3,
    'MetalSd':3,
    'WdShing':3,
    'HdBoard':3,      
    'Plywood':4,
    'BrkFace':5,       
    'VinylSd':5,
    'CmentBd':5,
    'CemntBd':5,
    'Stone':5,    
    'ImStucc':5,    
        }
#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [40]:
#combine external ordered house features
ames['external_feature'] = ames['exterior_ordinal1'] + ames['exterior_ordinal2']

In [41]:
# if a house is single story or two story. This will be used to test an intercation between building type and floors, but is not very useful on its on.
ames['house_style_ordinal'] = ames['house_style'] 
ames_ordinal_str_columns = ames[['house_style_ordinal']]

dict_ordinal = {
    '1.5Unf':0,
    '1.5Fin':0,
    'SFoyer':0,
    'SLvl':0,
    '1Story':0,
     '2.5Unf':1,
    '2Story':1,
    '2.5Fin':1    
        }
#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [42]:
# trying to split into rental and purchase types. Not sure if this does this well, but I am curious how this interacts with the above 1/2 story feature. 
ames['bldg_type_order'] = ames['bldg_type'] 
ames_ordinal_str_columns = ames[['bldg_type_order']]

dict_ordinal = {
    '2fmCon':0,
    'Twnhs':0,
    'SFoyer':0,
    'Duplex':0,
    '1Fam':1,
     'TwnhsE':1,   
        }
#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [43]:
ames = pd.get_dummies(data = ames, columns=['bldg_type'], drop_first=True)

In [44]:
# dummy for hip/nicer roof styles
ames['is_hip_roof'] = ames['roof_style'] 
ames_ordinal_str_columns = ames[['is_hip_roof']]

dict_ordinal = {
    'Gambrel':0,
    'Mansard':0,
    'Gable':0,
    'Flat':1,
    'Shed':1,
     'Hip':1,   
        }
#loop to get the job done
for i in ames_ordinal_str_columns.columns:
    ames.replace({i: dict_ordinal},inplace = True)

In [45]:
#set ames_eval to the features wanted for model
ames_eval = ames[['id','age','neighborhood_order','local_conditions','was_remodeled','overall_qual',
                  'exter_qual','external_feature','bldg_type_TwnhsE','bldg_type_Twnhs','bldg_type_2fmCon', 'is_hip_roof',
                  'mas_vnr_area',  
                  'functional', 'lot_frontage', 'lot_area','developed_outside_sf'
                  ,'garage_fin*sqft','garage_cars', 'paved_drive',
                 'bsmt_qual','bsmt_type*sf_all', 'bsmt_exposure',
                 'heating_qc','kitchen_qual', 'fireplace_qu', 
                 'quality_above_sqft', 'totrms_abvgrd', 'room_size']]

In [46]:
ames_eval.to_csv('../datasets/ames_test_clean.csv', index = False)

# LassoCV Model

In [47]:
ames_clean = pd.read_csv('../datasets/ames_clean_v8.csv')

In [48]:
X_train = ames_clean.drop(columns=['saleprice'])
y_train = ames_clean['saleprice']
X_test = ames_eval.drop(columns= ['id'])

In [89]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

(2033, 28)
(2033,)
(878, 28)


In [90]:
pipe_power = Pipeline([
    #('poly', PolynomialFeatures()), #uncomment me to release to power
    ('sc', StandardScaler()),
    ('lassocv', Lasso())
])

In [91]:
lasso_params1 = {'lassocv__alpha':np.logspace(-1, 4, 75),
               'lassocv__max_iter':[10000],
                 #'lassocv__tol':[.2]
                }

In [92]:
pipe_gridsearch = GridSearchCV(
                                pipe_power, 
                                param_grid=lasso_params1,
                            )

In [93]:
pipe_gridsearch.fit(X_train, y_train)
pipe_gridsearch.best_estimator_

Pipeline(steps=[('sc', StandardScaler()),
                ('lassocv', Lasso(alpha=0.1365007806546014, max_iter=10000))])

In [94]:
pipe_gridsearch.best_score_

0.8922291043238555

In [95]:
X_test.shape

(878, 28)

In [96]:
preds_lasso = pipe_gridsearch.predict(X_test)

In [97]:
ames_eval['saleprice_lasso'] = preds_lasso

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ames_eval['saleprice'] = preds


In [98]:
lasso_submission = ames_eval[['id', 'saleprice_lasso']]

In [99]:
lasso_submission.head(3)

Unnamed: 0,id,saleprice
0,2658,129231.551389
1,2718,163512.812872
2,2414,224013.708144


In [100]:
submission.to_csv('../datasets/_kaggle_submission_lasso.csv', index = False) 
#submission.to_csv('../datasets/submission_poly_lasso_iteration_4.csv', index = False) #THIS IS THE BEST SCORE YET!!!! #Uncomment me and polyfeatures to score

## Lasso Coef_ Evaluation

In [61]:
sc_lasso = StandardScaler()
sc_lasso.fit_transform(X_train)

array([[-0.07300932, -0.35530817, -2.20997115, ...,  0.00951839,
        -0.26508589,  0.3421764 ],
       [-0.77235568, -0.35530817,  0.26680468, ...,  1.37130231,
         1.05904093,  0.76130187],
       [ 0.69294147, -0.35530817,  0.26680468, ..., -0.88421849,
        -0.9271493 , -0.44242649],
       ...,
       [ 1.49219446,  0.58401759,  0.26680468, ...,  0.92866959,
         1.72110434, -0.41659594],
       [ 0.55973263, -0.35530817,  0.26680468, ..., -0.58136452,
        -0.26508589, -0.69725478],
       [-0.8389601 ,  0.58401759,  0.26680468, ...,  0.69782286,
         0.39697752,  0.59285335]])

In [62]:
lasso_lasso= Lasso(alpha = .1, max_iter=10000)

In [63]:
lasso_lasso.fit(X_train, y_train)
lasso_lasso.score(X_train, y_train)

0.8967216783723705

In [64]:
pd.DataFrame(list(zip(X_train.columns, lasso_lasso.coef_)))

Unnamed: 0,0,1
0,age,-38.85587
1,neighborhood_order,7191.034989
2,local_conditions,8198.644343
3,was_remodeled,5189.658465
4,overall_qual,10130.028208
5,exter_qual,11984.221302
6,external_feature,953.476589
7,bldg_type_TwnhsE,-12061.240406
8,bldg_type_Twnhs,-13803.0724
9,bldg_type_2fmCon,-7155.502373


## LassoCV with Polynomial Features coef_ evaluation.

In [65]:
# Instantiate our PolynomialFeatures object to create all two-way terms.
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)

# Fit and transform our X data.
X_overfit = poly.fit_transform(X_train)

In [66]:
#Uncomment to review all features that were created
#poly.get_feature_names(X_train.columns)

In [67]:
X_overfit.shape #there are a lot of them

(2033, 434)

In [68]:
#scale
sc_ =  StandardScaler()
sc_.fit_transform(X_overfit)

array([[-0.07300932, -0.35530817, -2.20997115, ..., -0.33972273,
        -0.01287683,  0.23670371],
       [-0.77235568, -0.35530817,  0.26680468, ...,  0.97295558,
         1.35198473,  0.6643518 ],
       [ 0.69294147, -0.35530817,  0.26680468, ..., -0.85541779,
        -0.90863356, -0.47961316],
       ...,
       [ 1.49219446,  0.58401759,  0.26680468, ...,  1.76993884,
         0.90835165, -0.45777889],
       [ 0.55973263, -0.35530817,  0.26680468, ..., -0.33972273,
        -0.60509514, -0.68863735],
       [-0.8389601 ,  0.58401759,  0.26680468, ...,  0.26973505,
         0.67698321,  0.48871163]])

In [69]:
#alpha based on trial run.
lasso = Lasso(alpha=115.13953993264481,max_iter=10000, tol=.1)
lasso.fit(X_overfit,y_train)
lasso.score(X_overfit, y_train)

0.9006830869090925

In [70]:
#increase pandas default max_rows so I can view all coef_ columns at once
pd.options.display.max_rows = 600

In [71]:
lasso_coef = lasso.coef_
poly_columns = poly.get_feature_names(X_train.columns)
pd.DataFrame(list(zip(poly_columns, lasso_coef)))

Unnamed: 0,0,1
0,age,-1318.03425
1,neighborhood_order,16776.424134
2,local_conditions,6309.356558
3,was_remodeled,28057.792504
4,overall_qual,14142.568868
5,exter_qual,1297.259004
6,external_feature,-3177.375235
7,bldg_type_TwnhsE,-30736.607865
8,bldg_type_Twnhs,-44882.46173
9,bldg_type_2fmCon,2622.236699


# Ridge Model

In [72]:
#create pipeline for ridge model
pipe_ridge = Pipeline([
    ('sc', StandardScaler()),
    ('ridge', Ridge())
])

In [73]:
#create parameters for gridsearch
ridge_params1 = {'ridge__alpha':np.logspace(-1, 5, 500),
               'ridge__max_iter':[10000]}

In [74]:
#apply pipeline and gridsearch to GridSearchCV
pipe_ridge_gridsearch = GridSearchCV(
                                pipe_ridge, 
                                param_grid=ridge_params1,
                            )

In [75]:
#fit best result from grid search
pipe_ridge_gridsearch.fit(X_train, y_train)
pipe_ridge_gridsearch.best_estimator_

Pipeline(steps=[('sc', StandardScaler()),
                ('ridge', Ridge(alpha=16.766750345276993, max_iter=10000))])

In [76]:
pipe_ridge_gridsearch.best_score_ #this models score

0.8922787950009882

In [77]:
preds_ridge = pipe_ridge_gridsearch.predict(X_test)

In [78]:
ames_eval['saleprice_ridge'] = preds_ridge
ridge_submission = ames_eval[['id', 'saleprice_ridge']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ames_eval['saleprice'] = preds


In [79]:
ridge_submission.head(3)

Unnamed: 0,id,saleprice
0,2658,128839.292381
1,2718,164153.893323
2,2414,224061.479093


In [None]:
# Not using ridge files

In [80]:
#ridge submission file
#submission.to_csv('../datasets/kaggle_ridge_submission.csv', index = False) 

# ElasticNetCV Model --best of both worlds

In [81]:
pipe_mario = Pipeline([
            ('poly', PolynomialFeatures(degree=3)),
            ('sc', StandardScaler()),
            ('elasticnet', ElasticNet())
                        ])

In [82]:
parameters_mario = {'elasticnet__alpha': np.logspace(-1, 4, 75),
                      'elasticnet__normalize': [True, False],
                      'elasticnet__selection': ['cyclic', 'random'],
                        'elasticnet__max_iter': [10000],
                        'elasticnet__tol': [.1]
                       }

In [88]:
elastic_net_model = GridSearchCV(
                pipe_mario, 
           param_grid= parameters_mario)
elastic_net_model.fit(X_train, y_train)
elastic_net_model.best_estimator_

KeyboardInterrupt: 

In [None]:
elastic_net_model.best_score_

In [None]:
preds_elastic_net_model = elastic_net_model.predict(X_test)

In [None]:
ames_eval['saleprice_elasticnet'] = preds_elastic_net_model
submission = ames_eval[['id', 'saleprice_elasticnet']]
submission.head(3)

In [None]:
#elastic net submission
submission.to_csv('../datasets/kaggle_elasticnet_submission.csv', index = False)