In [20]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

try:
    # Imputer was deprecated in scikit-learn 0.20 and removed in 0.22; the
    # import is kept (guarded) only for code that may still reference it.
    from sklearn.preprocessing import Imputer
except ImportError:
    pass

# Single place to repoint the notebook at the raw data. The default is the
# original author's local directory, so edit this when running elsewhere.
DATA_DIR = '/Users/chewychevy/Desktop/Data Science Projects/house_prices/raw_data'

# train.csv carries the SalePrice target that is split off in a later cell;
# test.csv is the frame that gets scored for the final CSV export.
train = pd.read_csv(DATA_DIR + '/train.csv')
test = pd.read_csv(DATA_DIR + '/test.csv')

In [31]:
# Separate the predictors from the regression target (SalePrice).
X = train.drop(labels='SalePrice', axis='columns')
y = train['SalePrice']

In [41]:
# Keep only the columns whose dtype is not `object`; the imputer/scaler
# pipeline below is fed this frame directly.
# NOTE(review): test has an Id column (used for the export); if train has a
# numeric Id too, it stays in the feature matrix -- confirm that is intended.
df = X.select_dtypes(exclude='object')

In [55]:
# Same dtype filter for the frame that will be scored.
# NOTE(review): assumes this yields the same columns, in the same order, as
# the training features the model is fit on -- confirm.
df_test = test.select_dtypes(exclude='object')

In [54]:
# Model: mean-impute missing values, standardize, then fit an ElasticNet.
# SimpleImputer replaces the removed sklearn.preprocessing.Imputer; it always
# imputes column-wise, which is what the old `axis=0` argument selected.
steps = [('imputation', SimpleImputer(missing_values=np.nan, strategy='mean')),
         ('scaler', StandardScaler()),
         ('elasticnet', ElasticNet(max_iter=10000))]
pipeline = Pipeline(steps)

# Only the L1/L2 mixing ratio is searched (30 evenly spaced values in [0, 1]);
# ElasticNet's `alpha` is not part of the grid and keeps its default.
parameters = {'elasticnet__l1_ratio': np.linspace(0, 1, 30)}

# Hold out 30% of the labelled rows; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.3,
                                                    random_state=42)

# 5-fold cross-validated grid search over the pipeline on the training split.
gm_cv = GridSearchCV(pipeline, parameters, cv=5)
gm_cv.fit(X_train, y_train)

# No `scoring` is passed, so score() uses the estimator's default scorer,
# which for a regressor such as ElasticNet is R^2.
r2 = gm_cv.score(X_test, y_test)

# The label names the parameter that was actually tuned (l1_ratio, not alpha).
print("Tuned ElasticNet l1_ratio: {}".format(gm_cv.best_params_))
print("Tuned ElasticNet R squared: {}".format(r2))



Tuned ElasticNet Alpha: {'elasticnet__l1_ratio': 0.51724137931034486}
Tuned ElasticNet R squared: 0.8089697222547062


In [56]:
# Predict sale prices for the filtered test features using the fitted search
# object (GridSearchCV.predict delegates to the best estimator it found).
prediction = gm_cv.predict(X=df_test)

In [60]:
# Start the output table as a one-column frame holding the test Ids.
predicted_price = test['Id'].to_frame()

In [61]:
# Attach the model output as the SalePrice column next to each Id.
predicted_price = predicted_price.assign(SalePrice=prediction)

In [63]:
# Write the Id/SalePrice table to the working directory, omitting the index.
predicted_price.to_csv(path_or_buf='house_price_fast.csv', index=False)