In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# Read the housing CSV into a DataFrame, then preview its first rows
housing_df = pd.read_csv(filepath_or_buffer='HousingData.csv')
housing_df.head()

In [None]:
# Discard every row that contains at least one missing value
housing_df = housing_df.dropna(axis=0, how='any')

In [None]:
# Predictors are every column except the last; the target is the last column
X, y = housing_df.iloc[:, :-1], housing_df.iloc[:, -1]

In [None]:
# Create training and test sets (test_size=0.2 holds out 20% of the rows).
# A fixed random_state makes the shuffle reproducible, so the fitted model and
# its test RMSE are the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Create the regressor: reg is a LinearRegression built with its default arguments (not yet fitted)
reg = LinearRegression()

In [None]:
# Train the linear model on the training split
reg.fit(X_train, y_train)

In [None]:
# Predict on the held-out test features; y_pred holds the model's outputs for X_test
y_pred = reg.predict(X_test)

In [None]:
# Compute and print RMSE: the square root of the mean squared error between
# the true test targets and the model's predictions.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)

In [None]:

#Exercise146 begins from here


In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
def regression_model_cv(model, k=5, features=None, target=None):
    """Print the per-fold and mean RMSE of `model` under cross-validation.

    Parameters
    ----------
    model : estimator
        scikit-learn style regressor handed to `cross_val_score`.
    k : int, default 5
        Value forwarded to `cross_val_score` as `cv`.
    features, target : optional
        Data to evaluate on. When omitted, the notebook-level `X` and `y`
        are used, so existing calls keep working unchanged.

    Returns
    -------
    None
        The scores are printed, not returned.
    """
    # Fall back to the notebook globals only when no data is passed explicitly.
    if features is None:
        features = X
    if target is None:
        target = y
    scores = cross_val_score(model, features, target,
                             scoring='neg_mean_squared_error', cv=k)
    # The scorer yields negated MSE values, so flip the sign before the root.
    rmse = np.sqrt(-scores)
    print('Reg rmse:', rmse)
    print('Reg mean:', rmse.mean())

In [None]:
# Baseline: linear regression with the helper's default number of folds
regression_model_cv(model=LinearRegression())

In [None]:
# Same linear model, evaluated with 3 folds
regression_model_cv(model=LinearRegression(), k=3)

In [None]:
# Same linear model, evaluated with 6 folds
regression_model_cv(model=LinearRegression(), k=6)

In [None]:

#Exercise147 begins from here


In [None]:
from sklearn.neighbors import KNeighborsRegressor
# K-nearest-neighbors regressor with its default arguments
regression_model_cv(model=KNeighborsRegressor())

In [None]:
# KNN regressor using 4 neighbors
regression_model_cv(model=KNeighborsRegressor(n_neighbors=4))

In [None]:
# KNN regressor using 7 neighbors
regression_model_cv(model=KNeighborsRegressor(n_neighbors=7))

In [None]:
# KNN regressor using 10 neighbors
regression_model_cv(model=KNeighborsRegressor(n_neighbors=10))

In [None]:

#Exercise148 begins from here


In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate neighbor counts 1..20, built directly as integers (the stop value
# of np.arange is exclusive, hence 21) rather than as evenly spaced floats.
neighbors = np.arange(1, 21)

In [None]:
# Cast the candidate neighbor counts to integers before using them as n_neighbors values
k = neighbors.astype(int)

In [None]:
# Search space for the grid search: one entry per candidate n_neighbors value
param_grid = dict(n_neighbors=k)

In [None]:
# Base estimator for the grid search: a KNeighborsRegressor with default arguments
knn = KNeighborsRegressor()

In [None]:
# Grid search over param_grid: 5-fold cross-validation, scored by negated MSE
knn_tuned = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)

In [None]:
# Run the grid search on the full dataset (X, y); cross-validation is handled inside GridSearchCV (cv=5)
knn_tuned.fit(X, y)

In [None]:
# Report the winning hyperparameters and the matching cross-validated RMSE
k = knn_tuned.best_params_
score = knn_tuned.best_score_
# The search was scored with negated MSE, so negate before taking the root
rsm = np.sqrt(-score)
print("Best n_neighbors: {}".format(k))
print("Best score: {}".format(rsm))

In [None]:

#Exercise149 begins from here


In [None]:
from sklearn import tree
# Cross-validate a decision tree. random_state pins the tree's internal
# randomness so the printed scores are reproducible between runs.
regression_model_cv(tree.DecisionTreeRegressor(random_state=42))

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Cross-validate a random forest with default settings. random_state pins the
# bootstrap/feature sampling so the printed scores are reproducible.
regression_model_cv(RandomForestRegressor(random_state=42))

In [None]:

#Exercise150 begins from here


In [None]:
# Cross-validate a 100-tree random forest using all available cores
# (n_jobs=-1). random_state makes the scores reproducible between runs.
regression_model_cv(
    RandomForestRegressor(n_jobs=-1, n_estimators=100, random_state=42)
)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Candidate hyperparameter values for the random forest search.
# max_features=1.0 (consider every feature) replaces the string 'auto', which
# meant the same thing for regressors but was deprecated in scikit-learn 1.1
# and removed in 1.3, where passing it raises an error.
param_grid = {'max_depth': [None, 10, 30, 50, 70, 100, 200, 400],
              'min_samples_split': [2, 3, 4, 5],
              'min_samples_leaf': [1, 2, 3],
              'max_features': [1.0, 'sqrt']}

In [None]:
# Base estimator for the randomized search: a random forest trained on all
# available cores (n_jobs=-1). random_state pins the forest's own sampling so
# each candidate setting is scored reproducibly.
reg = RandomForestRegressor(n_jobs=-1, random_state=42)

In [None]:
# Randomized search over param_grid: 5-fold cross-validation, scored by
# negated MSE. random_state fixes which candidate combinations are sampled,
# so the search explores the same settings on every run.
reg_tuned = RandomizedSearchCV(
    reg,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
)

In [None]:
# Run the randomized search on the full dataset (X, y); cross-validation is handled inside RandomizedSearchCV (cv=5)
reg_tuned.fit(X, y)

In [None]:
# Report the best hyperparameters found by the randomized search and the
# matching cross-validated RMSE. The label says "params" because these are
# random-forest settings, not a neighbor count.
p = reg_tuned.best_params_
print("Best params: {}".format(p))
score = reg_tuned.best_score_
# The search was scored with negated MSE, so negate before taking the root.
rsm = np.sqrt(-score)
print("Best score: {}".format(rsm))

In [None]:
# Cross-validate a larger forest (500 trees) on all available cores
# (n_jobs=-1). random_state makes the printed scores reproducible.
regression_model_cv(
    RandomForestRegressor(n_jobs=-1, n_estimators=500, random_state=42)
)