In [2]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline

from jupyterthemes import jtplot
jtplot.style()

In [3]:
# read in the iris data
boston = load_boston()

# create X (features) and y (response)
X = boston.data
y = boston.target

In [4]:
from sklearn.model_selection import GridSearchCV

In [5]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2)

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_valid = sc.transform(X_valid)

# LinearRegression

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
lr_params = {
    'fit_intercept': ['True', 'False']
}

In [10]:
lr = LinearRegression()
lr_grid =  GridSearchCV(lr, lr_params, cv=10)
lr_grid.fit(X_train, y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'fit_intercept': ['True', 'False']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [11]:
y_pred = lr_grid.predict(X_valid)

In [12]:
from sklearn.metrics import mean_squared_error

In [13]:
import numpy as np

In [14]:
final_mse = mean_squared_error(y_valid, y_pred)
final_rmse = np.sqrt(final_mse)
final_rmse

4.456926419247989

In [15]:
y.mean()

22.532806324110677

In [16]:
y.std()

9.188011545278203

In [17]:
from sklearn.metrics import explained_variance_score

In [18]:
explained_variance_score(y_valid, y_pred) 

0.7123942357469233

In [19]:
print(lr_grid.best_params_)
print(lr_grid.best_estimator_)

{'fit_intercept': 'True'}
LinearRegression(copy_X=True, fit_intercept='True', n_jobs=None,
         normalize=False)


In [20]:
from sklearn.metrics import r2_score

In [21]:
print("R^2:", r2_score(y_valid, y_pred))

R^2: 0.7123354767440528


# KNN

In [31]:
from sklearn.neighbors import KNeighborsRegressor

In [32]:
k_range = range(1, 31)
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors=k_range, weights = weight_options)

In [33]:
knn = KNeighborsRegressor()
knn_grid =  GridSearchCV(knn, param_grid, cv=10)
knn_grid.fit(X_train, y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=5, p=2,
          weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': range(1, 31), 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [34]:
y_pred_knn = knn_grid.predict(X_valid)

In [35]:
final_mse = mean_squared_error(y_valid, y_pred_knn)
final_rmse = np.sqrt(final_mse)
final_rmse

3.313128681869492

In [37]:
explained_variance_score(y_valid, y_pred_knn) 

0.850364911084809

In [39]:
print("R^2:", r2_score(y_valid, y_pred_knn))

R^2: 0.8410384880220847


# Random Forest

In [42]:
from sklearn.ensemble import RandomForestRegressor

In [46]:
forest_params = {
    'n_estimators': [5, 10, 20, 40],
    'max_depth': [1, 3, 5, 7, 9, None],
    'min_samples_leaf': [3, 5, 7, 9, 12, 16]
}

In [47]:
forest = RandomForestRegressor()
forest_grid =  GridSearchCV(forest, forest_params, cv=10)
forest_grid.fit(X_train, y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_estimators': [5, 10, 20, 40], 'max_depth': [1, 3, 5, 7, 9, None], 'min_samples_leaf': [3, 5, 7, 9, 12, 16]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [48]:
y_pred_forest = knn_grid.predict(X_valid)

In [49]:
final_mse = mean_squared_error(y_valid, y_pred_forest)
final_rmse = np.sqrt(final_mse)
final_rmse

3.313128681869492

In [50]:
explained_variance_score(y_valid, y_pred_forest) 

0.850364911084809

In [51]:
print("R^2:", r2_score(y_valid, y_pred_forest))

R^2: 0.8410384880220847
