# Exercise - Grid Search (Regression) 
- Answer the questions

In [1]:
# loading libraries
from sklearn.datasets import load_boston
import pandas as pd 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# loading boston dataset from sklearn
data = load_boston()
df = pd.DataFrame(data.data, columns=data.feature_names)

In [3]:
df['target'] = data.target

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 non-null    float64
 4   NOX      506 non-null    float64
 5   RM       506 non-null    float64
 6   AGE      506 non-null    float64
 7   DIS      506 non-null    float64
 8   RAD      506 non-null    float64
 9   TAX      506 non-null    float64
 10  PTRATIO  506 non-null    float64
 11  B        506 non-null    float64
 12  LSTAT    506 non-null    float64
 13  target   506 non-null    float64
dtypes: float64(14)
memory usage: 55.5 KB


### Standardization of variables
- The variables involved in making the machine learning model needs to be **standardized** as the regularization methods will not work best if the variables are of different magnitudes. 

In [5]:
# Defining the variables for the machine learning model
X = df.drop(['target'], axis = 1)
y = df['target']

In [6]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)

In [7]:
# fitting data onto the test data
ss = StandardScaler()
Xs_train = ss.fit_transform(X_train)

In [8]:
Xs_test = ss.transform(X_test)

## Run Grid Search  on Elastic Net 
- Use Grid Search
- Use at least 10 values of `alpha`
- Use at least 10 values of `l1_ratio`
- Use scoring `mean_squared_error` & `r2`
- What are the best hyperpareters?

In [29]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.model_selection import GridSearchCV



In [59]:
regr_elas = ElasticNet(random_state=0)
regr_elas.fit(Xs_train,y_train)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=0, selection='cyclic', tol=0.0001, warm_start=False)

In [81]:
pred_train = regr_elas.predict(Xs_train)
#acc = accuracy_score(y_train, pred_train)
#print('accuracy', acc)

In [32]:

param_grid_elas = {
    'alpha' : [1,2, 3, 4, 5,6,7,8,18,20],
    'l1_ratio' : [ 1,3, 5, 7, 9,12,13,14,18,20],
       
}

In [33]:

scoring_param = ['r2']

gs_elas = GridSearchCV(regr_elas, 
                  param_grid_elas, 
                  cv=5,
                  scoring=scoring_param,
                  refit='r2'
                  )

In [34]:
gs_elas.fit(Xs_train, y_train)


GridSearchCV(cv=5, error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=0, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1, 2, 3, 4, 5, 6, 7, 8, 18, 20],
                         'l1_ratio': [1, 3, 5, 7, 9, 12, 13, 14, 18, 20]},
             pre_dispatch='2*n_jobs', refit='r2', return_train_score=False,
             scoring=['r2'], verbose=0)

In [36]:
gs_elas.best_estimator_


ElasticNet(alpha=1, copy_X=True, fit_intercept=True, l1_ratio=1, max_iter=1000,
           normalize=False, positive=False, precompute=False, random_state=0,
           selection='cyclic', tol=0.0001, warm_start=False)

In [37]:
gs_elas.best_params_


{'alpha': 1, 'l1_ratio': 1}

In [38]:
gs_elas.score(Xs_test, y_test)


0.6794562142087717

In [45]:
# mean square error
scoring_param = ['neg_mean_squared_error']


In [46]:
gs_elas_1 = GridSearchCV(regr_elas, 
                  param_grid_elas, 
                  cv=5,
                  scoring=scoring_param,
                  refit='neg_mean_squared_error'
                  )

In [47]:
gs_elas_1.fit(Xs_train, y_train)


GridSearchCV(cv=5, error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=0, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1, 2, 3, 4, 5, 6, 7, 8, 18, 20],
                         'l1_ratio': [1, 3, 5, 7, 9, 12, 13, 14, 18, 20]},
             pre_dispatch='2*n_jobs', refit='neg_mean_squared_error',
             return_train_score=False, scoring=['neg_mean_squared_error'],
             verbose=0)

In [48]:
gs_elas_1.best_estimator_


ElasticNet(alpha=1, copy_X=True, fit_intercept=True, l1_ratio=1, max_iter=1000,
           normalize=False, positive=False, precompute=False, random_state=0,
           selection='cyclic', tol=0.0001, warm_start=False)

In [49]:
gs_elas_1.best_params_


{'alpha': 1, 'l1_ratio': 1}

In [50]:
gs_elas_1.score(Xs_test, y_test)


-31.94470109061735

## Run Grid Search  on KNeighborsRegressor 
- Use Grid Search
- Use at least `5` different `n_neighbours` 
- use weights `distance` and `uniform`
- use metrics `manhattan`, `euclidean`
- Use scoring `mean_squared_error` & `r2`
- What are the best hyperpareters?

In [66]:
from sklearn.neighbors import KNeighborsRegressor


In [67]:
classifier = KNeighborsRegressor()
#classifier.fit(Xs_train, y_train)
#regr_elas.fit(Xs_train,y_train)

In [68]:
param_grid_knn = {
    'n_neighbors' : [3, 5, 7, 9,12,13,14,18,20],
    'weights' : [ 'distance','uniform'],
    'metric' : ['manhattan', 'euclidean']   
}

In [69]:
scoring_param = ['r2']


gs_knn = GridSearchCV(classifier, 
                  param_grid_knn, 
                  cv=5,
                  scoring=scoring_param,
                  refit='r2'
                  )

In [70]:
gs_knn.fit(Xs_train, y_train)


GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30,
                                           metric='minkowski',
                                           metric_params=None, n_jobs=None,
                                           n_neighbors=5, p=2,
                                           weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': ['manhattan', 'euclidean'],
                         'n_neighbors': [3, 5, 7, 9, 12, 13, 14, 18, 20],
                         'weights': ['distance', 'uniform']},
             pre_dispatch='2*n_jobs', refit='r2', return_train_score=False,
             scoring=['r2'], verbose=0)

In [71]:
gs_knn.best_estimator_


KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='manhattan',
                    metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                    weights='distance')

In [72]:
gs_knn.best_params_


{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}

In [73]:
gs_knn.score(Xs_test, y_test)


0.8948253642078148

In [76]:
# means square
scoring_param = ['neg_mean_squared_error']


gs_knn_1 = GridSearchCV(classifier, 
                  param_grid_knn, 
                  cv=5,
                  scoring=scoring_param,
                  refit='neg_mean_squared_error'
                  )

In [77]:
gs_knn_1.fit(Xs_train, y_train)


GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30,
                                           metric='minkowski',
                                           metric_params=None, n_jobs=None,
                                           n_neighbors=5, p=2,
                                           weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'metric': ['manhattan', 'euclidean'],
                         'n_neighbors': [3, 5, 7, 9, 12, 13, 14, 18, 20],
                         'weights': ['distance', 'uniform']},
             pre_dispatch='2*n_jobs', refit='neg_mean_squared_error',
             return_train_score=False, scoring=['neg_mean_squared_error'],
             verbose=0)

In [78]:
gs_knn_1.best_estimator_


KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='manhattan',
                    metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                    weights='distance')

In [79]:
gs_knn_1.best_params_


{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}

In [80]:
gs_knn_1.score(Xs_test, y_test)


-10.481476951433207