`Grid Search is good when we work with a small number of hyperparameters. However, if the number of parameters to consider is particularly high and the magnitudes of influence are imbalanced, the better choice is to use the Random Search.`
# GridSearchCV

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [4]:
data = pd.read_csv('https://raw.githubusercontent.com/krishnaik06/GRIDSearchCV/master/Advertising_data.csv')
X = data.iloc[:,[2,3]].values
y = data.iloc[:,4].values

In [5]:
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train,y_test = train_test_split(X,y,test_size=0.30, random_state=5)

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test) 

In [8]:
from sklearn.svm import SVC
clf = SVC(kernel='linear', random_state=0)
clf.fit(X_train,y_train)

SVC(kernel='linear', random_state=0)

In [30]:
y_pred = clf.predict(X_test)

In [31]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test,y_pred)

In [32]:
cm

array([[72,  6],
       [11, 31]])

In [33]:
accuracy_score(y_test, y_pred)

0.8583333333333333

In [20]:
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
              {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]



grid_search = GridSearchCV(estimator = clf,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)

grid_search = grid_search.fit(X_train, y_train)

In [21]:
grid_search.best_estimator_

SVC(C=10, gamma=0.4, random_state=0)

In [22]:
grid_search.best_score_

0.9071428571428571

In [23]:
grid_search.best_params_

{'C': 10, 'gamma': 0.4, 'kernel': 'rbf'}

In [34]:
y_pred = grid_search.predict(X_test)

In [35]:
accuracy_score(y_test, y_pred)

0.925

In [36]:
cm = confusion_matrix(y_test,y_pred)
cm

array([[72,  6],
       [ 3, 39]])

# RandomizedSearchCV

In [37]:
df = pd.read_csv('https://raw.githubusercontent.com/krishnaik06/RandomizedSearchCV/master/Social_Network_Ads.csv')

In [38]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [39]:
X = df.iloc[:,[2,3]].values
y = df.iloc[:,4].values

In [53]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=0)

In [54]:
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [55]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=50)
rf.fit(x_train,y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=50)

In [56]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

In [57]:
est = RandomForestClassifier( n_jobs=-1)
rf_p_dist={'max_depth':[3,5,10,None],
              'n_estimators':[10,100,200,300,400,500],
              'max_features':randint(1,3),
               'criterion':['gini','entropy'],
               'bootstrap':[True,False],
               'min_samples_leaf':randint(1,4),
              }

In [58]:
def hypertuning_rscv(est, p_distr, nbr_iter,X,y):
    rdmsearch = RandomizedSearchCV(est, param_distributions=p_distr,
                                  n_jobs=-1, n_iter=nbr_iter, cv=9)
    #CV = Cross-Validation ( here using Stratified KFold CV)
    rdmsearch.fit(X,y)
    ht_params = rdmsearch.best_params_
    ht_score = rdmsearch.best_score_
    return ht_params, ht_score

rf_parameters, rf_ht_score = hypertuning_rscv(est, rf_p_dist, 40, X, y)

In [59]:
rf_parameters, rf_ht_score

({'bootstrap': True,
  'criterion': 'entropy',
  'max_depth': 3,
  'max_features': 2,
  'min_samples_leaf': 1,
  'n_estimators': 200},
 0.9120089786756453)

In [60]:
claasifier=RandomForestClassifier(n_jobs=-1, **rf_parameters)
claasifier.fit(X_train,y_train)

RandomForestClassifier(criterion='entropy', max_depth=3, max_features=2,
                       n_estimators=200, n_jobs=-1)

In [61]:
y_pred = claasifier.predict(X_test)

In [62]:
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, y_pred)

accuracy_score=accuracy_score(y_test,y_pred)

#claasifier=RandomForestClassifier(n_jobs=-1, n_estimators=300,bootstrap= True,criterion='entropy',max_depth=3,max_features=2,min_samples_leaf= 3)

## Cross Validation good for selecting models
from sklearn.model_selection import cross_val_score

cross_val=cross_val_score(claasifier,X,y,cv=10,scoring='accuracy').mean()

TIP:
    param = {‘max_depth: [6,9, None],
    
         ‘n_estimators’:[50, 70, 100, 150], 
         
          'max_features': randint(1,6),
          
          'criterion' : ['gini', 'entropy'],
          
          'bootstrap':[True, False],
          
          'mln_samples_leaf': randint(1,4)}