# Harish Practise: Grid Search CV

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generating a synthetic dataset

In [2]:
X, y = make_classification(
    n_features=10, 
    n_samples=1000, 
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2, 
    random_state=42
)

### Method 1: Evaluate the model using train, test split and tune parameters by trial and error

In [6]:
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Splitting Data into Training and Test Set

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [8]:
model = DecisionTreeClassifier(criterion="entropy", max_depth=10) #criteria: "gini" or "entropy", max_depth=5 or 10
model.fit(X_train,y_train)

In [9]:
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.84      0.75      0.79       130
           1       0.76      0.84      0.80       120

    accuracy                           0.80       250
   macro avg       0.80      0.80      0.80       250
weighted avg       0.80      0.80      0.80       250



### Method 2: Use K Fold Cross Validation

In [10]:
from sklearn.model_selection import cross_val_score

cross_val_score(DecisionTreeClassifier(criterion='gini', max_depth=5), X, y, cv=5)

array([0.78 , 0.8  , 0.74 , 0.79 , 0.775])

In [11]:
cross_val_score(DecisionTreeClassifier(criterion="entropy", max_depth=5), X, y, cv=5)

array([0.765, 0.775, 0.745, 0.815, 0.79 ])

In [12]:
criterion = ["gini","entropy"]
max_depth = [5,10,15]

avg_scores = {}

for c in criterion:
    for d in max_depth:
        clf = DecisionTreeClassifier(criterion=c, max_depth=d)
        score_list = cross_val_score(clf,X,y, cv =5)
        avg_scores[c + "_" + str(d)] = np.average(score_list)

avg_scores

{'gini_5': 0.779,
 'gini_10': 0.785,
 'gini_15': 0.794,
 'entropy_5': 0.7789999999999999,
 'entropy_10': 0.786,
 'entropy_15': 0.8100000000000002}

### Method 3: Use GridSearchCV

In [14]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(
    DecisionTreeClassifier(),
    {'criterion': ["gini","entropy"],'max_depth':[5,10,15]},
    cv=5,
    return_train_score=False
)
clf.fit(X,y)
clf.cv_results_

{'mean_fit_time': array([0.00578647, 0.00937061, 0.0091754 , 0.00816126, 0.01296439,
        0.01416092]),
 'std_fit_time': array([0.00039027, 0.00184814, 0.00116291, 0.00073358, 0.00065148,
        0.00039894]),
 'mean_score_time': array([0.00120363, 0.00100102, 0.00080447, 0.0008028 , 0.0009984 ,
        0.0009984 ]),
 'std_score_time': array([4.05057227e-04, 1.56040179e-05, 4.02425346e-04, 4.01516294e-04,
        5.56082906e-07, 5.76164530e-07]),
 'param_criterion': masked_array(data=['gini', 'gini', 'gini', 'entropy', 'entropy',
                    'entropy'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_max_depth': masked_array(data=[5, 10, 15, 5, 10, 15],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'params': [{'criterion': 'gini', 'max_depth': 5},
  {'criterion': 'gini', 'max_depth': 10},
  {'criterion': 'gini', 'max_depth': 15},
  {'criterion': 'entropy',

In [16]:
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.005786,0.00039,0.001204,0.0004050572,gini,5,"{'criterion': 'gini', 'max_depth': 5}",0.78,0.795,0.75,0.805,0.775,0.781,0.018815,5
1,0.009371,0.001848,0.001001,1.560402e-05,gini,10,"{'criterion': 'gini', 'max_depth': 10}",0.8,0.745,0.805,0.795,0.81,0.791,0.023537,2
2,0.009175,0.001163,0.000804,0.0004024253,gini,15,"{'criterion': 'gini', 'max_depth': 15}",0.775,0.74,0.82,0.8,0.815,0.79,0.029496,4
3,0.008161,0.000734,0.000803,0.0004015163,entropy,5,"{'criterion': 'entropy', 'max_depth': 5}",0.765,0.78,0.755,0.815,0.78,0.779,0.020347,6
4,0.012964,0.000651,0.000998,5.560829e-07,entropy,10,"{'criterion': 'entropy', 'max_depth': 10}",0.77,0.81,0.81,0.77,0.795,0.791,0.018,2
5,0.014161,0.000399,0.000998,5.761645e-07,entropy,15,"{'criterion': 'entropy', 'max_depth': 15}",0.76,0.8,0.83,0.78,0.845,0.803,0.031241,1


In [17]:
df[["param_criterion", "param_max_depth", "mean_test_score"]]

Unnamed: 0,param_criterion,param_max_depth,mean_test_score
0,gini,5,0.781
1,gini,10,0.791
2,gini,15,0.79
3,entropy,5,0.779
4,entropy,10,0.791
5,entropy,15,0.803


In [18]:
clf.best_params_

{'criterion': 'entropy', 'max_depth': 15}

In [19]:
model = clf.best_estimator_
model

### Now let's try different models with different parameters

In [21]:
from sklearn import svm 

model_params = {
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params' : {
            'criterion': ["gini","entropy"],
            'max_depth': [5,10,15]
        }

    },
    'svm':{
        'model': svm.SVC(gamma='auto'),
        'params':{
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }
    }
}

scores = []

for key, val in model_params.items():
    clf = GridSearchCV(
        val['model'],
        val['params'],
        cv=5,
        return_train_score=False

    )

    clf.fit(X,y)
    scores.append({
        'model':key,
        'model_score':clf.best_score_,
        'best_param':clf.best_params_
    })

scores          

[{'model': 'decision_tree',
  'model_score': 0.8109999999999999,
  'best_param': {'criterion': 'entropy', 'max_depth': 15}},
 {'model': 'svm',
  'model_score': 0.9260000000000002,
  'best_param': {'C': 1, 'kernel': 'rbf'}}]

In [24]:
df = pd.DataFrame(scores)
df

Unnamed: 0,model,model_score,best_param
0,decision_tree,0.811,"{'criterion': 'entropy', 'max_depth': 15}"
1,svm,0.926,"{'C': 1, 'kernel': 'rbf'}"
