# Grid Search

#### Import modules

In [0]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

## Import dataset

In [0]:
url = "https://raw.githubusercontent.com/h4r1c0t/WildCodeSchool/master/Odyssey/Dataset/titanic.csv"
df_titanic = pd.read_csv(url)
df_titanic = pd.get_dummies(df_titanic, columns = ['Sex'], drop_first = True)

## Decision Tree Classification

### Train test split

In [0]:
X = df_titanic[['Pclass', 'Sex_male', 'Age']]
y = df_titanic['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2, train_size = 0.75)

### Model fiting 

In [4]:
DTC = DecisionTreeClassifier()
DTCmodel = DTC.fit(X_train, y_train)

y_pred = DTCmodel.predict(X_test)

print("Score for the Test dataset :", round(accuracy_score(y_pred, y_test), 5))

Score for the Test dataset : 0.81532


## Grid search

In [5]:
parameters = {'max_depth':[2, 3, 5, 10], 'min_samples_leaf':[1, 3, 5, 10]}

gsearch = GridSearchCV(DTC, parameters, cv = 5, scoring = 'accuracy', verbose = 3)
gsearch.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] max_depth=2, min_samples_leaf=1 .................................
[CV] ..... max_depth=2, min_samples_leaf=1, score=0.759, total=   0.0s
[CV] max_depth=2, min_samples_leaf=1 .................................
[CV] ..... max_depth=2, min_samples_leaf=1, score=0.782, total=   0.0s
[CV] max_depth=2, min_samples_leaf=1 .................................
[CV] ..... max_depth=2, min_samples_leaf=1, score=0.782, total=   0.0s
[CV] max_depth=2, min_samples_leaf=1 .................................
[CV] ..... max_depth=2, min_samples_leaf=1, score=0.805, total=   0.0s
[CV] max_depth=2, min_samples_leaf=1 .................................
[CV] ..... max_depth=2, min_samples_leaf=1, score=0.782, total=   0.0s
[CV] max_depth=2, min_samples_leaf=3 .................................
[CV] ..... max_depth=2, min_samples_leaf=3, score=0.759, total=   0.0s
[CV] max_depth=2, min_samples_leaf=3 .................................
[CV] ..... max_d

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ..... max_depth=5, min_samples_leaf=3, score=0.789, total=   0.0s
[CV] max_depth=5, min_samples_leaf=3 .................................
[CV] ..... max_depth=5, min_samples_leaf=3, score=0.812, total=   0.0s
[CV] max_depth=5, min_samples_leaf=3 .................................
[CV] ..... max_depth=5, min_samples_leaf=3, score=0.820, total=   0.0s
[CV] max_depth=5, min_samples_leaf=3 .................................
[CV] ..... max_depth=5, min_samples_leaf=3, score=0.812, total=   0.0s
[CV] max_depth=5, min_samples_leaf=3 .................................
[CV] ..... max_depth=5, min_samples_leaf=3, score=0.812, total=   0.0s
[CV] max_depth=5, min_samples_leaf=5 .................................
[CV] ..... max_depth=5, min_samples_leaf=5, score=0.789, total=   0.0s
[CV] max_depth=5, min_samples_leaf=5 .................................
[CV] ..... max_depth=5, min_samples_leaf=5, score=0.827, total=   0.0s
[CV] max_depth=5, min_samples_leaf=5 .................................
[CV] .

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.4s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=None,
                                              splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'max_depth': [2, 3, 5, 10],
                         

**Best accuracy score**: 0.8135338345864662

In [6]:
gsearch.best_score_

0.8135338345864662

**Best parameters**: {'max_depth': 5, 'min_samples_leaf': 5}

In [7]:
gsearch.best_params_

{'max_depth': 5, 'min_samples_leaf': 5}

## Random Search

In [8]:
parameters = {'max_depth':np.arange(1, 10), 'min_samples_leaf':np.arange(2, 10)}

rsearch = RandomizedSearchCV(DTC, parameters, n_iter=100, cv = 5, scoring = 'accuracy', verbose = 3)
rsearch.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] min_samples_leaf=2, max_depth=1 .................................
[CV] ..... min_samples_leaf=2, max_depth=1, score=0.805, total=   0.0s
[CV] min_samples_leaf=2, max_depth=1 .................................
[CV] ..... min_samples_leaf=2, max_depth=1, score=0.782, total=   0.0s
[CV] min_samples_leaf=2, max_depth=1 .................................
[CV] ..... min_samples_leaf=2, max_depth=1, score=0.767, total=   0.0s
[CV] min_samples_leaf=2, max_depth=1 .................................
[CV] ..... min_samples_leaf=2, max_depth=1, score=0.789, total=   0.0s
[CV] min_samples_leaf=2, max_depth=1 .................................
[CV] ..... min_samples_leaf=2, max_depth=1, score=0.782, total=   0.0s
[CV] min_samples_leaf=3, max_depth=1 .................................
[CV] ..... min_samples_leaf=3, max_depth=1, score=0.805, total=   0.0s
[CV] min_samples_leaf=3, max_depth=1 .................................
[CV] ..... min_

[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed:    1.6s finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features=None,
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    presort='deprecated',
                                                    random_state=None,
          

**Best accuracy score**: 0.825563909774436

In [9]:
rsearch.best_score_

0.825563909774436

**Best parameters**: {'max_depth': 7, 'min_samples_leaf': 7}

In [10]:
rsearch.best_params_

{'max_depth': 7, 'min_samples_leaf': 7}