# Grid Search and Grid Search CV

In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
import math
import scipy
# Render figures inline in the notebook output.
%matplotlib inline
# NOTE(review): the 'seaborn' style name works with the Matplotlib 2.0.2 recorded
# further down; newer Matplotlib releases renamed these styles to 'seaborn-v0_8*'
# -- confirm the name before running on an upgraded environment.
plt.style.use('seaborn')

In [2]:
import numpy as np
import sklearn
import matplotlib
import pandas as pd
import sys
# Report the interpreter and library versions this notebook was executed with,
# so that recorded outputs can be traced back to a concrete environment.
libraries = (
    ('Matplotlib', matplotlib),
    ('Numpy', np),
    ('Pandas', pd),
)

print("Python Version:", sys.version, '\n')
for label, module in libraries:
    print('{0} Version: {1}'.format(label, module.__version__))

Python Version: 3.6.2 |Anaconda custom (64-bit)| (default, Sep 21 2017, 18:29:43) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)] 

Matplotlib Version: 2.0.2
Numpy Version: 1.12.1
Pandas Version: 0.20.3


In [50]:
from itertools import product

class grid_search():
    """Exhaustive hyper-parameter search scored on the training data itself.

    Every combination of the values in ``param_grid`` is rendered as a
    constructor-call string (for example ``"Model(a=1, b=2)"``), instantiated
    with ``eval``, fitted on ``(X, y)`` and ranked by ``model.score(X, y)``.
    No data is held out, so the scores measure fit on the training set only.

    Parameters
    ----------
    model_name : str
        Constructor call for the base model, e.g. ``'Model()'`` or
        ``'Model(n_feat=7)'``. Arguments already present are kept for every
        candidate. A bare name such as ``'Model'`` is accepted as well.
    param_grid : dict
        Maps keyword-argument names to iterables of candidate values.

    Attributes
    ----------
    models : list of str
        One constructor-call string per parameter combination.
    all_results : list
        ``[model, score, model_name]`` entries, best score first. Empty until
        ``fit`` has been called.
    best_model, best_score
        Model and score of the top-ranked entry; ``None`` until ``fit``.
    """

    def __init__(self, model_name, param_grid):
        call = str(model_name).strip()
        if call.endswith(')'):
            # Drop only the closing parenthesis of the call; removing every ')'
            # would corrupt arguments that contain parentheses themselves.
            call = call[:-1]
        elif '(' not in call:
            # A bare class name: open the argument list ourselves.
            call += '('
        self._base_model = call
        self._param_grid = param_grid
        self.models = self.get_models()
        # Defined up front so print_results() is safe to call before fit().
        self.all_results = []
        self.best_model = None
        self.best_score = None

    @staticmethod
    def _format_value(value):
        """Render one grid value as source text for the constructor call."""
        # Strings must be quoted, otherwise eval() would look them up as names.
        if isinstance(value, str):
            return repr(value)
        return str(value)

    def get_models(self):
        """Build the constructor-call string for every parameter combination.

        Returns
        -------
        list of str
            Calls in the order produced by ``itertools.product`` over the grid
            values. An empty grid yields the base call alone.
        """
        names = []
        value_lists = []
        for name, values in self._param_grid.items():
            names.append(name)
            value_lists.append(values)

        prefix = self._base_model
        # A separator is only needed when the base call already has arguments.
        base_has_args = not prefix.endswith('(')

        command_list = []
        for combination in product(*value_lists):
            kwargs = ", ".join(
                "{0}={1}".format(name, self._format_value(value))
                for name, value in zip(names, combination)
            )
            separator = ", " if (base_has_args and kwargs) else ""
            command_list.append(prefix + separator + kwargs + ")")
        return command_list

    def fit(self, X, y):
        """Fit every candidate on ``(X, y)`` and rank them by ``score(X, y)``.

        Populates ``all_results``, ``best_model`` and ``best_score``.

        Raises
        ------
        IndexError
            If the grid produced no candidates (a parameter with no values).
        """
        results = []
        for model_name in self.models:
            # SECURITY: eval() executes the call string as Python code. Only
            # pass trusted model names and grids. The names used in the string
            # must be resolvable from the namespace this class is defined in.
            model = eval(model_name)
            model.fit(X, y)
            score = model.score(X, y)
            results.append([model, score, model_name])
        self.all_results = sorted(results, key=lambda entry: entry[1], reverse=True)
        self.best_model = self.all_results[0][0]
        self.best_score = self.all_results[0][1]

    def print_results(self):
        """Print each candidate's call string and score, best first.

        Prints nothing when there are no results yet.
        """
        if self.all_results:
            print("Model    |    Score\n--------------------\n")
            for result in self.all_results:
                print(result[2], "   |   ", result[1], "\n")

In [51]:
import sys 
sys.path.append('../../modules')
from random_forest_classifier import random_forest_classifier

# Candidate values per hyper-parameter; every combination becomes one model.
param_grid = {"n_trees": [1,10,20],"max_depth":[1,5,10]}

# The base call string carries the fixed argument (n_feat=7) shared by all candidates.
gs = grid_search('random_forest_classifier(n_feat=7)',param_grid)
# Display the generated constructor-call strings (3 x 3 = 9 candidates).
gs.models

['random_forest_classifier(n_feat=7, n_trees=1, max_depth=1)',
 'random_forest_classifier(n_feat=7, n_trees=1, max_depth=5)',
 'random_forest_classifier(n_feat=7, n_trees=1, max_depth=10)',
 'random_forest_classifier(n_feat=7, n_trees=10, max_depth=1)',
 'random_forest_classifier(n_feat=7, n_trees=10, max_depth=5)',
 'random_forest_classifier(n_feat=7, n_trees=10, max_depth=10)',
 'random_forest_classifier(n_feat=7, n_trees=20, max_depth=1)',
 'random_forest_classifier(n_feat=7, n_trees=20, max_depth=5)',
 'random_forest_classifier(n_feat=7, n_trees=20, max_depth=10)']

In [5]:
from sklearn.datasets import load_iris
# Load the iris data once and unpack features and labels together, rather
# than building the whole dataset object twice.
X, y = load_iris(return_X_y=True)

In [19]:
# Fit every candidate on the full iris data; grid_search ranks them by the
# score on that same data, so these are training-set scores.
gs.fit(X,y)

In [20]:
# NOTE(review): the recorded output below lists models without the n_feat=7
# argument passed when `gs` was built above, so it was presumably captured from
# an earlier run -- restart the kernel and run all cells to refresh it.
gs.print_results()

Model    |    Score
--------------------

random_forest_classifier(n_trees=10, max_depth=10)    |    1.0 

random_forest_classifier(n_trees=20, max_depth=10)    |    0.9933333333333333 

random_forest_classifier(n_trees=10, max_depth=5)    |    0.9866666666666667 

random_forest_classifier(n_trees=20, max_depth=5)    |    0.9866666666666667 

random_forest_classifier(n_trees=1, max_depth=10)    |    0.9733333333333334 

random_forest_classifier(n_trees=1, max_depth=5)    |    0.9666666666666667 

random_forest_classifier(n_trees=10, max_depth=1)    |    0.66 

random_forest_classifier(n_trees=20, max_depth=1)    |    0.66 

random_forest_classifier(n_trees=1, max_depth=1)    |    0.6533333333333333 



In [54]:
from itertools import product
import sys 
sys.path.append('../../modules')
from data_splitting import cross_val

class grid_search_cv():
    """Exhaustive hyper-parameter search ranked by mean cross-validation score.

    Every combination of the values in ``param_grid`` is rendered as a
    constructor-call string (for example ``"Model(a=1, b=2)"``), instantiated
    with ``eval`` and handed to ``cross_val().cross_validation_scores`` together
    with ``X``, ``y`` and ``k``. The fold scores are then read back from the
    ``score_folds`` attribute of that ``cross_val`` object.

    Parameters
    ----------
    model_name : str
        Constructor call for the base model, e.g. ``'Model()'`` or
        ``'Model(mode="x")'``. Arguments already present are kept for every
        candidate. A bare name such as ``'Model'`` is accepted as well.
    param_grid : dict, optional
        Maps keyword-argument names to iterables of candidate values. Omitted
        or ``None`` means an empty grid, i.e. only the base model is evaluated.
    k : int, default 5
        Passed through to ``cross_validation_scores`` (presumably the number
        of folds -- confirm against ``data_splitting.cross_val``).

    Attributes
    ----------
    models : list of str
        One constructor-call string per parameter combination.
    all_results : list
        ``[model, fold_scores, model_name]`` entries, highest mean fold score
        first. Empty until ``fit`` has been called.
    best_model, best_score
        Model and fold scores of the top-ranked entry; ``None`` until ``fit``.
        ``best_score`` is the full collection of fold scores, not their mean.
    """

    def __init__(self, model_name, param_grid=None, k=5):
        call = str(model_name).strip()
        if call.endswith(')'):
            # Drop only the closing parenthesis of the call; removing every ')'
            # would corrupt arguments that contain parentheses themselves.
            call = call[:-1]
        elif '(' not in call:
            # A bare class name: open the argument list ourselves.
            call += '('
        self._base_model = call
        # None instead of a shared mutable default dict.
        self._param_grid = {} if param_grid is None else param_grid
        self.k = k
        self.models = self.get_models()
        # Defined up front so print_results() is safe to call before fit().
        self.all_results = []
        self.best_model = None
        self.best_score = None

    @staticmethod
    def _format_value(value):
        """Render one grid value as source text for the constructor call."""
        # Strings must be quoted, otherwise eval() would look them up as names.
        if isinstance(value, str):
            return repr(value)
        return str(value)

    def get_models(self):
        """Build the constructor-call string for every parameter combination.

        Returns
        -------
        list of str
            Calls in the order produced by ``itertools.product`` over the grid
            values. An empty grid yields the base call alone.
        """
        names = []
        value_lists = []
        for name, values in self._param_grid.items():
            names.append(name)
            value_lists.append(values)

        prefix = self._base_model
        # A separator is only needed when the base call already has arguments.
        base_has_args = not prefix.endswith('(')

        command_list = []
        for combination in product(*value_lists):
            kwargs = ", ".join(
                "{0}={1}".format(name, self._format_value(value))
                for name, value in zip(names, combination)
            )
            separator = ", " if (base_has_args and kwargs) else ""
            command_list.append(prefix + separator + kwargs + ")")
        return command_list

    def fit(self, X, y):
        """Cross-validate every candidate and rank them by mean fold score.

        Populates ``all_results``, ``best_model`` and ``best_score``.

        Raises
        ------
        IndexError
            If the grid produced no candidates (a parameter with no values).
        """
        results = []
        for model_name in self.models:
            # SECURITY: eval() executes the call string as Python code. Only
            # pass trusted model names and grids. The names used in the string
            # must be resolvable from the namespace this class is defined in.
            model = eval(model_name)
            cv = cross_val()
            cv.cross_validation_scores(model, X, y, self.k)
            results.append([model, cv.score_folds, model_name])
        # Rank on the mean of the fold scores. Sorting on the raw score lists
        # compares them element by element, letting the first fold alone
        # decide the order.
        self.all_results = sorted(
            results, key=lambda entry: np.mean(entry[1]), reverse=True
        )
        self.best_model = self.all_results[0][0]
        self.best_score = self.all_results[0][1]

    def print_results(self, coefs=False, mean=False):
        """Print each candidate's call string and scores, best first.

        Parameters
        ----------
        coefs : bool, default False
            Also print the model's ``coefs_`` attribute, or a notice when the
            model has none.
        mean : bool, default False
            Print the mean of the fold scores instead of the individual scores.
        """
        if self.all_results:
            print("Model    |    Scores\n--------------------")
            for result in self.all_results:
                if mean:
                    print(result[2], "   |   ", np.mean(result[1]))
                else:
                    print(result[2], "   |   ", result[1])
                if coefs:
                    try:
                        print("Coefs: ", result[0].coefs_)
                    except AttributeError:
                        print("No Coefficients in model!")
                print()

In [60]:
# Smaller grid (2 x 2 = 4 candidates) for the cross-validated search.
param_grid = {"n_trees": [1,10],"max_depth":[1,5]}
# NOTE(review): this rebinds `gs`, replacing the grid_search instance used in earlier cells.
gs = grid_search_cv('random_forest_classifier(mode="rfnode")',param_grid)
# k is left at its default of 5; the recorded output shows five fold scores per candidate.
gs.fit(X,y)

In [62]:
# mean=False prints each candidate's individual fold scores (mean=True would print
# their average instead); coefs=False skips the per-model coefficient printout.
gs.print_results(mean=False, coefs=False)

Model    |    Scores
--------------------
random_forest_classifier(mode="rfnode", n_trees=1, max_depth=5)    |    [0.96, 1.0, 1.0, 1.0, 0.9666666666666667]

random_forest_classifier(mode="rfnode", n_trees=10, max_depth=5)    |    [0.96, 0.9642857142857143, 0.9629629629629629, 1.0, 1.0]

random_forest_classifier(mode="rfnode", n_trees=10, max_depth=1)    |    [0.56, 0.6071428571428571, 0.9629629629629629, 1.0, 0.9]

random_forest_classifier(mode="rfnode", n_trees=1, max_depth=1)    |    [0.52, 0.5714285714285714, 0.7777777777777778, 0.675, 0.7]

