# RandomizedSearchCV y GridSearchCV

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html



In [1]:
import numpy as np

from sklearn.model_selection import RandomizedSearchCV     
from sklearn.model_selection import GridSearchCV


from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

import sklearn.datasets
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

In [2]:
# Synthetic two-class "moons" dataset: N samples with Gaussian noise added,
# seeded so that every run produces the same points.
N = 1000
noisy_data = sklearn.datasets.make_moons(
    n_samples=N,
    noise=0.3,
    random_state=17,
)

In [None]:
# Split the generated dataset into features (X) and labels (Y) and show
# their shapes, one per line.
X, Y = noisy_data
print(X.shape, Y.shape, sep="\n")

In [None]:
# Scatter plot of the two feature columns, coloured by class label.
# The trailing semicolon suppresses the notebook's textual output.
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=Y,
    s=30,
    cmap=plt.cm.Spectral,
);

In [5]:
# Hold out 40% of the samples for testing; the split is seeded so it is
# reproducible.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=17,
)

# RandomizedSearchCV - Logistic Regression:

Ver: 

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html 

In [6]:
# Base logistic-regression estimator whose hyper-parameters are tuned below.
# The seed matters for the 'saga' solver, which shuffles the data; fixing it
# keeps the search reproducible, like every other random step in this notebook.
m_lr = LogisticRegression(random_state=17)

In [None]:
# Display the estimator's current hyper-parameters (the names that can be
# used as keys of the search space).
m_lr.get_params()

In [9]:
# Search space for LogisticRegression, split per solver because not every
# solver supports every penalty: 'lbfgs' only handles 'l2' or None, while
# 'saga' additionally handles 'l1'. A single flat grid would also generate
# the invalid ('lbfgs', 'l1') pairs, whose fits fail and are scored as NaN.
# RandomizedSearchCV accepts a list of dicts as param_distributions.
_lr_c_values = [0.5, 1.0, 1.2, 2.0, 10, 100]  # inverse regularization strength
lr_search_random_grid = [
    {
        'C': _lr_c_values,
        'penalty': ['l2', None],
        'solver': ['lbfgs'],
    },
    {
        'C': _lr_c_values,
        'penalty': ['l1', 'l2', None],
        'solver': ['saga'],
    },
]

In [12]:
# Randomized hyper-parameter search over the logistic-regression space.
m_lr_search_random_grid = RandomizedSearchCV(
    estimator=m_lr,
    param_distributions=lr_search_random_grid,
    # Number of different parameter combinations to try out of all the
    # possible ones.
    n_iter=20,
    # Metric used to score each candidate. See:
    # https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    scoring='accuracy',
    # k of the k-fold cross-validation split.
    cv=5,
    # Do not compute the scores on the training folds; doing so would
    # require extra compute time.
    return_train_score=False,
    random_state=17,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Run the randomized search on the training split only; the trailing
# semicolon suppresses the notebook's textual output.
m_lr_search_random_grid.fit(Xtrain, Ytrain);

In [None]:
# Display the parameter combination that obtained the best
# cross-validation score.
m_lr_search_random_grid.best_params_

In [None]:
# Predictions & confusion matrix on the held-out test split.
results = m_lr_search_random_grid.predict(Xtest)
cm = confusion_matrix(y_true=Ytest, y_pred=results)
print(cm)

In [None]:
# Accuracy: correctly classified samples (the diagonal of the confusion
# matrix) divided by the total number of samples. Written with trace/sum so
# it stays correct for any number of classes, not only the 2x2 case.
cm.trace() / cm.sum()

# Grid Search with Cross Validation: Decision Tree Classifier -- Árbol de decisión simple:

https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html 

In [None]:
# Base decision tree whose hyper-parameters are tuned below. The seed is
# fixed because tree fitting permutes the features at random at each split;
# this keeps the search reproducible, like the other seeded steps here.
m_dtc = DecisionTreeClassifier(random_state=17)
# Display the tunable hyper-parameters and their current values.
m_dtc.get_params()

In [18]:
# Hyper-parameter search space for the decision tree; used both by the
# randomized search and by the exhaustive grid search.
# NOTE(review): the larger ccp_alpha values look very aggressive for
# cost-complexity pruning and may collapse the tree to a single node --
# confirm that they are intended.
dtc_search_random_grid = dict(
    ccp_alpha=[0.0, 0.2, 0.5, 0.9, 1.0, 1.5],
    criterion=['gini', 'entropy'],
    max_depth=[1, 2, 5, 10],
    min_samples_split=[10, 20, 50],
)

In [None]:
# Randomized hyper-parameter search over the decision-tree space.
m_dtc_search_random_grid = RandomizedSearchCV(
    estimator=m_dtc,
    param_distributions=dtc_search_random_grid,
    # Number of different parameter combinations to try out of all the
    # possible ones.
    n_iter=20,
    # Metric used to score each candidate. See:
    # https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    scoring='accuracy',
    # k of the k-fold cross-validation split.
    cv=5,
    # Also compute the scores on the training folds. This requires extra
    # compute time; it is meant for diagnostics such as learning curves.
    return_train_score=True,
    random_state=17,
    n_jobs=-1,
    verbose=2,
)

In [19]:
# Exhaustive search: every combination in the decision-tree grid is
# evaluated with 5-fold cross-validation. No `scoring` is passed, so the
# estimator's own score method ranks the candidates.
m_dtc_grid_search = GridSearchCV(
    estimator=m_dtc,
    param_grid=dtc_search_random_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)

In [None]:
# Fit the exhaustive grid search on the training split. The commented-out
# line is the alternative: fit the randomized search instead (disabled).
#m_dtc_search_random_grid.fit(Xtrain, Ytrain);
m_dtc_grid_search.fit(Xtrain, Ytrain);

In [None]:
# Display the parameter combination that obtained the best
# cross-validation score in the grid search.
m_dtc_grid_search.best_params_

In [None]:
# Predictions & confusion matrix of the tuned decision tree on the
# held-out test split.
results = m_dtc_grid_search.predict(Xtest)
cm = confusion_matrix(y_true=Ytest, y_pred=results)
print(cm)

In [None]:
# Accuracy: correctly classified samples (the diagonal of the confusion
# matrix) divided by the total number of samples. Written with trace/sum so
# it stays correct for any number of classes, not only the 2x2 case.
cm.trace() / cm.sum()