#GRID SEARCH

In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# Load a sample dataset
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
model = SVC()

# Define the hyperparameters and the range of values to test
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],  # Kernel coefficient
    'kernel': ['linear', 'rbf', 'poly']  # Kernel type
}

# Set up the Grid Search
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy')

# Perform the Grid Search
grid_search.fit(X_train, y_train)

# Find the best parameters
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# Train the final model using the best parameters
best_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Set Accuracy: {accuracy:.4f}")


Best Hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
Test Set Accuracy: 1.0000


In [6]:
grid_search.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_C', 'param_gamma', 'param_kernel', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [2]:


# Get all the combinations tried and their corresponding results
results = pd.DataFrame(grid_search.cv_results_)

# Display the columns of interest
results1 = results[['param_C', 'param_gamma', 'param_kernel', 'mean_test_score', 'std_test_score', 'rank_test_score']]

# Sort by the best score
results1 = results1.sort_values(by='rank_test_score')

# Show the DataFrame
display(results1)


Unnamed: 0,param_C,param_gamma,param_kernel,mean_test_score,std_test_score,rank_test_score
14,0.1,0.1,poly,0.958333,0.064550,1
27,1.0,0.01,linear,0.958333,0.045644,1
21,1.0,auto,linear,0.958333,0.045644,1
30,1.0,0.1,linear,0.958333,0.045644,1
24,1.0,0.001,linear,0.958333,0.045644,1
...,...,...,...,...,...,...
62,100.0,0.001,poly,0.425000,0.183333,68
11,0.1,0.01,poly,0.425000,0.183333,68
44,10.0,0.001,poly,0.425000,0.183333,68
8,0.1,0.001,poly,0.425000,0.183333,68


You can access the results of all the combinations tried by `GridSearchCV` through the `cv_results_` attribute, which contains detailed information about each combination of hyperparameters. Here's how you can extract this information and display it in a `pandas` DataFrame:


### Explanation:

- **`cv_results_`:** This attribute of `GridSearchCV` contains all the details of the grid search, including the parameters tried and the corresponding scores.
- **`pd.DataFrame(grid_search.cv_results_)`:** Converts the dictionary of results into a `pandas` DataFrame.
- **`param_C`, `param_gamma`, `param_kernel`:** These columns represent the hyperparameters that were tried.
- **`mean_test_score`:** The mean cross-validated score for each combination.
- **`std_test_score`:** The standard deviation of the test score across the cross-validation folds.
- **`rank_test_score`:** The rank of the test scores, where 1 is the best.

### Output:
The DataFrame will contain all the tried combinations, sorted by the rank of their test scores, allowing you to see which combinations were the most effective.

This approach gives you an in-depth view of how each combination performed, which can be very useful for understanding the model's behavior and for further fine-tuning.

In [3]:
results .columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_C', 'param_gamma', 'param_kernel', 'params', 'split0_test_score',
       'split1_test_score', 'split2_test_score', 'split3_test_score',
       'split4_test_score', 'mean_test_score', 'std_test_score',
       'rank_test_score'],
      dtype='object')

In [4]:
grid_search.cv_results_

{'mean_fit_time': array([0.00311074, 0.00439115, 0.00452065, 0.00272293, 0.00382242,
        0.00189991, 0.00310678, 0.00262761, 0.02065444, 0.00969596,
        0.01744795, 0.00730362, 0.01295004, 0.00926447, 0.00729775,
        0.00874858, 0.01040692, 0.02494998, 0.00200286, 0.00787849,
        0.00952716, 0.00498495, 0.0100893 , 0.00732288, 0.00194845,
        0.00391531, 0.01164021, 0.0070744 , 0.01814137, 0.01021667,
        0.01019158, 0.01378555, 0.01188979, 0.01292639, 0.01334238,
        0.09118724, 0.02401228, 0.00861535, 0.0095233 , 0.00865583,
        0.01293964, 0.02219577, 0.01087885, 0.00610366, 0.00230236,
        0.00345597, 0.00418835, 0.00245185, 0.00712223, 0.00548472,
        0.00994854, 0.00982628, 0.00322208, 0.17954841, 0.00342779,
        0.00550566, 0.01091542, 0.0124259 , 0.01218452, 0.11045589,
        0.00475445, 0.01339993, 0.00854192, 0.00271707, 0.01209188,
        0.0083025 , 0.00239744, 0.00685334, 0.01410255, 0.01438017,
        0.00740399, 0.92229123]