In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plot theme: larger title/label/legend text, thicker lines, wide default figure.
sns.set(
    rc={
        "axes.titlesize": 20,
        "axes.labelsize": 15,
        "legend.fontsize": 15,
        "lines.linewidth": 3,
        "figure.figsize": (9, 4),
    }
)

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the digits classification dataset as a (features, labels) pair.
X, y = datasets.load_digits(return_X_y=True)

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [3]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

import warnings

# Silence ALL warnings for the rest of the session to keep the output readable.
# NOTE(review): this is a blanket filter, so it also hides any warning that would
# flag a model as not having converged (presumably the noise it was added for).
warnings.filterwarnings("ignore")

# Enumerate every (model, hyper-parameter) candidate to be compared by cross-validation.
# The list order defines the "Model index" used by the later cells:
#   SVC (kernel x C), then LogisticRegression (max_iter), then MLPClassifier (activation x solver).
cv_candidate_list = []

for kernel in ["sigmoid", "rbf"]:
    for C in [1, 10]:
        cv_candidate_list.append(SVC(kernel=kernel, C=C))

for max_iter in [100, 500, 2000]:
    cv_candidate_list.append(LogisticRegression(max_iter=max_iter))

for activation in ["logistic", "tanh", "relu"]:
    for solver in ["lbfgs", "sgd", "adam"]:
        # MLP training is stochastic (random weight initialisation, batch sampling);
        # pin random_state so a fresh "Restart & Run All" reproduces the same scores.
        cv_candidate_list.append(
            MLPClassifier(activation=activation, solver=solver, random_state=1)
        )

In [None]:
from sklearn.metrics import get_scorer
from sklearn.model_selection import cross_val_score    # Only return scores.

# Metric used for the CV, training AND testing scores, so the later plots compare
# like with like. Any scikit-learn scorer name can be used here (e.g. "accuracy").
scoring = "f1_macro"
scorer = get_scorer(scoring)    # Callable with signature scorer(estimator, X, y).

# Lists are rebuilt from scratch so re-running this cell never appends duplicates.
cv_score_list, training_score_list, testing_score_list = [], [], []
for cv_candidate in tqdm(cv_candidate_list):
    # Mean score over 5 folds, computed on the training split only.
    fold_scores = cross_val_score(cv_candidate, X_train, y_train, cv=5, scoring=scoring)
    cv_score_list.append(np.mean(fold_scores))

    # Refit on the full training split, then score both splits with the SAME metric
    # as the CV above (estimator.score() would silently fall back to accuracy).
    model = cv_candidate.fit(X_train, y_train)
    training_score_list.append(scorer(model, X_train, y_train))
    testing_score_list.append(scorer(model, X_test, y_test))

 50%|█████████████████████████████████████████▌                                         | 8/16 [00:16<00:24,  3.09s/it]

In [None]:
# Compare training and testing scores for every candidate model.
# Models are numbered from 1 in the order they appear in cv_candidate_list.
model_number_list = [index + 1 for index in range(len(cv_candidate_list))]

ax = plt.gca()
ax.plot(model_number_list, training_score_list, color="red", label="training")
ax.plot(model_number_list, testing_score_list, color="blue", label="testing")
ax.set_xlabel("Model index")
ax.set_ylabel("Performance")
ax.set_title("training_score vs testing_score")
ax.legend(loc="lower right")
ax.set_xticks(model_number_list)    # One tick per model.
plt.show()

In [None]:
# Compare cross-validation and testing scores for every candidate model.
# Models are numbered from 1 in the order they appear in cv_candidate_list.
model_number_list = [index + 1 for index in range(len(cv_candidate_list))]

ax = plt.gca()
ax.plot(model_number_list, cv_score_list, color="red", label="cv")
ax.plot(model_number_list, testing_score_list, color="blue", label="testing")
ax.set_xlabel("Model index")
ax.set_ylabel("Performance")
ax.set_title("CV_score vs testing_score")
ax.legend(loc="lower right")
ax.set_xticks(model_number_list)    # One tick per model.
plt.show()