In [1]:
from sklearn import datasets, linear_model
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
# Load the digits dataset; with as_frame=True the returned object exposes
# `.data` and `.target` as pandas objects.
df = datasets.load_digits(return_X_y=False, as_frame=True)

# Fraction of the records used for training; the remainder is held out for testing.
percentage = 0.8

# Index *labels* of the randomly sampled training records (fixed seed => reproducible split).
index_ = df.data.sample(frac=percentage, random_state=76).index

# `index_` contains labels, not positions, so select with the label-based `.loc`.
# (`.iloc` only gave the same rows because the frame has a default 0..n-1 index.)
X_train = df.data.loc[index_]
y_train = df.target.loc[index_]

# Everything that was not sampled for training becomes the test set.
X_test = df.data.drop(index_)
y_test = df.target.drop(index_)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1438, 64), (1438,), (359, 64), (359,))

In [4]:
C = 10  # value passed as `C=` to the logistic-regression and SVC models below
RANDOM_STATE = 0  # single seed for every stochastic estimator so a re-run reproduces the scores
kernel = 1.0 * RBF([1.0, 1.0])  # for GPC; not used by any classifier defined in this cell

# Candidate models, keyed by the display name printed in the results.
# The `saga` solver and SGD shuffle the training data, so they are seeded explicitly
# (the SVC was already seeded); without this the reported metrics vary between runs.
classifiers = {
    "L1 logistic": linear_model.LogisticRegression(
        C=C, penalty="l1", solver="saga", max_iter=10000, random_state=RANDOM_STATE
    ),
    "L2 logistic (Multinomial)": linear_model.LogisticRegression(
        C=C, penalty="l2", solver="saga", max_iter=10000, random_state=RANDOM_STATE
    ),
    "L2 logistic (OvR)": OneVsRestClassifier(
        linear_model.LogisticRegression(
            C=C, penalty="l2", solver="saga", max_iter=10000, random_state=RANDOM_STATE
        )
    ),
    "Linear SVC": SVC(kernel="linear", C=C, probability=True, random_state=RANDOM_STATE),
    "Ridge Classifier": linear_model.RidgeClassifier(),
    "SGD Classifier": linear_model.SGDClassifier(
        max_iter=1000, tol=1e-3, loss="hinge", penalty="l2", random_state=RANDOM_STATE
    ),
}

# `classification_report` takes the label *values* via `labels` and their display
# strings via `target_names`; pass each explicitly instead of feeding the names in
# as labels (which only worked because the digit names happen to equal the labels).
class_labels = np.unique(df.target)
class_names = [str(target_name) for target_name in df.target_names]

# `classifier_idx` is not used in this cell; it is kept because notebook globals
# may be read by later cells.
for classifier_idx, (name, model) in enumerate(classifiers.items()):
    # Fit on the training split and predict the held-out test split.
    model.fit(X_train, y_train)

    y_predicted = model.predict(X_test)

    score = accuracy_score(y_test, y_predicted)
    print("{} : {:.2f}%".format(name, score*100))

    # Precision and recall under the three multi-class averaging schemes.
    precision_micro = precision_score(y_test, y_predicted, average='micro')
    recall_micro = recall_score(y_test, y_predicted, average='micro')

    precision_macro = precision_score(y_test, y_predicted, average='macro')
    recall_macro = recall_score(y_test, y_predicted, average='macro')

    precision_weighted = precision_score(y_test, y_predicted, average='weighted')
    recall_weighted = recall_score(y_test, y_predicted, average='weighted')

    print(f"Micro-Averaged Precision: {precision_micro:.2f}")
    print(f"Micro-Averaged Recall: {recall_micro:.2f}")

    print(f"Macro-Averaged Precision: {precision_macro:.2f}")
    print(f"Macro-Averaged Recall: {recall_macro:.2f}")

    print(f"Weighted-Averaged Precision: {precision_weighted:.2f}")
    print(f"Weighted-Averaged Recall: {recall_weighted:.2f}")

    # Detailed per-class report
    print("\nClassification Report:\n")
    print(classification_report(y_test, y_predicted, labels=class_labels, target_names=class_names))
    print("\n======================================================\n")

L1 logistic : 95.26%
Micro-Averaged Precision: 0.95
Micro-Averaged Recall: 0.95
Macro-Averaged Precision: 0.95
Macro-Averaged Recall: 0.95
Weighted-Averaged Precision: 0.96
Weighted-Averaged Recall: 0.95

Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.97      0.99        38
           1       0.81      0.96      0.88        26
           2       0.97      1.00      0.99        36
           3       1.00      0.90      0.95        40
           4       0.97      0.94      0.96        34
           5       0.90      0.97      0.93        29
           6       1.00      0.98      0.99        43
           7       1.00      0.95      0.97        40
           8       0.90      0.90      0.90        39
           9       0.94      0.97      0.96        34

    accuracy                           0.95       359
   macro avg       0.95      0.95      0.95       359
weighted avg       0.96      0.95      0.95       359



L2 logisti