In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import StratifiedShuffleSplit

from sklearn.metrics import precision_recall_fscore_support

# Fetch the 'mnist_784' dataset (version 1) from https://www.openml.org/d/554
data = fetch_openml('mnist_784', version=1, parser='auto')

# Build one frame holding the features plus a "target" column.
# The features and labels are deliberately NOT stacked with np.c_: that
# forces both into a single array with one common dtype, so non-numeric
# labels would turn every feature column into `object` as well.
dfData = pd.DataFrame(data["data"], columns=data["feature_names"]).assign(
    target=np.asarray(data["target"])  # positional assignment, plain array of labels
)

In [None]:
# One stratified shuffle split with 20% of the rows held out for testing;
# random_state is fixed so the partition is reproducible.
stratSplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

features = dfData[data["feature_names"]]
labels = dfData["target"]

# n_splits=1, so the splitter yields exactly one (train, test) pair of
# positional index arrays.
train_index, test_index = next(stratSplit.split(features, labels))

X_train, X_test = features.iloc[train_index], features.iloc[test_index]
y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]

In [5]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier: constructed with no arguments, so every
# hyperparameter is left at the library default.
logReg_clf = LogisticRegression()

In [None]:
from sklearn.model_selection import cross_validate

# 5-fold cross-validation of logReg_clf on the training partition, scoring
# accuracy, weighted one-vs-rest ROC AUC and macro F1 on every fold.
results = cross_validate(
    logReg_clf,
    X_train,
    y_train,
    cv=5,  # an integer cv with a classifier gives stratified folds
    scoring=["accuracy", "roc_auc_ovr_weighted", "f1_macro"],
    return_train_score=True,  # also report the scores on the training folds
    return_estimator=True,  # keep the fitted estimator from each fold
    n_jobs=1,  # number of CPUs to use; -1 means all
    verbose=True,  # print status updates while fitting
)

In [None]:
# Summarise the macro-F1 obtained on the held-out fold of each CV split.
f1_per_fold = np.asarray(results["test_f1_macro"])
print(f1_per_fold.mean())  # average over the folds
print(f1_per_fold.std())  # fold-to-fold variation (NumPy default, ddof=0)

In [8]:
# Logistic regression with its main hyperparameters spelled out explicitly.
logReg_clf = LogisticRegression(
    tol=0.0001,  # requirement for convergence
    fit_intercept=True,  # should a bias be added to the decision function?
    # Class weighting; accepts "balanced", None, or a {label: weight} dict.
    # Do not reuse the digit itself as its weight (e.g. {"0": 0, ..., "9": 9}):
    # a weight of 0 removes class "0" from the loss entirely and the other
    # classes get arbitrarily skewed importance. "balanced" derives the
    # weights from the label frequencies instead.
    class_weight="balanced",
    max_iter=100,  # maximum number of iterations before stopping if not converged
    solver="lbfgs",  # algorithm for optimization
    # multi_class is intentionally not passed: "auto" is already the default,
    # and the parameter is deprecated in recent scikit-learn releases.
    verbose=False,  # output status updates
    warm_start=False,  # reuse the previous solution as initialization
    n_jobs=1,  # number of CPUs to use for parallel training across classes
)