In [1]:
import numpy as np
import pandas as pd
import openml
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [3]:
# Look the dataset up on OpenML by its registered name.
dataset = openml.datasets.get_dataset('mnist_784')

# Split the dataset into features (X) and the dataset's default target (y),
# requesting the "dataframe" format; the remaining two return values describe
# the columns (categorical flags and attribute names).
X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="dataframe",
    target=dataset.default_target_attribute,
)

In [4]:
# Hold out half of the samples for evaluation; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [5]:
# Sanity check: (n_samples, n_features) of the training features.
X_train.shape

(35000, 784)

In [6]:
# Sanity check: (n_samples, n_features) of the held-out features.
X_test.shape

(35000, 784)

In [7]:
# Support-vector classifier with a degree-3 polynomial kernel.
# class_weight='balanced' reweights classes inversely to their frequency, and
# decision_function_shape='ovr' exposes one-vs-rest shaped decision values.
clf = SVC(
    C=10,
    kernel='poly',
    degree=3,
    class_weight='balanced',
    decision_function_shape='ovr',
)

In [8]:
# Chain feature standardisation with the classifier so that the scaler is
# fitted only on whatever data the pipeline itself is fitted on.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', clf),
    ]
)

In [9]:
# Fit every pipeline step on the training split.
pipe.fit(X_train, y_train)

In [10]:
# Predict labels for the held-out split with the fitted pipeline.
y_pred = pipe.predict(X_test)

In [11]:
# Confusion matrix: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)

In [12]:
# Display the raw confusion-matrix counts.
cm

array([[3401,    1,   10,    2,    7,    8,   15,    1,   16,    2],
       [   0, 3876,   18,    7,    6,    0,    2,    3,    9,    6],
       [   6,    8, 3406,   12,   10,    6,    5,   27,   27,   13],
       [   1,    1,   34, 3404,    1,   30,    0,   16,   42,   22],
       [   3,    6,    8,    1, 3259,    2,    8,    7,    5,   34],
       [   5,    0,    6,   26,    6, 3051,   18,    4,   21,    7],
       [   8,    1,    9,    1,   12,   15, 3432,    2,    9,    1],
       [   3,   17,   23,    2,   29,    2,    0, 3591,    4,   47],
       [   5,   11,   17,   20,   10,   24,    9,    7, 3226,   15],
       [   8,    7,   16,   12,   40,    9,    0,   19,   14, 3385]])

In [13]:
# Sanity check: the matrix should be square, one row/column per class.
cm.shape

(10, 10)

In [14]:
from sklearn.metrics import precision_score, f1_score, recall_score

In [15]:
# Per-class F1 averaged with weights proportional to each class's support.
f1_score(y_test, y_pred, average='weighted')

0.9723258740744225