In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

mnist = fetch_openml('mnist_784', version=1, as_frame=False)

In [2]:
X, Y = mnist["data"], mnist["target"].astype(np.uint8)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [3]:
Y_train_0 = (Y_train == 0)
Y_test_0 = (Y_test == 0)

In [4]:
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, Y_train_0)

In [5]:
Y_train_predict = sgd_clf.predict(X_train)
Y_test_predict = sgd_clf.predict(X_test)

acc_train = sum(Y_train_predict == Y_train_0) / len(Y_train_0)
acc_test = sum(Y_test_predict == Y_test_0) / len(Y_test_0)

print(acc_train, acc_test)

0.9918571428571429 0.9903571428571428


In [6]:
results = [acc_train, acc_test]

with open('sgd_acc.pkl', 'wb') as file:
    pickle.dump(results, file)

In [7]:
cvscore = cross_val_score(sgd_clf, X_train, Y_train_0, cv=3, scoring="accuracy", n_jobs=-1)

In [8]:
with open('sgd_cva.pkl', 'wb') as file:
    pickle.dump(np.array(cvscore), file)

In [9]:
svm_clf = SVC()
svm_clf.fit(X_train, Y_train)
print(svm_clf.classes_)

[0 1 2 3 4 5 6 7 8 9]


In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

sgd_m_clf = SGDClassifier(random_state=42, n_jobs=-1, max_iter=2000)
sgd_m_clf.fit(X_train_scaled, Y_train)

print(sgd_m_clf.predict([mnist["data"][0], mnist["data"][1]]))

[3 0]


In [11]:
Y_train_predict = cross_val_predict(sgd_m_clf, X_train_scaled, Y_train, cv=3, n_jobs=-1)

conf_matrix = confusion_matrix(Y_train, Y_train_predict)
print(conf_matrix)

[0.91000161 0.90625167 0.90110361]
[[ 298 5262    0    0    0    0    0    0    0    0]
 [6275    2    0    0    0    0    0    0    0    0]
 [5588   22    0    0    0    0    0    0    0    0]
 [5698   10    0    0    0    0    0    0    0    0]
 [5525    4    0    0    0    0    0    0    0    0]
 [5009   31    0    0    0    0    0    0    0    0]
 [5456   24    0    0    0    0    0    0    0    0]
 [5760   30    0    0    0    0    0    0    0    0]
 [5448   20    0    0    0    0    0    0    0    0]
 [5523   15    0    0    0    0    0    0    0    0]]


In [12]:
with open('sgd_cmx.pkl', 'wb') as file:
    pickle.dump(np.array(conf_matrix), file)