In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

In [6]:
with np.load('mnist.npz') as data:
    X_train, y_train = data['x_train'], data['y_train']
    X_test, y_test = data['x_test'], data['y_test']

In [8]:

X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)


In [9]:

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

In [10]:

X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=42)

In [19]:

knn_model = KNeighborsClassifier()
svm_model = SVC()
logistic_model = LogisticRegression(max_iter=100000)  
decision_tree_model = DecisionTreeClassifier()
random_forest_model = RandomForestClassifier()

In [20]:

knn_model.fit(X_train_split, y_train_split)
svm_model.fit(X_train_split, y_train_split)
logistic_model.fit(X_train_split, y_train_split)
decision_tree_model.fit(X_train_split, y_train_split)
random_forest_model.fit(X_train_split, y_train_split)

In [None]:

knn_predictions = knn_model.predict(X_val_split)
svm_predictions = svm_model.predict(X_val_split)
logistic_predictions = logistic_model.predict(X_val_split)
decision_tree_predictions = decision_tree_model.predict(X_val_split)
random_forest_predictions = random_forest_model.predict(X_val_split)

In [None]:

knn_accuracy = accuracy_score(y_val_split, knn_predictions)
svm_accuracy = accuracy_score(y_val_split, svm_predictions)
logistic_accuracy = accuracy_score(y_val_split, logistic_predictions)
decision_tree_accuracy = accuracy_score(y_val_split, decision_tree_predictions)
random_forest_accuracy = accuracy_score(y_val_split, random_forest_predictions)

In [None]:

print(f'KNN Accuracy on Validation Data: {knn_accuracy}')
print(f'SVM Accuracy on Validation Data: {svm_accuracy}')
print(f'Logistic Regression Accuracy on Validation Data: {logistic_accuracy}')
print(f'Decision Tree Accuracy on Validation Data: {decision_tree_accuracy}')
print(f'Random Forest Accuracy on Validation Data: {random_forest_accuracy}')

KNN Accuracy on Validation Data: 0.94575
SVM Accuracy on Validation Data: 0.9636666666666667
Logistic Regression Accuracy on Validation Data: 0.9145833333333333
Decision Tree Accuracy on Validation Data: 0.8661666666666666
Random Forest Accuracy on Validation Data: 0.9679166666666666


In [None]:

best_model = max([(knn_model, knn_accuracy), (svm_model, svm_accuracy), (logistic_model, logistic_accuracy),
                  (decision_tree_model, decision_tree_accuracy), (random_forest_model, random_forest_accuracy)],
                 key=lambda x: x[1])[0]

In [None]:

X_test_flat = X_test.reshape(X_test.shape[0], -1) 
X_test_scaled = scaler.transform(X_test_flat)

final_predictions = best_model.predict(X_test_scaled)
final_accuracy = accuracy_score(y_test, final_predictions)

print(f'Final Model Accuracy on Test Data: {final_accuracy}')

Final Model Accuracy on Test Data: 0.9689


In [18]:

print("Classification Report for the Final Model:")
print(classification_report(y_test, final_predictions))

print("Confusion Matrix for the Final Model:")
print(confusion_matrix(y_test, final_predictions))

Classification Report for the Final Model:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.97      0.96      1032
           3       0.97      0.96      0.96      1010
           4       0.98      0.97      0.97       982
           5       0.96      0.97      0.96       892
           6       0.98      0.98      0.98       958
           7       0.97      0.96      0.97      1028
           8       0.96      0.95      0.96       974
           9       0.96      0.95      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Confusion Matrix for the Final Model:
[[ 971    0    0    0    0    2    1    1    4    1]
 [   0 1124    2    3    0    2    2    0    1    1]
 [   6    0  998    7    2    0    3   10    6    0]
 

In [None]:
#Agarche Dige dar daneshgah tofigh hozur dar kelasetun nakhahim dasht , vali khaterat khubi az shoma be yad Miseparim :)