# Demo: Classifying the MNIST dataset with logistic regression

## Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## Load MNIST dataset

In [3]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML
mnist = fetch_openml(name='mnist_784', version=1, parser='auto')

# Features and labels are kept as separate objects
X = mnist.data
y = mnist.target.astype(np.uint8)  # Labels converted to unsigned 8-bit integers

## Data preprocessing

In [None]:
# Hold out 10,000 samples for testing; the remainder is the training set (60k/10k)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=10000,   # Absolute number of test samples, not a fraction
    random_state=42,   # Fixed seed so the split is reproducible
)

# Rescale pixel values to [0, 1] by dividing by the maximum intensity (255)
X_train, X_test = X_train / 255.0, X_test / 255.0


## Model training using a LogisticRegression model

In [None]:
# Initialize and train a multinomial (softmax) logistic regression model on
# the training split.
#
# - `multi_class` is intentionally not passed: it is deprecated in recent
#   scikit-learn releases (it raises a FutureWarning), and with a
#   non-liblinear solver and more than two classes the default already fits
#   the multinomial model.
# - `n_jobs` is intentionally not passed: it only parallelizes the
#   one-vs-rest scheme and has no effect on a multinomial fit.
# - `random_state` is fixed because the SAGA solver shuffles the data, so the
#   fitted coefficients would otherwise differ from run to run.
model = LogisticRegression(
    solver='saga',     # Optimized for large datasets
    max_iter=100,      # Increase if a convergence warning appears
    random_state=42,   # Reproducible solver shuffling
    verbose=1,         # Report progress while fitting
)
model.fit(X_train, y_train)



## Model evaluation

In [None]:
# Predict a digit for every image in the held-out test set
y_pred = model.predict(X_test)

# Report overall accuracy as a percentage, followed by the per-class report
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {accuracy_pct:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Show the first 12 test images in a 3x4 grid, each titled with its prediction
fig = plt.figure(figsize=(10, 8))
for idx, predicted in enumerate(y_pred[:12]):
    ax = fig.add_subplot(3, 4, idx + 1)
    # Each sample is a flat row of 784 pixels; restore the 28x28 image layout
    pixels = X_test.iloc[idx].values.reshape(28, 28)
    ax.imshow(pixels, cmap='gray')
    ax.set_title(f"Predicted: {predicted}")
    ax.axis('off')
fig.tight_layout()
plt.show()