<a href="https://colab.research.google.com/github/aryaman2306/numberguesser-mnist/blob/main/NumberGuesserModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn joblib



In [None]:
from sklearn.datasets import fetch_openml
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib


In [None]:
# Download MNIST from OpenML as plain arrays (70k samples; each 28×28 image is
# flattened into a row of 784 pixel features).
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
    as_frame=False,
)

# Cast to float32, then rescale pixel intensities from [0–255] to [0.0–1.0].
X = X.astype('float32')
X /= 255.0

# Stratified hold-out split: 10,000 samples for testing, the rest for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=10_000,
)


In [None]:
# Learn the per-feature mean / standard deviation from the training split only,
# then standardise both splits with those statistics.
scaler = StandardScaler(with_mean=True, with_std=True).fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# NOTE: X_train / X_test are overwritten with their scaled versions, so running
# this cell a second time would refit the scaler on already-standardised data.
# Re-run the data-loading cell first if this cell has to be executed again.

# Persist the fitted scaler so the identical transform is available at inference time.
joblib.dump(scaler, 'mnist_scaler.joblib')


['mnist_scaler.joblib']

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Build the random forest with a fixed seed and fit it on the scaled training
# split in one step (fit() returns the estimator itself).
forest_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Keep the estimator as the last expression so the notebook displays its repr.
forest_clf


In [None]:
# Score the model on the held-out test split. X_test was standardised with the
# fitted scaler earlier, so it can be passed to the model directly.
# Scoring on the training split (as this cell previously did under a "Test
# accuracy" label) mostly measures how well the forest memorised its own data.
y_pred = forest_clf.predict(X_test)
print("Train accuracy:", forest_clf.score(X_train, y_train))
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Test accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5917
           1       1.00      1.00      1.00      6752
           2       1.00      1.00      1.00      5991
           3       1.00      1.00      1.00      6121
           4       1.00      1.00      1.00      5849
           5       1.00      1.00      1.00      5411
           6       1.00      1.00      1.00      5894
           7       1.00      1.00      1.00      6251
           8       1.00      1.00      1.00      5850
           9       1.00      1.00      1.00      5964

    accuracy                           1.00     60000
   macro avg       1.00      1.00      1.00     60000
weighted avg       1.00      1.00      1.00     60000



In [None]:
# Persist the trained random forest. The file name says "sgd" but the object
# stored is `forest_clf`; the name is kept because later cells load this path.
joblib.dump(value=forest_clf, filename='mnist_sgd_clf.joblib')


['mnist_sgd_clf.joblib']

In [None]:
from sklearn.datasets import fetch_openml
import joblib

# Reload both persisted artefacts to check that they round-trip through joblib.
# Despite its name, 'mnist_sgd_clf.joblib' holds the random forest dumped from
# `forest_clf`, not an SGDClassifier.
scaler = joblib.load('mnist_scaler.joblib')
forest_clf = joblib.load('mnist_sgd_clf.joblib')

# Fetch the raw data and apply the same [0, 1] pixel rescaling used for training.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X = X.astype('float32') / 255.0
# Single-sample smoke test. X[0] comes from the full dataset, so it is not
# guaranteed to be unseen by the model; this verifies the pipeline, not accuracy.
x0, y0 = X[0].reshape(1, -1), y[0]

# apply the scaler exactly as you will in Gradio
x0s = scaler.transform(x0)
print("True label:", y0)
# Predict with the model reloaded in this cell. The name `clf` is only bound
# further down in the notebook, so using it here fails on a fresh kernel.
print("Model predicts:", forest_clf.predict(x0s))


True label: 5
Model predicts: ['5']


In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd

# Recreate the hold-out split from the raw OpenML data. The split arguments
# (test_size / random_state / stratify) must match the ones used for training.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True, as_frame=False)
X = X.astype('float32')
X /= 255.0
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=10_000,
)

# Standardise the freshly split test pixels with the fitted scaler and classify them.
X_test_scaled = scaler.transform(X_test)
y_pred = forest_clf.predict(X_test_scaled)

# Confusion matrix over the ten digit classes, labelled with the strings '0'..'9'.
labels = list(map(str, range(10)))
cm = confusion_matrix(y_test, y_pred, labels=labels)
cm_df = pd.DataFrame(
    cm,
    index=["true_" + digit for digit in labels],
    columns=["pred_" + digit for digit in labels],
)

# Show the table with the notebook's rich DataFrame rendering.
print("Confusion Matrix (rows=true, cols=predicted):")
display(cm_df)


Confusion Matrix (rows=true, cols=predicted):


Unnamed: 0,pred_0,pred_1,pred_2,pred_3,pred_4,pred_5,pred_6,pred_7,pred_8,pred_9
true_0,972,1,2,0,0,2,6,0,2,1
true_1,0,1110,4,3,1,2,2,3,0,0
true_2,7,3,959,2,7,5,1,13,2,0
true_3,0,0,8,976,0,13,2,18,3,0
true_4,1,2,1,0,943,0,5,5,0,18
true_5,3,1,1,9,2,872,7,3,1,3
true_6,5,0,0,0,2,8,967,0,0,0
true_7,1,7,9,0,3,0,0,1016,0,6
true_8,4,10,6,26,5,25,4,3,877,15
true_9,4,10,2,15,28,5,0,14,8,908


Deployment of Model


In [None]:
!pip install gradio pillow




In [None]:
import joblib

# Load the fitted scaler and the trained classifier for serving, in that order.
# adjust paths if needed
scaler, clf = (
    joblib.load(artifact_path)
    for artifact_path in ('mnist_scaler.joblib', 'mnist_sgd_clf.joblib')
)


In [None]:
from PIL import Image, ImageOps
import numpy as np

def predict_digit(img: np.ndarray):
    """Classify a drawn digit image and return a probability per class.

    Args:
        img: uint8 image array (H×W×3 RGB; arrays carrying an alpha channel
            are also accepted and are flattened onto a white background).

    Returns:
        Dict mapping each class label of the loaded classifier (as a string)
        to its predicted probability, in the form ``gr.Label`` accepts.
    """
    pil = Image.fromarray(img)
    # Dropping an alpha channel keeps only the colour values, which are
    # typically black for fully transparent pixels; the inversion below would
    # then read an empty canvas as solid ink. Composite onto white paper first.
    if pil.mode in ("RGBA", "LA"):
        rgba = pil.convert("RGBA")
        paper = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
        pil = Image.alpha_composite(paper, rgba)
    pil = pil.convert("L")                             # → grayscale
    # Invert so dark strokes on a light canvas become bright strokes on a dark
    # background, then shrink to the 28×28 size the model was trained on.
    pil = ImageOps.invert(pil).resize((28, 28))
    arr = np.array(pil).reshape(1, -1) / 255.0         # flatten to [1×784], scale to [0, 1]
    # Apply the same standardisation that was fitted on the training data.
    arr = scaler.transform(arr)
    # Class probabilities from the loaded classifier, one entry per class.
    probs = clf.predict_proba(arr)[0]
    # Pair probabilities with the classifier's own class labels instead of
    # assuming that column i corresponds to digit i.
    return {str(label): float(p) for label, p in zip(clf.classes_, probs)}


In [None]:
import gradio as gr

def predict_from_sketchpad(input_value):
    """Adapt the Sketchpad payload to ``predict_digit``.

    Args:
        input_value: Sketchpad value; a dict whose 'composite' entry holds the
            drawing as a NumPy array. May be None when nothing was submitted.

    Returns:
        The label→probability dict from ``predict_digit``, or None when there
        is no drawing to classify (so the output is left empty instead of
        the callback raising).
    """
    if input_value is None:
        return None
    # Sketchpad returns a dict with key 'composite'
    img_array = input_value["composite"]
    if img_array is None:
        return None
    return predict_digit(img_array)

# Wire the prediction callback to a drawing canvas input and a label output.
iface = gr.Interface(
    fn=predict_from_sketchpad,
    inputs=gr.Sketchpad(),               # simple black-and-white sketchpad
    outputs=gr.Label(num_top_classes=1),
    # The served model is the random forest stored in 'mnist_sgd_clf.joblib',
    # so the title names that estimator rather than SGDClassifier.
    title="MNIST Digit Recognizer (RandomForestClassifier)",
    description="Draw a digit (0–9) below and click Submit"
)

# share=True exposes the app through a temporary public URL; anyone with the
# link can reach it, so drop the flag for local-only use.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://02055f9ad0e3eb2f69.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




NameError: name 'img_array' is not defined