In [15]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from collections import deque

In [16]:
# Load the saved Keras model from 'mnist_model.keras' (path is relative to the
# notebook's working directory). Later cells read `model.input_shape` and call
# `model.predict` on this object.
model = load_model('mnist_model.keras')

In [17]:
# Open capture device 0; adding cv2.CAP_DSHOW to the index requests the
# DirectShow backend.
# NOTE(review): DirectShow is a Windows backend — on other platforms this open
# is expected to fail; confirm the target OS or drop the flag.
# The handle is released at the end of the capture-loop cell, so this cell must
# be re-run before running that loop a second time.
cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)

In [None]:



def to_model_shape(x28, input_shape=None):
    """Scale a digit image to [0, 1] and reshape it into a one-sample batch.

    Parameters
    ----------
    x28 : numpy.ndarray
        Image with pixel values on a 0-255 scale. The capture loop passes the
        28x28 uint8 canvas produced by ``preprocess_mnist``.
    input_shape : tuple, optional
        Model input shape including the leading batch axis, e.g.
        ``(None, 784)``, ``(None, 28, 28)`` or ``(None, 28, 28, 1)``.
        Defaults to ``model.input_shape`` of the notebook-level ``model``.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(1, *input_shape[1:])`` when the per-sample
        dimensions are all known and hold exactly as many values as ``x28``;
        otherwise the ``(1, 28, 28, 1)`` layout is used as a fallback.

    Raises
    ------
    ValueError
        If ``x28`` cannot be reshaped to the selected layout (raised by
        ``numpy.reshape``).
    """
    scaled = x28.astype("float32") / 255.0

    if input_shape is None:
        input_shape = model.input_shape  # e.g. (None,784) or (None,28,28,1)

    # Per-sample dims; unknown (None) dims or a multi-input list of shapes
    # cannot be converted to ints and send us to the fallback layout.
    try:
        sample_dims = tuple(int(d) for d in input_shape[1:])
    except (TypeError, ValueError):
        sample_dims = ()

    if sample_dims and int(np.prod(sample_dims)) == scaled.size:
        return scaled.reshape((1,) + sample_dims)
    return scaled.reshape(1, 28, 28, 1)



In [19]:
def preprocess_mnist(roi_bgr, min_area=300):
    """Extract the largest blob from a BGR region and lay it out MNIST-style.

    Steps: grayscale -> Gaussian blur -> inverted adaptive threshold ->
    largest external contour -> crop to its bounding box -> dilate ->
    resize so the longer side is 20 px -> paste centred on a 28x28 canvas ->
    shift so the centroid of the non-zero pixels lands near (14, 14).

    Parameters
    ----------
    roi_bgr : numpy.ndarray
        Image region in BGR channel order (it is passed to ``cv2.cvtColor``
        with ``COLOR_BGR2GRAY``).
    min_area : int, optional
        Minimum bounding-box area (``w * h`` in pixels) of the largest
        contour; smaller blobs are rejected. Defaults to 300.

    Returns
    -------
    tuple
        ``(canvas, info)``. On success ``canvas`` is a 28x28 uint8 array and
        ``info`` is ``{"area": w*h}``. On rejection ``canvas`` is ``None`` and
        ``info`` is ``{"reason": ...}`` with one of ``"empty roi"``,
        ``"no contour"`` or ``"tiny area"``.
    """
    # An empty crop would make cvtColor raise; report it like any other miss.
    if roi_bgr is None or roi_bgr.size == 0:
        return None, {"reason": "empty roi"}

    gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    # Inverted threshold: pixels darker than their local neighbourhood become 255.
    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

    # [-2] selects the contour list on both OpenCV 3.x (image, contours,
    # hierarchy) and OpenCV 4.x (contours, hierarchy).
    cnts = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(cnts) == 0:
        return None, {"reason":"no contour"}

    biggest = max(cnts, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(biggest)
    if w * h < min_area:   # reject tiny blobs (tune via min_area)
        return None, {"reason":"tiny area"}

    digit = th[y:y+h, x:x+w]
    digit = cv2.dilate(digit, np.ones((3, 3), np.uint8), iterations=1)

    # keep aspect: resize longer side to 20, never letting the short side hit 0
    H, W = digit.shape
    if H > W:
        newH, newW = 20, max(1, int(W * (20.0 / H)))
    else:
        newW, newH = 20, max(1, int(H * (20.0 / W)))
    digit = cv2.resize(digit, (newW, newH), interpolation=cv2.INTER_AREA)

    # paste the resized digit in the middle of a black 28x28 canvas
    canvas = np.zeros((28, 28), dtype=np.uint8)
    x0, y0 = (28 - newW) // 2, (28 - newH) // 2
    canvas[y0:y0+newH, x0:x0+newW] = digit

    # translate by whole pixels so the mean position of non-zero pixels
    # moves to (14, 14); pixels shifted outside the canvas are dropped
    ys, xs = np.nonzero(canvas)
    if len(xs):
        cx, cy = xs.mean(), ys.mean()
        sx, sy = int(round(14 - cx)), int(round(14 - cy))
        M = np.float32([[1, 0, sx], [0, 1, sy]])
        canvas = cv2.warpAffine(canvas, M, (28, 28), flags=cv2.INTER_NEAREST, borderValue=0)

    return canvas, {"area": w*h}



In [20]:
# --- drawing ROI in the center ---
# Reads frames from `cap`, classifies the centre ROI with `model`, and shows
# the annotated frame ("input") plus the 28x28 model input ("digit28").
# Press Esc (key code 27) to stop. The camera and windows are always released,
# even if the loop is interrupted or a frame raises.
history = deque(maxlen=7)    # smoothing window: last 7 (class, prob) predictions
CONF = 0.55                  # minimum probability for a frame to cast a vote
bw, bh = 160, 160            # ROI width / height in pixels

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        # centre ROI, clamped to the frame bounds
        H, W = frame.shape[:2]
        x1, y1 = max(0, W//2 - bw//2), max(0, H//2 - bh//2)
        x2, y2 = min(W, W//2 + bw//2), min(H, H//2 + bh//2)
        roi = frame[y1:y2, x1:x2]

        x28, info = preprocess_mnist(roi)
        disp = frame.copy()
        # green box when a digit candidate was found, red otherwise
        color = (0,255,0) if x28 is not None else (0,0,255)
        cv2.rectangle(disp, (x1,y1), (x2,y2), color, 2)

        text = "No digit"
        if x28 is not None:
            inp = to_model_shape(x28)
            preds = model.predict(inp, verbose=0)[0]
            cls = int(np.argmax(preds))
            prob = float(np.max(preds))
            history.append((cls, prob))

            # smooth: pick the most frequent class among confident frames
            votes = [c for c, p in history if p >= CONF]
            if votes:
                # First-seen class wins ties (same result as statistics.mode
                # on Python 3.8+), without importing inside the frame loop.
                cls_sm = max(votes, key=votes.count)
                prob_sm = max(p for c, p in history if c == cls_sm)
                text = f"Pred: {cls_sm}  Prob: {prob_sm:.2f}"
            else:
                text = f"Pred: ?  Prob: {prob:.2f}"

            # debug view
            view = cv2.resize(x28, (140,140), interpolation=cv2.INTER_NEAREST)
            cv2.imshow("digit28", view)

        cv2.putText(disp, text, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,0,255), 2)
        cv2.imshow("input", disp)

        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Runs on normal exit, on exceptions and on kernel interrupt, so the
    # camera is not left locked by this process.
    cap.release()
    cv2.destroyAllWindows()