<a href="https://colab.research.google.com/github/cherryash100/HAND-WRITTEN-DIGIT-RECOGNITION/blob/main/hand_written_digit_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!pip install -q scikit-learn tensorflow numpy matplotlib opencv-python mediapipe pillow

import numpy as np
import matplotlib.pyplot as plt
import cv2
import mediapipe as mp
from collections import deque
import time
from IPython.display import display, Javascript, HTML, clear_output
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import warnings
import json # Import the json library

# Silence every Python warning for the rest of the session (affects all
# libraries, not just the ones used below).
warnings.filterwarnings('ignore')

# --- Dataset ---------------------------------------------------------------
# Fetch MNIST (784 pixel columns per sample) from OpenML, convert to NumPy,
# cast the labels to integers and scale the pixel values by 1/255.
print("Loading MNIST Dataset...")
mnist = fetch_openml('mnist_784', version=1, parser='auto')
X, y = mnist.data, mnist.target
X = np.array(X)
y = np.array(y).astype(int)
X = X / 255.0
# Hold out 10,000 samples, stratified by label, with a fixed seed.
# NOTE(review): X_test / y_test are not used anywhere else in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10000, random_state=42, stratify=y)
print("✓ Dataset loaded!")

# --- KNN -------------------------------------------------------------------
# Fitted on the first 10,000 training rows only.
print("\nTraining KNN Model...")
knn_model = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)
knn_model.fit(X_train[:10000], y_train[:10000])
print("✓ KNN trained!")

# --- SVM -------------------------------------------------------------------
# Same 10,000-row subset as KNN. probability=True is required because
# predict_digit() calls svm_model.predict_proba().
print("\nTraining SVM Model...")
svm_model = SVC(kernel='rbf', gamma='scale', C=10, random_state=42, probability=True)
svm_model.fit(X_train[:10000], y_train[:10000])
print("✓ SVM trained!")

# --- ANN -------------------------------------------------------------------
# Dense 784 -> 128 -> 64 -> 10 network with dropout, trained on the full
# training split with one-hot labels; 10% of it is used for validation.
print("\nTraining ANN Model...")
y_train_ann = to_categorical(y_train, 10)
ann_model = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax')
])
ann_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
ann_model.fit(X_train, y_train_ann, epochs=10, batch_size=128, validation_split=0.1, verbose=0)
print("✓ ANN trained!")

print("\n" + "="*70)
print("ALL MODELS READY!")
print("="*70)

def js_to_image(js_reply):
    """Decode a base64 data URL captured in the browser into an OpenCV image.

    Args:
        js_reply: Data URL string of the form ``<header>,<base64 payload>``
            (for example ``data:image/jpeg;base64,...``).

    Returns:
        The array produced by ``cv2.imdecode`` in colour mode, or ``None``
        when ``js_reply`` is empty or has no payload after the comma.
        ``cv2.imdecode`` itself also returns ``None`` for bytes it cannot
        decode, so callers only need a single ``is None`` check.
    """
    # The browser side resolves a capture with an empty string once the
    # user has asked to stop; treat that as "no frame" instead of raising.
    if not js_reply:
        return None

    # Everything after the first comma is the base64 payload.
    _, _, payload = js_reply.partition(',')
    if not payload:
        return None

    image_bytes = b64decode(payload)
    jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
    return cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR)

def video_stream():
    """Inject the browser-side webcam helper into the notebook output.

    Displays a JavaScript snippet that defines ``stream_frame(label, imgData)``
    for later use through ``eval_js``. On its first call ``stream_frame``
    builds the UI (title bar, ``<video>`` fed by ``getUserMedia``, a label
    area and an overlay ``<img>``) and starts an animation-frame loop. Each
    call then:

    * replaces the label HTML when ``label`` is non-empty,
    * positions the overlay ``<img>`` over the video and sets its source
      when ``imgData`` is non-empty,
    * waits for the next animation frame, draws the video into a 640x480
      canvas and returns a dict with timing fields (``create``, ``show``,
      ``capture``) and ``img``, a JPEG data URL at quality 0.8.

    Clicking the video sets a ``shutdown`` flag. If a capture is pending at
    that moment it resolves with ``img`` set to an empty string; otherwise
    the next ``stream_frame`` call removes the UI, stops the camera track
    and returns an empty string.

    NOTE(review): the overlay ``<img>`` is created with ``display: none`` and
    nothing in this snippet changes that, so the annotated frame sent from
    Python appears never to become visible — confirm whether that is
    intended.
    """
    js = Javascript('''
        var video;
        var div = null;
        var stream;
        var captureCanvas;
        var imgElement;
        var labelDiv;

        var pendingResolve = null;
        var shutdown = false;

        function removeDom() {
            stream.getVideoTracks()[0].stop();
            video.remove();
            div.remove();
            video = null;
            div = null;
            stream = null;
            imgElement = null;
            captureCanvas = null;
            labelDiv = null;
        }

        function onAnimationFrame() {
            if (!shutdown) {
                window.requestAnimationFrame(onAnimationFrame);
            }
            if (pendingResolve) {
                var result = "";
                if (!shutdown) {
                    captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
                    result = captureCanvas.toDataURL('image/jpeg', 0.8);
                }
                var lp = pendingResolve;
                pendingResolve = null;
                lp(result);
            }
        }

        async function createDom() {
            if (div !== null) {
                return stream;
            }

            div = document.createElement('div');
            div.style.border = '2px solid black';
            div.style.padding = '10px';
            div.style.width = '660px';
            div.style.margin = '20px auto';
            document.body.appendChild(div);

            const modelDiv = document.createElement('div');
            modelDiv.style.color = 'white';
            modelDiv.style.fontSize = '18px';
            modelDiv.style.padding = '10px';
            modelDiv.style.marginBottom = '10px';
            modelDiv.style.background = 'linear-gradient(90deg, #667eea 0%, #764ba2 100%)';
            modelDiv.style.borderRadius = '5px';
            modelDiv.style.textAlign = 'center';
            modelDiv.style.fontWeight = 'bold';
            modelDiv.innerHTML = 'Real-Time Air Drawing Digit Recognition';
            div.appendChild(modelDiv);

            video = document.createElement('video');
            video.style.display = 'block';
            video.width = 640;
            video.height = 480;
            video.setAttribute('playsinline', '');
            video.onclick = () => { shutdown = true; };
            stream = await navigator.mediaDevices.getUserMedia({video: { facingMode: "user"}});
            div.appendChild(video);

            labelDiv = document.createElement('div');
            labelDiv.style.marginTop = '10px';
            labelDiv.style.fontSize = '24px';
            labelDiv.style.fontWeight = 'bold';
            labelDiv.style.textAlign = 'center';
            labelDiv.style.padding = '20px';
            labelDiv.style.background = '#f0f0f0';
            labelDiv.style.borderRadius = '5px';
            labelDiv.innerHTML = 'Draw a digit in the air with your finger! ✍️';
            div.appendChild(labelDiv);

            imgElement = document.createElement('img');
            imgElement.style.position = 'absolute';
            imgElement.style.zIndex = 1;
            imgElement.style.display = 'none';
            div.appendChild(imgElement);

            captureCanvas = document.createElement('canvas');
            captureCanvas.width = 640;
            captureCanvas.height = 480;

            video.srcObject = stream;
            await video.play();

            window.requestAnimationFrame(onAnimationFrame);

            return stream;
        }
        async function stream_frame(label, imgData) {
            if (shutdown) {
                removeDom();
                shutdown = false;
                return '';
            }

            var preCreate = Date.now();
            stream = await createDom();

            var preShow = Date.now();
            if (label != "") {
                labelDiv.innerHTML = label; // Removed JSON.parse()
            }

            if (imgData != "") {
                var videoRect = video.getClientRects()[0];
                imgElement.style.top = videoRect.top + "px";
                imgElement.style.left = videoRect.left + "px";
                imgElement.style.width = videoRect.width + "px";
                imgElement.style.height = videoRect.height + "px";
                imgElement.src = imgData; // Removed JSON.parse()
            }

            var preCapture = Date.now();
            var result = await new Promise(function(resolve, reject) {
                pendingResolve = resolve;
            });
            shutdown = false;

            return {'create': preShow - preCreate,
                    'show': preCapture - preShow,
                    'capture': Date.now() - preCapture,
                    'img': result};
        }
        ''')

    # Displaying the Javascript object runs it in the cell's output frame,
    # which makes stream_frame() available to later eval_js() calls.
    display(js)

def video_frame(label, bbox):
    """Call the browser-side ``stream_frame`` and return its reply.

    Both arguments are serialised with ``json.dumps`` so they arrive in the
    JavaScript call as properly quoted literals.

    Args:
        label: HTML to show in the label area; an empty string leaves it
            unchanged on the browser side.
        bbox: Value passed as ``stream_frame``'s ``imgData`` argument (the
            callers in this file pass an image data URL or an empty string).

    Returns:
        Whatever ``eval_js`` returns for the ``stream_frame`` call.
    """
    label_literal = json.dumps(label)
    overlay_literal = json.dumps(bbox)
    call_expr = f'stream_frame({label_literal}, {overlay_literal})'
    return eval_js(call_expr)

# MediaPipe hand tracker, created with 0.7 detection and tracking
# confidence thresholds; the main loop feeds it RGB frames.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
# NOTE(review): mp_draw is not referenced anywhere else in this cell.
mp_draw = mp.solutions.drawing_utils

# Black 480x640 BGR canvas that accumulates the white fingertip strokes.
drawing_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
# Most recent fingertip positions, newest first (the loop uses appendleft);
# once 512 points are stored the oldest ones are dropped.
points = deque(maxlen=512)
# Number of frames processed so far; used to throttle predictions.
frame_count = 0
# Wall-clock time of the last successful prediction.
last_prediction_time = time.time()
# Latest predicted digit per model; -1 means "no prediction yet".
current_prediction = {"knn": -1, "svm": -1, "ann": -1}
# Confidence reported alongside each prediction above.
confidence_scores = {"knn": 0, "svm": 0, "ann": 0}

def preprocess_for_prediction(canvas):
    """Turn the drawing canvas into a single MNIST-style feature row.

    The canvas is thresholded, the largest external contour is cropped,
    padded to a square, and scaled so the drawing occupies a 20x20 box
    centred in a 28x28 image. MNIST digits are laid out that way, so a
    drawing stretched edge to edge would not resemble the training data.

    Args:
        canvas: BGR image holding bright strokes on a black background.

    Returns:
        Float array of shape ``(1, 784)`` with values in ``[0, 1]``, or
        ``None`` when nothing is drawn or the drawing's bounding box is
        narrower or shorter than 30 pixels.
    """
    image_side = 28   # MNIST image size
    digit_side = 20   # box the digit itself occupies inside that image
    margin = (image_side - digit_side) // 2

    gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return None

    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))

    # Ignore specks and accidental short strokes.
    if w < 30 or h < 30:
        return None

    digit_roi = thresh[y:y+h, x:x+w]

    # Centre the crop on a square so resizing keeps the aspect ratio.
    size = max(w, h)
    squared = np.zeros((size, size), dtype=np.uint8)
    x_offset = (size - w) // 2
    y_offset = (size - h) // 2
    squared[y_offset:y_offset+h, x_offset:x_offset+w] = digit_roi

    # The crop is at least 30 px wide, so this is always a downscale;
    # INTER_AREA averages source pixels instead of sampling sparsely,
    # which keeps thin strokes from vanishing.
    shrunk = cv2.resize(squared, (digit_side, digit_side), interpolation=cv2.INTER_AREA)

    framed = np.zeros((image_side, image_side), dtype=np.uint8)
    framed[margin:margin+digit_side, margin:margin+digit_side] = shrunk

    normalized = framed / 255.0
    return normalized.reshape(1, -1)

def predict_digit(processed_input):
    """Classify one preprocessed digit with the KNN, SVM and ANN models.

    Uses the module-level ``knn_model``, ``svm_model`` and ``ann_model``.

    Args:
        processed_input: Feature row as returned by
            ``preprocess_for_prediction``, or ``None``.

    Returns:
        A 6-tuple ``(pred_knn, pred_svm, pred_ann, proba_knn, proba_svm,
        proba_ann)``: the predicted label from each model followed by the
        highest class probability each model reports. All six entries are
        ``None`` when ``processed_input`` is ``None``.
    """
    if processed_input is None:
        return None, None, None, None, None, None

    pred_knn = knn_model.predict(processed_input)[0]
    pred_svm = svm_model.predict(processed_input)[0]

    proba_knn = np.max(knn_model.predict_proba(processed_input))
    proba_svm = np.max(svm_model.predict_proba(processed_input))

    # Run the network once and derive both the label and its confidence
    # from the same output instead of doing two forward passes.
    ann_probabilities = ann_model.predict(processed_input, verbose=0)
    pred_ann = np.argmax(ann_probabilities)
    proba_ann = np.max(ann_probabilities)

    return pred_knn, pred_svm, pred_ann, proba_knn, proba_svm, proba_ann

# Startup banner and usage instructions, emitted as one block of text.
banner_rule = "=" * 70
startup_lines = [
    "",
    banner_rule,
    "STARTING REAL-TIME RECOGNITION",
    banner_rule,
    "",
    "Instructions:",
    "1. Show your index finger to the camera",
    "2. Draw digits (0-9) in the air",
    "3. Keep your finger visible while drawing",
    "4. Press 'C' to clear the canvas",
    "5. Click on video to stop",
    "",
    "Ready! Starting camera...",
    banner_rule,
    "",
]
print("\n".join(startup_lines))

# The browser-side helper must exist before the capture loop calls it.
video_stream()

# Result-card definitions in display order: (prediction key, title, gradient).
_PREDICTION_CARDS = (
    ("knn", "KNN", "#667eea 0%, #764ba2 100%"),
    ("svm", "SVM", "#f093fb 0%, #f5576c 100%"),
    ("ann", "ANN", "#4facfe 0%, #00f2fe 100%"),
)


def _build_label_html(predictions, confidences):
    """Render the label area: one card per model, or a prompt before any prediction."""
    html = '<div style="display: flex; justify-content: space-around; margin-top: 10px;">'

    if predictions["knn"] != -1:
        for key, title, gradient in _PREDICTION_CARDS:
            html += f'''
            <div style="background: linear-gradient(135deg, {gradient}); padding: 15px; border-radius: 10px; min-width: 150px; text-align: center;">
                <div style="color: white; font-size: 16px; margin-bottom: 5px;">{title}</div>
                <div style="color: white; font-size: 48px; font-weight: bold;">{predictions[key]}</div>
                <div style="color: white; font-size: 14px;">{confidences[key]*100:.1f}%</div>
            </div>
            '''
    else:
        html += '<div style="color: #666; font-size: 18px;">Draw a digit in the air! ✍️</div>'

    return html + '</div>'


# Main capture / recognition loop.
#
# Each iteration makes exactly one browser round trip: it delivers the label
# and annotated frame produced by the previous iteration and receives the
# next camera frame. Every reply is checked, so a stop request from the
# browser is never discarded. Empty strings on the first pass leave the
# browser's initial label and overlay untouched.
label_html = ''
img_data = ''

try:
    while True:
        js_reply = video_frame(label_html, img_data)
        # An empty reply means the browser tore the UI down; an empty 'img'
        # means the stop click arrived while a capture was pending.
        if not js_reply or not js_reply.get("img"):
            break

        frame = js_to_image(js_reply["img"])
        if frame is None:
            break

        # Mirror the frame so on-screen motion matches the user's hand.
        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            h, w = frame.shape[:2]
            for hand_landmarks in result.multi_hand_landmarks:
                # Landmark 8 is the index fingertip; its coordinates are
                # normalised, so scale them to pixel positions.
                index_finger_tip = hand_landmarks.landmark[8]
                cx, cy = int(index_finger_tip.x * w), int(index_finger_tip.y * h)

                points.appendleft((cx, cy))
                cv2.circle(frame, (cx, cy), 10, (0, 255, 0), -1)

        # Snapshot the deque once: indexing into the middle of a deque is
        # linear, which made the original index loop quadratic.
        trail = list(points)
        for start, end in zip(trail, trail[1:]):
            if start is None or end is None:
                continue
            cv2.line(drawing_canvas, start, end, (255, 255, 255), 8)
            cv2.line(frame, start, end, (0, 0, 255), 3)

        frame_count += 1
        current_time = time.time()

        # Predict at most every 15th frame and no more often than every 0.5 s.
        if frame_count % 15 == 0 and (current_time - last_prediction_time) > 0.5:
            processed = preprocess_for_prediction(drawing_canvas)
            if processed is not None:
                pred_knn, pred_svm, pred_ann, conf_knn, conf_svm, conf_ann = predict_digit(processed)

                if pred_knn is not None:
                    current_prediction = {"knn": pred_knn, "svm": pred_svm, "ann": pred_ann}
                    confidence_scores = {"knn": conf_knn, "svm": conf_svm, "ann": conf_ann}
                    last_prediction_time = current_time

        cv2.putText(frame, 'Press C to Clear', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # Prepare what the next round trip will show in the browser.
        label_html = _build_label_html(current_prediction, confidence_scores)

        _, buffer = cv2.imencode('.jpg', frame)
        img_str = b64encode(buffer).decode('utf-8')
        img_data = f'data:image/jpeg;base64,{img_str}'

        # NOTE(review): cv2.waitKey only reports key presses while an OpenCV
        # HighGUI window has focus, and this script never creates one, so
        # this branch likely never fires in Colab — confirm and consider a
        # browser-side clear control instead.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('c') or key == ord('C'):
            drawing_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
            points.clear()
            current_prediction = {"knn": -1, "svm": -1, "ann": -1}
            confidence_scores = {"knn": 0, "svm": 0, "ann": 0}

except KeyboardInterrupt:
    # Interrupting the cell is a normal way to end the session.
    pass
except Exception as e:
    # Top-level boundary: report the failure and fall through to cleanup.
    print(f"Error: {e}")
finally:
    # Release the MediaPipe tracker however the loop ended.
    hands.close()
    print("\n" + "="*70)
    print("SESSION ENDED")
    print("="*70)

Loading MNIST Dataset...
✓ Dataset loaded!

Training KNN Model...
✓ KNN trained!

Training SVM Model...
✓ SVM trained!

Training ANN Model...
✓ ANN trained!

ALL MODELS READY!

STARTING REAL-TIME RECOGNITION

Instructions:
1. Show your index finger to the camera
2. Draw digits (0-9) in the air
3. Keep your finger visible while drawing
4. Press 'C' to clear the canvas
5. Click on video to stop

Ready! Starting camera...



<IPython.core.display.Javascript object>




SESSION ENDED
