# Hand Recognition Calculator

In [None]:


!pip install opencv-python gTTS pydub SpeechRecognition cvzone mediapipe --quiet

import cv2, time, os
import numpy as np
import mediapipe as mp
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
from pydub import AudioSegment
import cvzone # Import cvzone

# Try importing Colab-specific modules
IN_COLAB = False
try:
    from google.colab import output
    from IPython.display import Javascript, display, clear_output, Audio
    from base64 import b64decode, b64encode
    IN_COLAB = True
except ImportError:
    pass


#  AUDIO HELPERS
def record_voice_colab(duration=2):
    """Record voice using browser microphone (Colab only).

    Injects a ``recordAudio`` JavaScript function into the notebook output,
    invokes it once through ``output.eval_js`` and decodes the returned
    base64 data URL.

    Args:
        duration: Recording length in seconds.

    Returns:
        The recording loaded through ``AudioSegment.from_file`` with
        ``format="webm"`` (assumes the browser's MediaRecorder emits a
        WebM-compatible container -- TODO confirm for non-Chromium browsers).
    """
    from IPython.display import Javascript
    # The script only DEFINES recordAudio(); it is invoked exactly once via
    # eval_js below. Calling it inside the script as well would start a
    # second, discarded recording that competes for the microphone.
    RECORD = Javascript(f"""
    async function recordAudio() {{
        const stream = await navigator.mediaDevices.getUserMedia({{ audio: true }});
        const recorder = new MediaRecorder(stream);
        let chunks = [];
        recorder.ondataavailable = e => chunks.push(e.data);
        // Attach the stop handler before stop() is called so the event cannot be missed.
        const stopped = new Promise(resolve => recorder.onstop = () => resolve(new Blob(chunks)));
        recorder.start();

        await new Promise(resolve => setTimeout(resolve, {duration*1000}));
        recorder.stop();

        let completeBlob = await stopped;
        // Release the microphone once the recording is complete.
        stream.getTracks().forEach(track => track.stop());
        let reader = new FileReader();
        let base64String = await new Promise(resolve => {{
            reader.onloadend = () => resolve(reader.result);
            reader.readAsDataURL(completeBlob);
        }});
        return base64String;
    }}
    """)
    display(RECORD)
    audio_b64 = output.eval_js("recordAudio()")
    # The reply is a data URL ("data:...;base64,<payload>"); keep the payload only.
    audio_bytes = b64decode(audio_b64.split(',')[1])
    return AudioSegment.from_file(BytesIO(audio_bytes), format="webm")


def recognize_speech(duration=2):
    """Recognize speech either in Colab (browser mic) or locally (PC mic).

    Args:
        duration: In Colab, the recording length in seconds. Locally, both
            the maximum time to wait for speech to start and the maximum
            phrase length, in seconds.

    Returns:
        The recognized text in lower case, or "" when nothing was said in
        time, the audio was unintelligible, or the recognition service
        could not be reached.
    """
    r = sr.Recognizer()

    if IN_COLAB:
        audio = record_voice_colab(duration)
        # SpeechRecognition reads WAV, so convert the browser recording first.
        audio.export("temp.wav", format="wav")
        with sr.AudioFile("temp.wav") as source:
            audio_data = r.record(source)
    else:
        with sr.Microphone() as source:
            print("üéôÔ∏è Speak now...")
            r.adjust_for_ambient_noise(source, duration=0.5)
            try:
                # Without a timeout listen() waits indefinitely for speech to
                # begin, which would stall any caller that polls periodically.
                audio_data = r.listen(
                    source, timeout=duration, phrase_time_limit=duration
                )
            except sr.WaitTimeoutError:
                return ""

    try:
        text = r.recognize_google(audio_data)
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible audio or unreachable service: report "nothing heard".
        # Catching only these keeps KeyboardInterrupt and real bugs visible.
        return ""
    return text.lower()


def speak_sentence(text):
    """Convert text to speech (works in both Colab and local).

    In Colab the synthesized MP3 is played through an autoplaying
    ``IPython.display.Audio`` widget. Locally it is written to
    ``tts_output.mp3`` in the working directory and handed to the system
    player (``start`` on Windows, ``mpg123`` elsewhere).

    Args:
        text: Sentence to speak.

    Failures are reported on stdout and never propagated, so a missing
    network connection or player does not abort the caller.
    """
    try:
        tts = gTTS(text=text, lang='en')
        buf = BytesIO()
        tts.write_to_fp(buf)
        mp3_bytes = buf.getvalue()

        if IN_COLAB:
            from IPython.display import Audio, display
            display(Audio(mp3_bytes, autoplay=True))
        else:
            # Save & play locally
            with open("tts_output.mp3", "wb") as f:
                f.write(mp3_bytes)
            os.system("start tts_output.mp3" if os.name == "nt" else "mpg123 tts_output.mp3")
    except Exception as exc:
        # Best-effort: keep going, but say why. `Exception` (not a bare
        # except) lets KeyboardInterrupt/SystemExit stop the program.
        print("TTS failed:", text, f"({exc})")


#  CAMERA HELPERS
def js_to_image(js_reply):
    """Turn a base64 data URL returned by the browser into an OpenCV image.

    Args:
        js_reply: String of the form "<header>,<base64 payload>"; only the
            part after the first comma is decoded.

    Returns:
        Whatever ``cv2.imdecode`` yields for the decoded bytes when read
        with ``cv2.IMREAD_COLOR``.
    """
    from base64 import b64decode
    encoded_payload = js_reply.split(',')[1]
    raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
    return cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)


def start_stream():
    """Install the browser-side ``stream_frame`` function in the notebook output.

    The injected JavaScript lazily creates a 640x480 ``<video>`` element fed
    by the webcam (cached on ``window.video``), and on each call draws the
    current frame onto a canvas and returns it as a JPEG data URL
    (quality 0.8). Nothing is captured until ``stream_frame()`` is invoked.
    """
    frame_grabber_source = '''
    async function stream_frame() {
        if (!window.video) {
            const video = document.createElement('video');
            video.width = 640; video.height = 480;
            document.body.appendChild(video);
            const stream = await navigator.mediaDevices.getUserMedia({video: true});
            video.srcObject = stream;
            await video.play();
            window.video = video;
        }
        const canvas = document.createElement('canvas');
        canvas.width = 640; canvas.height = 480;
        const ctx = canvas.getContext('2d');
        ctx.drawImage(window.video, 0, 0, 640, 480);
        return canvas.toDataURL('image/jpeg', 0.8);
    }
    '''
    display(Javascript(frame_grabber_source))


# HAND LOGIC
# Initialize cvzone hand detector
from cvzone.HandTrackingModule import HandDetector
# Module-level detector shared by run_camera_with_dynamic_voice(), limited to
# two hands. NOTE(review): detectionCon=0.8 is presumably the minimum
# detection confidence -- confirm against the cvzone documentation.
detector = HandDetector(detectionCon=0.8, maxHands=2)


#  MAIN LOOP
def run_camera_with_dynamic_voice(frames=400):
    """Run the finger-math loop: count raised fingers and announce the result.

    Each iteration grabs one frame (browser webcam in Colab, local camera
    otherwise), counts the raised fingers of the left and the right hand,
    combines both counts with the current operation, overlays the resulting
    sentence on the frame and speaks it whenever the numeric result changes.
    After showing the frame it listens for about one second for a voice
    command: "addition", "subtraction" or "multiplication" switch the
    operation, "stop" ends the session. Locally, pressing ``q`` in the
    preview window also ends it.

    Args:
        frames: Maximum number of frames to process before returning.

    Side effects:
        Resets the module-level ``OPERATION`` to "addition" on entry and
        updates it on voice commands.
    """
    global OPERATION
    OPERATION = "addition"  # default

    last_result = None
    voice_error_reported = False
    cap = None

    if IN_COLAB:
        start_stream()
    else:
        # Open the camera once for the whole session; re-opening the device
        # for every frame is very slow.
        cap = cv2.VideoCapture(0)

    try:
        for _ in range(frames):
            if IN_COLAB:
                js_reply = output.eval_js('stream_frame()')
                img = js_to_image(js_reply)
                if img is None:
                    # The frame could not be decoded; skip it instead of
                    # crashing on img.copy() below.
                    continue
            else:
                ret, img = cap.read()
                if not ret:
                    print("Camera not available")
                    break

            # Find hands using cvzone
            hands, img = detector.findHands(img.copy())

            left_cnt, right_cnt = None, None
            for hand in hands or []:
                cnt = detector.fingersUp(hand).count(1)
                if hand["type"] == "Left":
                    left_cnt = cnt
                elif hand["type"] == "Right":
                    right_cnt = cnt

            # Display hand tracking status if no hands are detected
            if not hands:
                cv2.putText(img, "No hands detected", (30, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

            # A result is only produced when both hands are visible.
            if left_cnt is not None and right_cnt is not None:
                if OPERATION == "addition":
                    result_val = left_cnt + right_cnt
                    sentence = f"{left_cnt} plus {right_cnt} equals {result_val}"
                elif OPERATION == "subtraction":
                    result_val = left_cnt - right_cnt
                    sentence = f"{left_cnt} minus {right_cnt} equals {result_val}"
                elif OPERATION == "multiplication":
                    result_val = left_cnt * right_cnt
                    sentence = f"{left_cnt} times {right_cnt} equals {result_val}"
                else:
                    sentence, result_val = "Invalid operation", None

                cv2.putText(img, sentence, (30, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

                # Speak only when the numeric result changes, not every frame.
                if result_val != last_result:
                    print("üñêÔ∏è", sentence)
                    speak_sentence(sentence)
                    last_result = result_val

            if not IN_COLAB:
                cv2.imshow("Finger Math", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                _, im_arr = cv2.imencode('.jpg', img)
                # Create the preview <img> once, then just swap its data URL.
                display(Javascript(f"""
                    (function() {{
                        const base64 = "{b64encode(im_arr).decode()}";
                        let img = document.getElementById('output-img-op');
                        if (!img) {{
                            img = document.createElement('img');
                            img.id = 'output-img-op';
                            img.style.maxWidth = '640px';
                            document.body.appendChild(img);
                        }}
                        img.src = 'data:image/jpeg;base64,' + base64;
                    }})();
                """))

            # Continuous voice check (best-effort: a missing microphone must
            # not end the session, but Ctrl-C still has to work).
            try:
                spoken = recognize_speech(1)  # 1-sec recognition
            except Exception as exc:
                if not voice_error_reported:
                    print("Voice command check failed:", exc)
                    voice_error_reported = True
                spoken = ""

            # Same precedence as before: operation names win over "stop".
            requested = next(
                (name for name in ("addition", "subtraction", "multiplication")
                 if name in spoken),
                None,
            )
            if requested is not None:
                OPERATION = requested
                speak_sentence(f"Switched to {requested}")
            elif "stop" in spoken:
                speak_sentence("Session stopped")
                if IN_COLAB:
                    clear_output(wait=True)
                break

            time.sleep(0.05)
    finally:
        # Always free the camera and close the preview window, even when the
        # loop is interrupted or raises.
        if cap is not None:
            cap.release()
        if not IN_COLAB:
            cv2.destroyAllWindows()



# Notebook entry point: list the supported voice commands, then run the
# capture loop for at most 400 frames.
print("üéôÔ∏è Say 'addition', 'subtraction', or 'multiplication' anytime to switch operation.")
print("üéôÔ∏è Say 'stop' to finish the session.")
run_camera_with_dynamic_voice(frames=400)

üéôÔ∏è Say 'addition', 'subtraction', or 'multiplication' anytime to switch operation.
üéôÔ∏è Say 'stop' to finish the session.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

üñêÔ∏è 5 plus 4 equals 9




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

üñêÔ∏è 5 plus 2 equals 7




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

KeyboardInterrupt: 