In [16]:
import cv2
import mediapipe as mp
import numpy as np
import pyautogui
import time
import os
import math
import subprocess

In [17]:
# MediaPipe setup: shortcuts to the hands solution and its drawing helpers,
# plus the single detector instance reused for every webcam frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
)

In [18]:
# Keyboard layout: one inner list per on-screen row, listed top to bottom.
# Each entry is both the label drawn on the key and the identifier returned by
# get_hovered_key(). 'CAPS', 'ENTER', 'SPACE' and 'DEL' are special keys that
# get_key_width() draws wider than the single-character keys.
keys = [
    ['+', '-', '*', '/', '=', '(', ')', '%'],
    ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'],
    ['CAPS', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P'],
    ['A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'ENTER'],
    ['Z', 'X', 'C', 'V', 'B', 'N', 'M', ',', '.', 'SPACE', 'DEL']
]
# Helper to get width of a key
def get_key_width(key):
    """Return the drawn width, in pixels, of the key named ``key``.

    'SPACE' is the widest key (180), 'CAPS', 'DEL' and 'ENTER' are medium
    (100), and every other key uses the default width (60).
    """
    if key == 'SPACE':
        return 180
    medium_keys = ['CAPS', 'DEL', 'ENTER']
    return 100 if key in medium_keys else 60

In [19]:
# Global state shared between the helper functions and the webcam loop.
caps_lock_on = False  # toggled when the on-screen CAPS key is pinched
last_click_time = 0   # time.time() of the last accepted pinch click
click_cooldown = 1.0  # seconds that must pass before another click is accepted
notepad_count = 1     # index used by open_notepad() to build the typed_<n>.txt name

# Draw the keyboard centered on screen
def draw_keyboard(img, key_list, caps_on, hover_key=None):
    """Draw the virtual keyboard onto ``img`` in place.

    Each row of ``key_list`` is centred horizontally in the image and the rows
    are stacked downwards starting at y=100.

    Args:
        img: Image array to draw on; only ``img.shape[1]`` (its width) is read
            for layout.
        key_list: List of rows, each row a list of key names.
        caps_on: Whether caps lock is active. Letter keys are labelled in
            upper case when it is on and lower case when it is off, and the
            'CAPS' key gets a highlighted border while it is on.
        hover_key: Name of the key under the fingertip (drawn with a green
            border), or ``None`` when no key is hovered.

    Returns:
        None. ``img`` is modified in place.
    """
    screen_w = img.shape[1]
    top_y = 100
    row_spacing = 10
    key_gap = 10
    key_h = 70
    font = cv2.FONT_HERSHEY_SIMPLEX
    bg_color = (0, 0, 0)
    text_color = (255, 255, 255)

    for row_index, row in enumerate(key_list):
        # Row width is the key widths plus one gap between neighbouring keys.
        total_row_width = sum(get_key_width(k) + key_gap for k in row) - key_gap
        cur_x = (screen_w - total_row_width) // 2
        y = top_y + row_index * (key_h + row_spacing)

        for key in row:
            w = get_key_width(key)
            top_left = (cur_x, y)
            bottom_right = (cur_x + w, y + key_h)

            # Hover highlight wins over the caps-lock indicator.
            if key == hover_key:
                border_color = (0, 255, 0)
            elif key == 'CAPS' and caps_on:
                border_color = (0, 255, 255)
            else:
                border_color = (255, 0, 0)

            # Filled background first, then the coloured outline on top.
            cv2.rectangle(img, top_left, bottom_right, bg_color, -1)
            cv2.rectangle(img, top_left, bottom_right, border_color, 2)

            # Layout labels are stored in upper case, so the lower-case form
            # has to be produced explicitly for the label to follow caps_on.
            if key == 'SPACE':
                display_key = ' '
            elif len(key) == 1 and key.isalpha():
                display_key = key.upper() if caps_on else key.lower()
            else:
                display_key = key

            # Single characters are drawn slightly larger than word labels.
            font_scale = 1.3 if len(display_key) == 1 else 1
            text_size = cv2.getTextSize(display_key, font, font_scale, 2)[0]
            text_x = cur_x + (w - text_size[0]) // 2
            text_y = y + (key_h + text_size[1]) // 2
            cv2.putText(img, display_key, (text_x, text_y), font, font_scale, text_color, 2)

            cur_x += w + key_gap

In [20]:

def get_hovered_key(xf, yf, key_list, screen_w):
    """Return the name of the key that contains the point ``(xf, yf)``.

    Uses the same geometry as the drawn keyboard: rows start at y=100, keys
    are 70 px tall, with 10 px gaps between rows and between keys, and each
    row is centred within ``screen_w``. A point exactly on a key's edge does
    not count as inside it.

    Returns:
        The key name, or ``None`` when the point is not inside any key.
    """
    first_row_top = 100
    key_height = 70
    vertical_gap = 10
    horizontal_gap = 10

    for row_number, row_keys in enumerate(key_list):
        row_top = first_row_top + row_number * (key_height + vertical_gap)
        # No key in this row can match if the point is outside its vertical band.
        if not (row_top < yf < row_top + key_height):
            continue

        widths = [get_key_width(name) for name in row_keys]
        row_width = sum(widths) + horizontal_gap * (len(widths) - 1)
        left = (screen_w - row_width) // 2

        for name, width in zip(row_keys, widths):
            if left < xf < left + width:
                return name
            left += width + horizontal_gap

    return None

In [21]:

def open_notepad():
    """Create a new empty ``typed_<n>.txt`` file and open it in Notepad.

    ``notepad_count`` restarts at 1 whenever the notebook is re-run, so any
    name that already exists on disk is skipped rather than truncated; text
    typed in an earlier session is never overwritten.

    Side effects:
        Sets the global ``filename`` to the file that was created, advances
        the global ``notepad_count`` past it, launches ``notepad.exe`` on the
        file, and sleeps for one second after launching it.

    Raises:
        FileNotFoundError: if ``notepad.exe`` cannot be found.
    """
    global notepad_count, filename

    # Advance to the first index whose file does not exist yet.
    while os.path.exists(f"typed_{notepad_count}.txt"):
        notepad_count += 1
    filename = f"typed_{notepad_count}.txt"

    # "x" creates the empty file and refuses to clobber one that appeared
    # between the existence check and this call.
    with open(filename, "x"):
        pass

    subprocess.Popen(["notepad.exe", filename])
    notepad_count += 1
    time.sleep(1)  # pause after launching, presumably so the window is ready


def type_to_notepad(key, caps_on):
    """Send the keystroke for the virtual key ``key`` through pyautogui.

    'DEL' and 'ENTER' are sent as the backspace and enter key presses,
    'SPACE' is written as a single space, and 'CAPS' sends nothing. Alphabetic
    keys are written in upper case when ``caps_on`` is true and lower case
    otherwise; any other key is written unchanged.
    """
    # CAPS only toggles state elsewhere; there is nothing to type for it.
    if key == 'CAPS':
        return

    if key == 'DEL':
        pyautogui.press('backspace')
        return

    if key == 'ENTER':
        pyautogui.press('enter')
        return

    if key == 'SPACE':
        text = ' '
    elif key.isalpha():
        text = key.upper() if caps_on else key.lower()
    else:
        text = key
    pyautogui.write(text)


# Create a new empty text file and launch Notepad on it before the webcam loop
# starts (presumably so pyautogui's keystrokes land in that window while it
# has focus).
open_notepad()


In [22]:
# Start Webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

PINCH_DISTANCE_PX = 40  # thumb-to-index distance below which a pinch counts as a click
ESC_KEY = 27            # key code that ends the loop

# Main loop: read a frame, track the hand, treat a thumb/index pinch over a
# key as a click, then draw the keyboard overlay. The try/finally guarantees
# the camera and the window are released even if the loop body raises or the
# kernel is interrupted; otherwise the webcam stays locked until restart.
# `hands` is deliberately left open because it is created in an earlier cell
# and would be unusable if this cell were re-run after closing it.
try:
    while True:
        success, frame = cap.read()
        if not success:
            break

        # Flip horizontally, then convert BGR -> RGB for hands.process().
        frame = cv2.flip(frame, 1)
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        hover_key = None
        screen_w = frame.shape[1]

        if results.multi_hand_landmarks:
            for hand in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand, mp_hands.HAND_CONNECTIONS)

                h, w, _ = frame.shape
                index_tip = hand.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                thumb_tip = hand.landmark[mp_hands.HandLandmark.THUMB_TIP]

                # Scale landmark x/y by the frame size to get pixel positions.
                cx, cy = int(index_tip.x * w), int(index_tip.y * h)
                tx, ty = int(thumb_tip.x * w), int(thumb_tip.y * h)

                # Mark the index fingertip, which acts as the pointer.
                cv2.circle(frame, (cx, cy), 12, (0, 255, 0), cv2.FILLED)

                hover_key = get_hovered_key(cx, cy, keys, screen_w)

                # Detect pinch click
                dist = math.hypot(tx - cx, ty - cy)
                current_time = time.time()
                cooldown_over = current_time - last_click_time > click_cooldown
                if dist < PINCH_DISTANCE_PX and cooldown_over:
                    # The cooldown only restarts when a key was actually hit.
                    if hover_key:
                        if hover_key == 'CAPS':
                            caps_lock_on = not caps_lock_on
                        else:
                            type_to_notepad(hover_key, caps_lock_on)
                        last_click_time = current_time

        draw_keyboard(frame, keys, caps_lock_on, hover_key)
        cv2.imshow("Virtual Keyboard - New Notepad & Aligned", frame)

        if cv2.waitKey(1) & 0xFF == ESC_KEY:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
