In [None]:
import os, sys, io, base64, logging, time
import pyperclip
from openai import OpenAI, APIError,APIError
from PIL import Image
from tkinter import Tk, Canvas
import pyautogui, keyboard
import pygame
import tempfile
import uuid
from pathlib import Path
from playsound import playsound
from dotenv import load_dotenv

pygame 2.6.1 (SDL 2.28.4, Python 3.11.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Notebook-wide configuration: logging, environment variables and the OpenAI client.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
load_dotenv()  # pulls variables from a local .env file into os.environ, if one exists

# os.getenv returns None when the variable is missing, so every use below must
# tolerate None instead of assuming a string.
API_KEY = os.getenv("OPENAI_API_KEY")

print("Loaded API key:", bool(API_KEY))
print("Loaded API key length:", len(API_KEY or ""))

if not API_KEY:
    logger.error("OPENAI_API_KEY is not set (check the environment or the .env file)")
    sys.exit(1)
if not API_KEY.startswith("sk-"):
    logger.error("Invalid OPENAI_API_KEY")
    sys.exit(1)

# Shared client used by the OCR and text-to-speech helpers defined in later cells.
client = OpenAI(api_key=API_KEY)


Loaded API key: True
Loaded API key length: 164


In [3]:
def extract_text_from_image(img: Image.Image) -> str:
    """Extract the text visible in an image using the OpenAI chat completions API.

    The image is PNG-encoded in memory, embedded as a base64 data URL and sent
    to the ``gpt-4o-mini`` model together with an instruction to extract all text.

    Args:
        img: The PIL image to read text from.

    Returns:
        The text content of the first completion choice, or an empty string
        when the response carries no text content.

    Raises:
        APIError: Re-raised after logging when the OpenAI request fails.
    """
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode("ascii")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract all text from the image."},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
            ],
        }
    ]
    try:
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )
    except APIError as e:
        # Only HTTP-status errors carry `status_code`; connection/timeout errors
        # do not, and reading the attribute directly would raise AttributeError
        # here and hide the original failure.
        status = getattr(e, "status_code", None)
        logger.error(
            "OpenAI API error %s: %s",
            status if status is not None else "(no HTTP status)",
            e,
        )
        raise
    # `content` can be None; normalise so the declared return type holds.
    return resp.choices[0].message.content or ""

In [4]:
def speak_text(txt: str) -> None:
    """Convert text to speech with OpenAI TTS and play it through pygame.

    Relies on the module-level ``client`` (OpenAI client) and ``logger`` being
    initialised beforehand. Each call writes the audio to a uniquely named
    temporary MP3 file so that concurrent or repeated calls never collide on
    the same path. The call blocks until playback has finished.

    Args:
        txt: The text to speak. Empty, whitespace-only or ``None`` input is
            ignored and no API request is made.
    """
    if not txt or not txt.strip():
        logger.info("speak_text: nothing to speak.")
        return

    # 1. Synthesize speech.
    resp = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=txt,
    )
    audio_bytes = resp.read()

    # 2. Store the audio in a uniquely named temporary file.
    tmp = Path(tempfile.gettempdir()) / f"speech_{uuid.uuid4().hex}.mp3"
    tmp.write_bytes(audio_bytes)

    try:
        # 3. Initialise the mixer, or stop whatever is still playing.
        if not pygame.mixer.get_init():
            pygame.mixer.init()
        else:
            pygame.mixer.music.stop()

        # 4. Load and play, blocking until playback ends.
        pygame.mixer.music.load(str(tmp))
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            pygame.time.delay(100)
    finally:
        # 5. Release the file before deleting it: while the music module still
        #    has the file loaded, deletion can fail (notably on Windows).
        #    `unload` is looked up defensively because older pygame lacks it.
        unload = getattr(pygame.mixer.music, "unload", None)
        if unload is not None and pygame.mixer.get_init():
            unload()
        try:
            tmp.unlink()
        except OSError as exc:
            # Best-effort cleanup: report the leftover file instead of hiding it.
            logger.warning("Could not delete temporary audio file %s: %s", tmp, exc)

In [5]:
def read_selection():
    """Copy the currently highlighted text to the clipboard and read it aloud.

    Sends Ctrl+C, waits briefly, then reads the clipboard. Whitespace-only
    clipboard content is reported and not spoken.
    """
    # Copy the highlighted text to the clipboard.
    keyboard.press_and_release("ctrl+c")
    # Short pause before reading the clipboard back.
    time.sleep(0.1)

    clipboard_text = pyperclip.paste()
    if not clipboard_text.strip():
        print("No text found in clipboard.")
        return

    print("Reading selection:\n", clipboard_text)
    speak_text(clipboard_text)

In [6]:
def select_region():
    """Let the user drag a rectangle on screen, OCR that region and read it aloud.

    A translucent, fullscreen, always-on-top Tk window is shown; the user
    presses, drags and releases the left mouse button to mark a rectangle.
    The region is then captured with ``pyautogui.screenshot``, saved as
    ``capture.png``, sent to ``extract_text_from_image`` and the resulting text
    is printed and passed to ``speak_text``.

    Nothing is captured when no drag happened or the rectangle has zero width
    or height.
    """
    coords = {"start": None, "end": None}
    root = Tk()
    root.attributes("-fullscreen", True, "-alpha", 0.3, "-topmost", True)
    canvas = Canvas(root, cursor="cross")
    canvas.pack(fill="both", expand=True)

    def on_press(e):
        coords["start"] = (e.x, e.y)
        canvas.delete("rect")

    def on_drag(e):
        # A motion event can arrive without a preceding press (e.g. the button
        # was already held when the overlay appeared); ignore it.
        if coords["start"] is None:
            return
        coords["end"] = (e.x, e.y)
        canvas.delete("rect")
        x1, y1 = coords["start"]
        canvas.create_rectangle(x1, y1, e.x, e.y, outline="red", width=2, tags="rect")

    def on_release(e):
        root.quit()

    canvas.bind("<ButtonPress-1>", on_press)
    canvas.bind("<B1-Motion>", on_drag)
    canvas.bind("<ButtonRelease-1>", on_release)

    try:
        root.mainloop()
    finally:
        # Always remove the overlay, even if the event loop raised.
        root.destroy()

    if not (coords["start"] and coords["end"]):
        logger.info("No region selected.")
        return

    # Normalise so (x1, y1) is the top-left and (x2, y2) the bottom-right corner,
    # regardless of the drag direction.
    x1, y1 = map(min, zip(coords["start"], coords["end"]))
    x2, y2 = map(max, zip(coords["start"], coords["end"]))
    width, height = x2 - x1, y2 - y1
    if width <= 0 or height <= 0:
        # A degenerate rectangle has no pixels to capture or save.
        logger.info("No region selected.")
        return

    img = pyautogui.screenshot(region=(x1, y1, width, height))
    img.save("capture.png")
    logger.info(f"Saved capture.png at {(x1,y1,x2,y2)}")

    text = extract_text_from_image(img)
    print("Extracted text:\n", text)
    if text and text.strip():
        speak_text(text)
    else:
        logger.info("No text extracted from the selected region.")


In [7]:
# Flag polled by the main loop; the loop ends once it becomes False.
running = True


def exit_app() -> None:
    """Announce shutdown and clear the module-level ``running`` flag."""
    global running
    print("Exiting…")
    running = False

In [9]:
# Reset the flag so this cell can be re-run after a previous quit.
running = True

# Keep the handles returned by add_hotkey so the hotkeys can be unregistered
# when the loop ends; otherwise re-running this cell in the same kernel would
# stack duplicate handlers and the hotkeys would stay live after "App stopped.".
_hotkey_handles = [
    keyboard.add_hotkey("ctrl+alt+shift+s", select_region),
    keyboard.add_hotkey("ctrl+alt+shift+r", read_selection),
    keyboard.add_hotkey("ctrl+alt+shift+q", exit_app),
]

print("Ready:\n"
    "  Ctrl+Alt+Shift+S = select & extract from screen\n"
    "  Ctrl+Alt+Shift+R = read highlighted text\n"
    "  Ctrl+Alt+Shift+Q = quit")

try:
    # Idle until exit_app() clears the flag; the hotkey callbacks do the work.
    while running:
        time.sleep(0.1)
except KeyboardInterrupt:
    # A kernel interrupt is treated as a normal stop request.
    logger.info("Interrupted by user.")
finally:
    for _handle in _hotkey_handles:
        keyboard.remove_hotkey(_handle)

print("App stopped.")

Ready:
  Ctrl+Alt+Shift+S = select & extract from screen
  Ctrl+Alt+Shift+R = read highlighted text
  Ctrl+Alt+Shift+Q = quit
App stopped.
