In [2]:
import pygetwindow as gw
import gradio
from PIL import ImageGrab

# Get list of open windows
windows = gw.getAllWindows()

# Create a dropdown to select a window
window_names = [window.title for window in windows]
selected_window = gradio.Dropdown(label="Select Window", choices=window_names)

# Take screenshot of selected window
def take_screenshot(selected_window):
    window = gw.getWindowsWithTitle(selected_window)[0]
    screenshot = ImageGrab.grab(bbox=(window.left, window.top, window.right, window.bottom))
    return screenshot

screenshot = take_screenshot(selected_window)

  from .autonotebook import tqdm as notebook_tqdm


AttributeError: 'Dropdown' object has no attribute 'upper'

In [None]:
import easyocr

# Create an easyOCR reader
reader = easyocr.Reader(['en'])  # Use English language model

# Read text from screenshot and draw bounding boxes
def read_and_draw_bounding_boxes(screenshot):
    result = reader.readtext(screenshot)
    bounding_boxes = []
    for (bbox, text, prob) in result:
        bounding_boxes.append((bbox, text))
    return bounding_boxes

bounding_boxes = read_and_draw_bounding_boxes(screenshot)

In [None]:
import json

# Store coordinates and window information
def store_coordinates(window_name, window_size, bounding_boxes):
    data = {
        "window_name": window_name,
        "window_size": window_size,
        "bounding_boxes": bounding_boxes
    }
    with open("config.json", "w") as f:
        json.dump(data, f)

store_coordinates(selected_window, window.size, bounding_boxes)

In [None]:
# Open window and resize
def open_and_resize(window_name, window_size):
    window = gw.getWindowsWithTitle(window_name)[0]
    window.resizeTo(window_size[0], window_size[1])

open_and_resize(selected_window, window_size)

In [None]:
import pyautogui

# Execute keystrokes for each bounding box
def execute_keystrokes(bounding_boxes, keystrokes):
    for (bbox, text) in bounding_boxes:
        pyautogui.typewrite(keystrokes)

execute_keystrokes(bounding_boxes, ["keystroke1", "keystroke2"])

In [None]:
import gradio as gr

def config_app():
    # Configuration
    window_names = [window.title for window in gw.getAllWindows()]
    selected_window = gradio.Dropdown(label="Select Window", choices=window_names)
    screenshot = take_screenshot(selected_window)
    bounding_boxes = read_and_draw_bounding_boxes(screenshot)
    store_coordinates(selected_window, window.size, bounding_boxes)

    # Usage
    file_name = gradio.Textbox(label="Enter file name")
    keystrokes = gradio.Textbox(label="Enter keystrokes for each bounding box")
    open_and_resize(selected_window, window_size)
    execute_keystrokes(bounding_boxes, keystrokes)

demo = gradio.Demo(config_app, title="Window Automation Demo")
demo.launch()

In [6]:
import gradio as gr
import pygetwindow as gw
import easyocr
import pyautogui
import json
import time
from PIL import Image, ImageDraw
import numpy as np

# Global OCR Reader
reader = easyocr.Reader(['en'])

def list_open_windows():
    # Return only the titles of currently open windows as strings
    windows = [win.title for win in gw.getAllWindows() if win.title]
    return windows

def capture_screenshot(window_title):
    # Find the window by title
    windows = gw.getWindowsWithTitle(window_title)
    if not windows:
        raise ValueError(f"Window with title '{window_title}' not found.")
    
    window = windows[0]
    
    # Check if the window is minimized
    if window.isMinimized:
        window.restore()  # Restore if minimized
        time.sleep(0.5)  # Wait briefly for the window to be restored

    try:
        # Attempt to activate the window
        window.activate()
        time.sleep(0.5)  # Wait briefly to ensure activation

        # Get window position and size
        x, y, width, height = window.left, window.top, window.width, window.height
        
        # Capture the screenshot of the window's region
        screenshot = pyautogui.screenshot(region=(x, y, width, height))
        return screenshot, (x, y, width, height)
    
    except gw.PyGetWindowException as e:
        raise RuntimeError(f"Failed to activate window '{window_title}': {e}")

def apply_ocr(screenshot, use_ocr=True):
    if use_ocr:
        # Convert the screenshot (PIL.Image) to a numpy array for easyOCR
        img_np = np.array(screenshot)
        bounds = reader.readtext(img_np)
        
        # Draw bounding boxes on the original PIL image
        draw = ImageDraw.Draw(screenshot)
        for bound in bounds:
            top_left, bottom_right = tuple(bound[0][0]), tuple(bound[0][2])
            draw.rectangle([top_left, bottom_right], outline="red", width=2)
        return screenshot, bounds
    else:
        return screenshot, []

def save_configuration(window_title, bounds, window_size, file_name):
    # Save configuration
    config_data = {
        "window_title": window_title,
        "window_size": window_size,
        "bounds": [{"name": f"Box {i+1}", "coordinates": bound[0]} for i, bound in enumerate(bounds)]
    }
    with open(file_name, "w") as f:
        json.dump(config_data, f)

def load_configuration(file_name):
    # Load configuration from file
    with open(file_name, "r") as f:
        return json.load(f)

def execute_keystrokes(config_file, keystrokes):
    # Load configuration
    config = load_configuration(config_file)
    window_title = config["window_title"]
    window_size = config["window_size"]
    bounds = config["bounds"]
    
    # Open and resize the window
    window = gw.getWindowsWithTitle(window_title)[0]
    window.resizeTo(window_size[2], window_size[3])
    window.moveTo(window_size[0], window_size[1])

    # Execute keystrokes at each bounding box
    for i, bound in enumerate(bounds):
        # Move to the bounding box's coordinates
        coordinates = bound["coordinates"]
        x, y = (coordinates[0][0] + coordinates[2][0]) // 2, (coordinates[0][1] + coordinates[2][1]) // 2
        pyautogui.click(x, y)
        for keystroke in keystrokes[i]:
            pyautogui.write(keystroke)
            pyautogui.press('enter')

# Gradio Interface
def configure_window(window_title, use_ocr, file_name):
    screenshot, window_size = capture_screenshot(window_title)
    processed_img, bounds = apply_ocr(screenshot, use_ocr)
    save_configuration(window_title, bounds, window_size, file_name)
    return processed_img

def usage_window(config_file, keystrokes):
    execute_keystrokes(config_file, keystrokes)
    return "Execution Complete"

with gr.Blocks() as demo:
    with gr.Tab("Configuration"):
        windows = gr.Dropdown(choices=list_open_windows(), label="Select an open window")
        use_ocr = gr.Checkbox(label="Use OCR for bounding boxes", value=True)
        file_name = gr.Textbox(label="Configuration file name", value="config.json")
        config_btn = gr.Button("Configure")
        config_img = gr.Image()
        
        config_btn.click(configure_window, inputs=[windows, use_ocr, file_name], outputs=config_img)
        
    with gr.Tab("Usage"):
        config_file = gr.Textbox(label="Configuration file name", value="config.json")
        keystrokes = gr.JSON(label="Keystrokes per bounding box", value={})
        exec_btn = gr.Button("Execute")
        output_text = gr.Textbox(label="Output")
        
        exec_btn.click(usage_window, inputs=[config_file, keystrokes], outputs=output_text)

demo.launch()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7873

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_17492\697224807.py", line 33, in capture_screenshot
    window.activate()
  File "c:\Users\Admin\AppData\Local\Programs\Python\Python312\Lib\site-packages\pygetwindow\_pygetwindow_win.py", line 246, in activate
    _raiseWithLastError()
  File "c:\Users\Admin\AppData\Local\Programs\Python\Python312\Lib\site-packages\pygetwindow\_pygetwindow_win.py", line 99, in _raiseWithLastError
    raise PyGetWindowException('Error code from Windows: %s - %s' % (errorCode, _formatMessage(errorCode)))
pygetwindow.PyGetWindowException: Error code from Windows: 0 - The operation completed successfully.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\Admin\AppData\Local\Programs\Python\Python312\Lib\site-packages\gradio\queueing.py", line 624, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^