diff --git a/Dockerfile b/Dockerfile index 5310652..1343aae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libgbm1 \ libxshmfence1 \ libasound2 \ + unzip \ + p7zip-full \ + bc \ + ripgrep \ + fd-find \ + sqlite3 \ + libsqlite3-dev \ + wkhtmltopdf \ + poppler-utils \ + default-jre \ && apt-get clean && rm -rf /var/lib/apt/lists/* @@ -64,6 +74,12 @@ COPY ./server.py /app/server.py # Create application/jupyter directories RUN mkdir -p /app/uploads /app/jupyter_runtime +# Copy skills directory structure into the container +# Public skills are baked into the image +# User skills directory is created as mount point for user-added skills +COPY ./skills/public /app/uploads/skills/public +RUN mkdir -p /app/uploads/skills/user + # # Generate SSH host keys # RUN ssh-keygen -A @@ -81,6 +97,17 @@ EXPOSE 8222 # Start the FastAPI application # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002", "--workers", "1", "--no-access-log"] +# Repair half-installed packages; refresh the package lists first (they were removed above) and pass -y because docker build cannot answer prompts +RUN apt-get update && apt-get --fix-broken install -y +# Ensure Node.js, npm (and npx) are set up +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - +RUN apt-get install -y nodejs && apt-get clean && rm -rf /var/lib/apt/lists/* + + + +ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" +RUN npm install playwright@1.53.0 -g +RUN npx playwright@1.53.0 install # Copy the entrypoint script into the image COPY entrypoint.sh /entrypoint.sh @@ -88,13 +115,9 @@ COPY entrypoint.sh /entrypoint.sh # Make the entrypoint script executable RUN chmod +x /entrypoint.sh -# Ensure Node.js, npm (and npx) are set up -RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - -RUN apt-get install -y nodejs -ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" -RUN npm install playwright@1.53.0 -g -RUN npx playwright@1.53.0 install + + # Use the entrypoint script diff --git a/README.md b/README.md index e9c574f..4dabf1a 100644 --- a/README.md +++ b/README.md @@ -178,11 +178,77 @@ Code runs in an 
isolated container with VM-level isolation. Your host system and From [@apple/container](https://github.com/apple/container/blob/main/docs/technical-overview.md): >Each container has the isolation properties of a full VM, using a minimal set of core utilities and dynamic libraries to reduce resource utilization and attack surface. +## Skills System + +CodeRunner includes a built-in skills system that provides pre-packaged tools for common tasks. Skills are organized into two categories: + +### Built-in Public Skills + +The following skills are included in every CodeRunner installation: + +- **pdf-text-replace** - Replace text in fillable PDF forms +- **image-crop-rotate** - Crop and rotate images + +### Using Skills + +Skills are accessed through MCP tools: + +```python +# List all available skills +result = await list_skills() + +# Get documentation for a specific skill +info = await get_skill_info("pdf-text-replace") + +# Execute a skill's script +code = """ +import subprocess +subprocess.run([ + 'python', + '/app/uploads/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py', + '/app/uploads/input.pdf', + 'OLD TEXT', + 'NEW TEXT', + '/app/uploads/output.pdf' +]) +""" +result = await execute_python_code(code) +``` + +### Adding Custom Skills + +Users can add their own skills to the `~/.coderunner/assets/skills/user/` directory: + +1. Create a directory for your skill (e.g., `my-custom-skill/`) +2. Add a `SKILL.md` file with documentation +3. Add your scripts in a `scripts/` subdirectory +4. 
Skills will be automatically discovered by the `list_skills()` tool + +**Skill Structure:** +``` +~/.coderunner/assets/skills/user/my-custom-skill/ +├── SKILL.md # Documentation with usage examples +└── scripts/ # Your Python/bash scripts + └── process.py +``` + +### Example: Using the PDF Text Replace Skill + +```bash +# Inside the container, execute: +python /app/uploads/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py \ + /app/uploads/tax_form.pdf \ + "John Doe" \ + "Jane Smith" \ + /app/uploads/tax_form_updated.pdf +``` + ## Architecture CodeRunner consists of: - **Sandbox Container:** Isolated execution environment with Jupyter kernel - **MCP Server:** Handles communication between AI models and the sandbox +- **Skills System:** Pre-packaged tools for common tasks (PDF manipulation, image processing, etc.) ## Examples diff --git a/SKILLS-README.md b/SKILLS-README.md new file mode 100644 index 0000000..2058e99 --- /dev/null +++ b/SKILLS-README.md @@ -0,0 +1,117 @@ +# Skills powered by coderunner, running locally on your Mac + +> [!NOTE] +> [CodeRunner](https://github.com/instavm/coderunner) executes AI-generated code in a truly isolated sandboxed environment on your Mac using Apple's native containers. + +# Pre-requisite +* `Mac` with a `M-series` chip. +* Install the latest`coderunner` by running the `./install.sh` script from the main repository. +```shell +./install.sh +``` + +# How To Use Skills +* `coderunner` is exposed as an MCP and can be connected to tools like `gemini cli` or `qwen cli` or `claude desktop` or anything that supports MCP. The execution is completely local, done on your Mac. 
+ +*For example, for Gemini CLI, you can edit* `~/.gemini/settings.json` + ```json + { + "theme": "Default", + "selectedAuthType": "oauth-personal", + "mcpServers": { + "coderunner": { + "httpUrl": "http://coderunner.local:8222/mcp" + } + } +} +``` + + And for system instructions, replace the `~/.gemini/GEMINI.md` with the [GEMINI.md](https://github.com/instavm/coderunner/blob/main/examples/gemini_cli/GEMINI.md) + + +# How To Add New Skills + +## Option 1: Import from Claude + +You can download a skill folder from Anthropic's skills [github repo](https://github.com/anthropics/skills/) and copy it to `~/.coderunner/assets/skills/user/` + +For example, I have added 4 skills in the user folder as: +```shell +/Users/manish/.coderunner/assets/skills/ +├── public +│ ├── image-crop-rotate +│ │ ├── scripts +│ │ └── SKILL.md +│ └── pdf-text-replace +│ ├── scripts +│ └── SKILL.md +└── user + ├── docx + │ ├── docx-js.md + │ ├── LICENSE.txt + │ ├── ooxml + │ ├── ooxml.md + │ ├── scripts + │ └── SKILL.md + ├── pptx + │ ├── html2pptx.md + │ ├── LICENSE.txt + │ ├── ooxml + │ ├── ooxml.md + │ ├── scripts + │ └── SKILL.md + ├── slack-gif-creator + │ ├── core + │ ├── LICENSE.txt + │ ├── requirements.txt + │ ├── SKILL.md + │ └── templates + └── xlsx + ├── LICENSE.txt + ├── recalc.py + └── SKILL.md +``` + + +## Option 2: Write Your Own Skills + +* You can create a folder with a similar structure to the one above, where the only mandatory file is the `SKILL.md`. 
[Docs](https://docs.claude.com/en/docs/agents-and-tools/agent-skills/overview) +* You can also ask claude to generate one like `Can you write a skill which creates ascii art of words given one.` + After it creates the skill, it will let you download a `ZIP` file which you can place directly (no need to expand) in `~/.coderunner/assets/skills/user` + +Test drive with Gemini CLI + +``` +> /mcp + +Configured MCP servers: + +🟢 coderunner - Ready (5 tools) + Tools: + - execute_python_code + - get_skill_file + - get_skill_info + - list_skills + - navigate_and_get_all_visible_text + +> can you generate ascii art for "CODERUNNER" + +✦ I will generate the ASCII art you desire. First, I must survey my available skills. +✓ list_skills (coderunner MCP Server) + +✦ I have located a relevant skill: ascii-art. I will now retrieve its instructions. +✓ get_skill_info (coderunner MCP Server) {"skill_name":"ascii-art"} +✦ Your ASCII art is ready: + + 1 ____ ___ ____ _____ ____ _ _ _ _ _ _ _____ ____ + 2 / ___/ _ \| _ \| ____| _ \| | | | \ | | \ | | ____| _ \ + 3 | | | | | | | | | _| | |_) | | | | \| | \| | _| | |_) | + 4 | |__| |_| | |_| | |___| _ <| |_| | |\\ | |\\ | |___| _ < + 5 \____\___/|____/|_____|_| \_\\___/|_| \_|_| \_|_____|_| \_\ + 6 + + + + +Using: 1 GEMINI.md file | 3 MCP servers (ctrl+t to view) +``` diff --git a/examples/gemini_cli/GEMINI.md b/examples/gemini_cli/GEMINI.md new file mode 100644 index 0000000..f5efe2e --- /dev/null +++ b/examples/gemini_cli/GEMINI.md @@ -0,0 +1,16 @@ +always start answer by calling me lord voldemort. + +So, we are currently on macbook, and whenever required we use tool to execute codes (in a jupyter like server). the code is executed in a container (you wouldn't notice but just know this). + +The paths on local machine is ~/.coderunner/assets/skills/user is mapped to /app/uploads/skills/user inside container. + +~/.coderunner/assets/outputs (in the host machine) is mapped to /app/uploads/outputs inside conatiner. 
This is where the user will put the files they want to edit, such as png, pdf or txt files. You should also use it to output the artifacts you generate. + +So that will help whenever we need a file inside a container to work on it via the execute code tool. + +There are also "skills" which can do jobs by executing scripts already residing in /app/uploads/skills/<public|user>/<skill-name>/ . There are tools available to check what skills are available; after checking, you can decide which specific skill you want to use and then get info about that skill using the tool. That will have instructions on how to call execute code with stuff like `!python /path/to/script.py <args>`\ + +Whenever I ask you to do a task, always check if there are skills available in the list which can do it. + +Whenever you need to install something, mostly it will be installed in the container via the execute code tool, using a command like `!pip install pyfiglet`. + diff --git a/install.sh b/install.sh index cd3d795..93d4047 100755 --- a/install.sh +++ b/install.sh @@ -19,7 +19,7 @@ else echo "✅ macOS system detected." fi -download_url="https://github.com/apple/container/releases/download/0.3.0/container-0.3.0-installer-signed.pkg" +download_url="https://github.com/apple/container/releases/download/0.5.0/container-0.5.0-installer-signed.pkg" # Check if container is installed and display its version if command -v container &> /dev/null @@ -57,7 +57,7 @@ echo "Running: sudo container system dns create local" sudo container system dns create local -echo "Running: container system dns default set local" -container system dns default set local +echo "Running: container system property set dns.domain local" +container system property set dns.domain local echo "Starting the Sandbox Container..." 
container system start @@ -66,12 +66,21 @@ container system start echo "Pulling the latest image: instavm/coderunner" container image pull instavm/coderunner -echo "→ Ensuring coderunner assets directory…" +echo "→ Ensuring coderunner assets directories…" ASSETS_SRC="$HOME/.coderunner/assets" -mkdir -p "$ASSETS_SRC" +mkdir -p "$ASSETS_SRC/skills/user" +mkdir -p "$ASSETS_SRC/outputs" # Run the command to start the sandbox container -echo "Running: container run --name coderunner --detach --rm --cpus 8 --memory 4g instavm/coderunner" +echo "Running: container run --volume $ASSETS_SRC/skills/user:/app/uploads/skills/user --volume $ASSETS_SRC/outputs:/app/uploads/outputs --name coderunner --detach --rm --cpus 8 --memory 4g instavm/coderunner" -container run --volume "$ASSETS_SRC:/app/uploads" --name coderunner --detach --rm --cpus 8 --memory 4g instavm/coderunner +container run \ + --volume "$ASSETS_SRC/skills/user:/app/uploads/skills/user" \ + --volume "$ASSETS_SRC/outputs:/app/uploads/outputs" \ + --name coderunner \ + --detach \ + --rm \ + --cpus 8 \ + --memory 4g \ + instavm/coderunner echo "✅ Setup complete. MCP server is available at http://coderunner.local:8222/mcp" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 84020bd..336350f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,3 +38,48 @@ fastmcp openai-agents playwright==1.53.0 + +# Data Science Libraries +pandas +numpy +scipy +scikit-learn +statsmodels + +# Visualization Libraries +matplotlib +seaborn + +# Image/Video I/O Libraries +imageio +imageio-ffmpeg + +# File Processing Libraries +pyarrow +openpyxl +xlsxwriter +xlrd +pillow +python-pptx +python-docx +pypdf +pdfplumber +pypdfium2 +pdf2image +pdfkit +tabula-py +reportlab[pycairo] +img2pdf + +# Math & Computing Libraries +sympy +mpmath + +# Utilities +tqdm +python-dateutil +pytz +joblib + +# Beautiful Soup for HTML parsing (already used in server.py) +beautifulsoup4 diff --git a/server.py b/server.py index d2b4e10..7be87a4 100644 --- a/server.py +++ b/server.py @@ -5,6 +5,7 @@ import json import logging import os +import zipfile import pathlib import time import uuid @@ -53,6 +54,11 @@ SHARED_DIR.mkdir(exist_ok=True) 
KERNEL_ID_FILE_PATH = SHARED_DIR / "python_kernel_id.txt"
 
+# Skills directory configuration
+SKILLS_DIR = SHARED_DIR / "skills"
+PUBLIC_SKILLS_DIR = SKILLS_DIR / "public"
+USER_SKILLS_DIR = SKILLS_DIR / "user"
+
 def resolve_with_system_dns(hostname):
     try:
         return socket.gethostbyname(hostname)
@@ -530,5 +536,249 @@ async def navigate_and_get_all_visible_text(url: str) -> str:
         return f"Error: Failed to retrieve all visible text: {str(e)}"
 
 
+# --- SKILLS MANAGEMENT TOOLS ---
+
+
+async def _parse_skill_frontmatter(skill_md_path):
+    """
+    Parse the leading ``---`` delimited front matter of a SKILL.md file.
+
+    Only flat ``key: value`` lines are understood (no nested YAML). Parsing
+    starts at the first ``---`` line and stops at the next one.
+
+    Args:
+        skill_md_path: Path to the SKILL.md file.
+
+    Returns:
+        Dict mapping front matter keys to their string values; empty when
+        the file cannot be read or contains no front matter.
+    """
+    try:
+        async with aiofiles.open(skill_md_path, mode='r', encoding='utf-8') as f:
+            content = await f.read()
+    except (OSError, UnicodeDecodeError) as e:
+        # Best effort: one unreadable SKILL.md must not hide the other skills.
+        logger.warning(f"Could not read skill metadata from {skill_md_path}: {e}")
+        return {}
+
+    metadata = {}
+    in_frontmatter = False
+    for line in content.splitlines():
+        if line.strip() == '---':
+            if in_frontmatter:
+                break
+            in_frontmatter = True
+            continue
+        if in_frontmatter and ':' in line:
+            key, _, value = line.partition(':')
+            metadata[key.strip()] = value.strip()
+    return metadata
+
+
+def _extract_user_skill_archives():
+    """
+    Unpack every ``.zip`` archive found directly inside USER_SKILLS_DIR.
+
+    An archive is deleted only after it was extracted successfully. Corrupt
+    or unreadable archives are logged and left in place, so a single bad
+    upload cannot break skill discovery.
+    """
+    if not USER_SKILLS_DIR.exists():
+        return
+    # Snapshot the listing first: extraction adds entries to this directory.
+    for item in list(USER_SKILLS_DIR.iterdir()):
+        if not (item.is_file() and item.suffix.lower() == '.zip'):
+            continue
+        try:
+            with zipfile.ZipFile(item, 'r') as zip_ref:
+                # An archive with SKILL.md at its root gets its own directory
+                # (named after the archive); extracted loose into
+                # USER_SKILLS_DIR it would never be discovered as a skill.
+                is_flat = 'SKILL.md' in zip_ref.namelist()
+                target = USER_SKILLS_DIR / item.stem if is_flat else USER_SKILLS_DIR
+                zip_ref.extractall(target)
+            os.remove(item)
+        except (zipfile.BadZipFile, OSError) as e:
+            logger.warning(f"Skipping skill archive {item.name}: {e}")
+
+
+async def _collect_skills(directory):
+    """
+    Describe every skill found directly inside *directory*.
+
+    A sub-directory counts as a skill when it contains a SKILL.md file.
+
+    Args:
+        directory: Category directory to scan (public or user skills).
+
+    Returns:
+        List of ``{"name", "description"}`` dicts sorted by name; empty when
+        the directory does not exist.
+    """
+    found = []
+    if directory.exists():
+        for skill_dir in directory.iterdir():
+            skill_md_path = skill_dir / "SKILL.md"
+            if skill_dir.is_dir() and skill_md_path.exists():
+                metadata = await _parse_skill_frontmatter(skill_md_path)
+                found.append({
+                    "name": metadata.get("name", skill_dir.name),
+                    "description": metadata.get("description", "No description available."),
+                })
+    # Sort for consistent output
+    found.sort(key=lambda skill: skill["name"])
+    return found
+
+
+@mcp.tool()
+async def list_skills() -> str:
+    """
+    Lists all available skills in the CodeRunner container.
+
+    Skills are organized by category (public/user). Public skills are built
+    into the container, while user skills are added by users. Zip archives
+    placed in the user skills directory are extracted before listing.
+
+    Returns:
+        Plain-text listing of skill names and descriptions per category, or
+        a string starting with "Error:" when the listing fails.
+    """
+    try:
+        _extract_user_skill_archives()
+
+        sections = (
+            ("Public", await _collect_skills(PUBLIC_SKILLS_DIR)),
+            ("User", await _collect_skills(USER_SKILLS_DIR)),
+        )
+
+        lines = ["Available Skills:"]
+        for label, entries in sections:
+            lines.append("")
+            lines.append(f"{label} Skills ({len(entries)}):")
+            if entries:
+                lines.extend(f" - {s['name']}: {s['description']}" for s in entries)
+            else:
+                lines.append(" (none)")
+        lines.append("")
+        lines.append("Use get_skill_info(skill_name) to read documentation for a specific skill.")
+
+        return "\n".join(lines)
+
+    except Exception as e:
+        logger.error(f"Failed to list skills: {e}")
+        return f"Error: Failed to list skills: {str(e)}"
+
+
+def _locate_skill_file(skill_name, filename):
+    """
+    Find *filename* inside skill *skill_name*, public skills first.
+
+    The resolved path must stay inside the category directory, so a name
+    containing ``..`` or an absolute path cannot reach files outside the
+    skills tree.
+
+    Returns:
+        ``(path, skill_type)`` for the first match, ``(None, None)`` otherwise.
+    """
+    for skill_type, base_dir in (("public", PUBLIC_SKILLS_DIR), ("user", USER_SKILLS_DIR)):
+        base = base_dir.resolve()
+        candidate = (base / skill_name / filename).resolve()
+        try:
+            candidate.relative_to(base)
+        except ValueError:
+            continue
+        if candidate.is_file():
+            return candidate, skill_type
+    return None, None
+
+
+async def _read_skill_file(skill_name: str, filename: str) -> "tuple[str | None, str | None, str | None]":
+    """
+    Helper function to read a file from a skill's directory.
+
+    Args:
+        skill_name: The name of the skill
+        filename: The name of the file to read (e.g., 'SKILL.md', 'EXAMPLES.md')
+
+    Returns:
+        A tuple of (content, skill_type, error_message)
+        If successful, error_message is None
+        If failed, content and skill_type are None
+    """
+    try:
+        skill_file_path, skill_type = _locate_skill_file(skill_name, filename)
+        if skill_file_path is None:
+            return None, None, f"Error: File '{filename}' not found in skill '{skill_name}'. Use list_skills() to see available skills."
+
+        async with aiofiles.open(skill_file_path, mode='r', encoding='utf-8') as f:
+            content = await f.read()
+
+        # Skill docs may use /mnt/user-data paths; inside this container the
+        # same data lives under /app/uploads.
+        content = content.replace('/mnt/user-data', '/app/uploads')
+
+        return content, skill_type, None
+
+    except Exception as e:
+        logger.error(f"Failed to read file '{filename}' from skill '{skill_name}': {e}")
+        return None, None, f"Error: Failed to read file: {str(e)}"
+
+
+@mcp.tool()
+async def get_skill_info(skill_name: str) -> str:
+    """
+    Retrieves the documentation (SKILL.md) for a specific skill.
+
+    Args:
+        skill_name: The name of the skill (e.g., 'pdf-text-replace', 'image-crop-rotate')
+
+    Returns:
+        The content of the skill's SKILL.md file with usage instructions and examples,
+        or an "Error: ..." message when the skill has no readable SKILL.md.
+    """
+    content, skill_type, error = await _read_skill_file(skill_name, "SKILL.md")
+
+    if error:
+        return error
+
+    # Add header with skill type
+    header = f"Skill: {skill_name} ({skill_type})\n"
+    header += f"Location: /app/uploads/skills/{skill_type}/{skill_name}/\n"
+    header += "=" * 80 + "\n\n"
+
+    return header + content
+
+
+@mcp.tool()
+async def get_skill_file(skill_name: str, filename: str) -> str:
+    """
+    Retrieves a text file (typically markdown) from a skill's directory.
+    This is useful when SKILL.md references other documentation files like EXAMPLES.md, API.md, etc.
+
+    Args:
+        skill_name: The name of the skill (e.g., 'pdf-text-replace', 'image-crop-rotate')
+        filename: The name of the file to read (e.g., 'EXAMPLES.md', 'API.md', 'README.md')
+
+    Returns:
+        The content of the requested file with /mnt/user-data paths replaced with /app/uploads,
+        or an "Error: ..." message when the file cannot be read.
+
+    Example:
+        get_skill_file('pdf-text-replace', 'EXAMPLES.md')
+    """
+    content, skill_type, error = await _read_skill_file(skill_name, filename)
+
+    if error:
+        return error
+
+    # Add header with file info
+    header = f"Skill: {skill_name} ({skill_type})\n"
+    header += f"File: {filename}\n"
+    header += f"Location: /app/uploads/skills/{skill_type}/{skill_name}/{filename}\n"
+    header += "=" * 80 + "\n\n"
+
+    return header + content
+
+
 # Use the streamable_http_app as it's the modern standard
 app = mcp.streamable_http_app()
\ No newline at end of file
diff --git a/skills/public/image-crop-rotate/SKILL.md b/skills/public/image-crop-rotate/SKILL.md
new file mode 100644
index 0000000..a162b9d
--- /dev/null
+++ b/skills/public/image-crop-rotate/SKILL.md
@@ -0,0 +1,71 @@
+---
+name: image-crop-rotate
+description: Image processing skill for cropping images to 50% from center and rotating them 90 degrees clockwise. This skill should be used when users request image cropping to center, image rotation, or both operations combined on image files.
+--- + +# Image Crop and Rotate + +## Overview + +This skill provides functionality to crop images to 50% of their original size from the center and rotate them 90 degrees clockwise. It uses a reliable Python script with PIL/Pillow for consistent, high-quality image processing. + +## When to Use This Skill + +Use this skill when the user requests: +- Cropping an image to 50% from the center +- Rotating an image 90 degrees (clockwise) +- Both cropping and rotating an image +- Image processing tasks that combine center cropping with rotation + +## How to Use This Skill + +### Overview + +The skill provides a single script that performs both operations: cropping to 50% from center and rotating 90 degrees clockwise. + +### Process + +1. **Identify the input image**: Locate the user's uploaded image file in `/mnt/user-data/outputs/` + +2. **Execute the script**: Run the `crop_and_rotate.py` script with input and output paths: + ```bash + python scripts/crop_and_rotate.py <input_path> <output_path> + ``` + +3. **Provide the result**: Save the processed image to `/mnt/user-data/outputs/` and share it with the user + +### Script Details + +**`scripts/crop_and_rotate.py`** + +This script performs two operations in sequence: +1. Crops the image to 50% of its original size, centered +2. Rotates the cropped image 90 degrees clockwise + +**Usage:** +```bash +python scripts/crop_and_rotate.py input.jpg output.jpg +``` + +**Arguments:** +- First argument: Path to input image +- Second argument: Path to save processed image + +**Supported formats:** Any format supported by PIL/Pillow (JPEG, PNG, GIF, BMP, TIFF, etc.) + +**Output:** The script prints processing details including original size, cropped size, and final size + +### Example Workflow + +```bash +# Process an uploaded image +python /mnt/user-data/skills/public/image-crop-rotate/scripts/crop_and_rotate.py \ + /mnt/user-data/outputs/photo.jpg \ + /mnt/user-data/outputs/photo_processed.jpg +``` + +The script will: +1. Open the input image +2. 
Crop it to 50% from the center (e.g., 1000x800 → 500x400)
+3. Rotate the cropped image 90° clockwise (e.g., 500x400 → 400x500)
+4. Save the result to the output path
diff --git a/skills/public/image-crop-rotate/scripts/crop_and_rotate.py b/skills/public/image-crop-rotate/scripts/crop_and_rotate.py
new file mode 100644
index 0000000..ad7f402
--- /dev/null
+++ b/skills/public/image-crop-rotate/scripts/crop_and_rotate.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""
+Crop and rotate images.
+
+This script crops an image to 50% of its size from the center
+and rotates it 90 degrees clockwise.
+"""
+
+import sys
+from pathlib import Path
+from PIL import Image
+
+
+def crop_and_rotate(input_path, output_path):
+    """
+    Crop image to 50% from center and rotate 90 degrees clockwise.
+
+    Args:
+        input_path: Path to input image file
+        output_path: Path to save the processed image
+
+    Raises:
+        ValueError: If the image is narrower or shorter than 2 pixels, so
+            halving it would leave nothing to save.
+    """
+    print(f"Opening image: {input_path}")
+
+    # The context manager closes the source file even when a later step fails.
+    with Image.open(input_path) as img:
+        original_size = img.size
+        print(f"Original image size: {original_size}")
+
+        # Crop to 50% from center
+        width, height = original_size
+        new_width = width // 2
+        new_height = height // 2
+        if new_width == 0 or new_height == 0:
+            raise ValueError(f"Image is too small to crop to 50%: {original_size}")
+
+        # Calculate crop box (left, top, right, bottom)
+        left = (width - new_width) // 2
+        top = (height - new_height) // 2
+        cropped = img.crop((left, top, left + new_width, top + new_height))
+        print(f"Cropped image size: {cropped.size}")
+
+        # Rotate 90 degrees clockwise (using ROTATE_270 which is equivalent to 90° clockwise)
+        rotated = cropped.transpose(Image.ROTATE_270)
+        print(f"Final image size after rotation: {rotated.size}")
+
+        # Save the result
+        rotated.save(output_path)
+        print(f"Processed image saved to: {output_path}")
+
+    # Print summary
+    print("\n✓ Successfully processed image")
+    print(f" Input: {input_path}")
+    print(f" Output: {output_path}")
+    print(f" Original size: {original_size}")
+    print(f" Cropped size: {cropped.size}")
+    print(f" Final size: {rotated.size}")
+
+
+def main():
+    """Validate the command-line arguments, run the conversion, and exit 0 on success or 1 on any error."""
+    if len(sys.argv) != 3:
+        print("Usage: python crop_and_rotate.py <input_image> <output_image>")
+        print("\nExample:")
+        print(" python crop_and_rotate.py input.jpg output.jpg")
+        sys.exit(1)
+
+    input_image = sys.argv[1]
+    output_image = sys.argv[2]
+
+    # Validate input file exists
+    if not Path(input_image).exists():
+        print(f"ERROR: Input file does not exist: {input_image}")
+        sys.exit(1)
+
+    # Validate output directory exists
+    output_dir = Path(output_image).parent
+    if not output_dir.exists():
+        print(f"ERROR: Output directory does not exist: {output_dir}")
+        sys.exit(1)
+
+    try:
+        crop_and_rotate(input_image, output_image)
+    except Exception as e:
+        print(f"\nERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/public/pdf-text-replace/SKILL.md b/skills/public/pdf-text-replace/SKILL.md
new file mode 100644
index 0000000..453a3bc
--- /dev/null
+++ b/skills/public/pdf-text-replace/SKILL.md
@@ -0,0 +1,81 @@
+---
+name: pdf-text-replace
+description: Replace text in fillable PDF forms by updating form field values. This skill should be used when users need to update names, addresses, dates, or other text in PDF form fields.
+---
+
+# PDF Text Replace Skill
+
+Replace text in fillable PDF forms by updating form field values.
+
+## Description
+
+This skill allows you to search and replace text in PDF files that have fillable form fields. It scans all form fields in the PDF, finds fields containing the search text, and replaces it with the replacement text.
+
+## Use Cases
+
+- Update names in filled tax forms
+- Replace addresses in PDF documents
+- Update dates or reference numbers
+- Batch update form field values
+
+## Requirements
+
+- PDF must have fillable form fields (not flattened)
+- Python 3.7+
+- pypdf library
+
+## Usage
+
+### Basic Usage
+
+```bash
+python /app/uploads/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py \
+    /app/uploads/input.pdf \
+    "OLD TEXT" \
+    "NEW TEXT" \
+    /app/uploads/output.pdf
+```
+
+### Example: Replace Name in Tax Form
+
+```bash
+python /app/uploads/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py \
+    /app/uploads/f5472.pdf \
+    "MANISH KUMAR" \
+    "MANNU KUMAR" \
+    /app/uploads/f5472_updated.pdf
+```
+
+## Script Details
+
+**Script:** `scripts/replace_text_in_pdf.py`
+
+**Arguments:**
+1. `input_pdf` - Path to input PDF file
+2. `search_text` - Text to search for in form fields
+3. `replace_text` - Text to replace with
+4. `output_pdf` - Path to save the updated PDF
+
+**Output:**
+- Creates a new PDF with updated field values
+- Preserves all form fields (not flattened)
+- Reports number of fields modified
+
+## Limitations
+
+- Only works with fillable PDF forms (not scanned/image PDFs)
+- Replaces text in form field values, not static text
+- Case-sensitive search by default
+- Cannot modify flattened PDFs
+
+## Dependencies
+
+The script requires the `pypdf` library, which is included in the container requirements.
+
+## Error Handling
+
+The script will report errors if:
+- Input PDF doesn't exist
+- PDF doesn't have fillable form fields
+- Search text is not found
+- Output path is not writable
diff --git a/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py b/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py
new file mode 100644
index 0000000..d0b8677
--- /dev/null
+++ b/skills/public/pdf-text-replace/scripts/replace_text_in_pdf.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Replace text in fillable PDF form fields.
+
+This script searches for text in PDF form fields and replaces it with new text,
+creating a new PDF with the updated values while preserving the fillable form structure.
+"""
+
+import sys
+from pathlib import Path
+from pypdf import PdfReader, PdfWriter
+
+
+def replace_text_in_pdf(input_pdf_path, search_text, replace_text, output_pdf_path):
+    """
+    Replace text in PDF form fields.
+
+    Args:
+        input_pdf_path: Path to input PDF file
+        search_text: Text to search for in form fields (case-sensitive, must not be empty)
+        replace_text: Text to replace with
+        output_pdf_path: Path to save the updated PDF
+
+    Returns:
+        Number of fields modified. 0 means no output file was written,
+        either because the PDF has no form fields or because nothing matched.
+
+    Raises:
+        ValueError: If search_text is empty.
+    """
+    # str.replace("", x) would insert x between every character of a value.
+    if not search_text:
+        raise ValueError("Search text must not be empty.")
+
+    # Read the PDF
+    print(f"Reading PDF: {input_pdf_path}")
+    reader = PdfReader(input_pdf_path)
+
+    # Check if PDF has form fields
+    fields = reader.get_fields()
+    if not fields:
+        print("ERROR: This PDF does not have fillable form fields.")
+        print("This script only works with PDFs that have form fields.")
+        return 0
+
+    print(f"Total form fields: {len(fields)}")
+
+    # Extract current field values
+    print("\nExtracting current field values...")
+    field_values = {}
+    for field_name, field_data in fields.items():
+        value = field_data.get("/V")
+        if value:
+            field_values[field_name] = value
+
+    print(f"Found {len(field_values)} fields with values")
+
+    # Search and replace in field values
+    print(f"\nSearching for '{search_text}'...")
+    new_field_values = {}
+    for field_name, value in field_values.items():
+        if isinstance(value, str) and search_text in value:
+            new_value = value.replace(search_text, replace_text)
+            new_field_values[field_name] = new_value
+            print(f" Replacing in field '{field_name}': '{value}' → '{new_value}'")
+
+    modified_count = len(new_field_values)
+    if modified_count == 0:
+        print(f"No fields containing '{search_text}' were found.")
+        return 0
+
+    print(f"\nModified {modified_count} field(s)")
+
+    # Clone the document only now that there is something to write.
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    # Update form field values in the writer. Pages without annotations
+    # carry no form widgets, so they are skipped.
+    for page in writer.pages:
+        if "/Annots" in page:
+            writer.update_page_form_field_values(page, new_field_values)
+
+    # Write the output PDF
+    print(f"\nCreating updated PDF: {output_pdf_path}")
+    with open(output_pdf_path, "wb") as output_file:
+        writer.write(output_file)
+
+    print("\n✓ Successfully created updated PDF!")
+    print(f" Input: {input_pdf_path}")
+    print(f" Output: {output_pdf_path}")
+    print(f" Replacements: '{search_text}' → '{replace_text}'")
+    print(f" Fields modified: {modified_count}")
+
+    return modified_count
+
+
+def main():
+    """Parse the four command-line arguments, run the replacement, and exit 0 only if at least one field was modified."""
+    if len(sys.argv) != 5:
+        print("Usage: python replace_text_in_pdf.py <input_pdf> <search_text> <replace_text> <output_pdf>")
+        print("\nExample:")
+        print(" python replace_text_in_pdf.py input.pdf 'John Doe' 'Jane Smith' output.pdf")
+        sys.exit(1)
+
+    input_pdf = sys.argv[1]
+    search_text = sys.argv[2]
+    replace_text = sys.argv[3]
+    output_pdf = sys.argv[4]
+
+    # Validate input file exists
+    if not Path(input_pdf).exists():
+        print(f"ERROR: Input file does not exist: {input_pdf}")
+        sys.exit(1)
+
+    # Validate output directory exists
+    output_dir = Path(output_pdf).parent
+    if not output_dir.exists():
+        print(f"ERROR: Output directory does not exist: {output_dir}")
+        sys.exit(1)
+
+    try:
+        modified = replace_text_in_pdf(input_pdf, search_text, replace_text, output_pdf)
+    except Exception as e:
+        print(f"\nERROR: {e}")
+        sys.exit(1)
+    sys.exit(0 if modified > 0 else 1)
+
+
+if __name__ == "__main__":
+    main()