In [1]:
# The imports
from dotenv import load_dotenv, dotenv_values
from agents import Agent, Runner, trace, SQLiteSession, function_tool
from agents.mcp import MCPServerStdio
import asyncio
import time
import os
import json
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
import shutil
import nest_asyncio
import uvicorn
from agents.items import ToolCallOutputItem
from mimetypes import guess_type
from typing import Optional
from contextlib import asynccontextmanager
import threading
import requests
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from openai import OpenAI
import subprocess
import tempfile
from PyPDF2 import PdfReader
from pptx import Presentation
from PIL import Image
import base64
import csv
import pytz
from datetime import datetime

nest_asyncio.apply()

# Load environment variables
load_dotenv(override=True)
DOWNLOADS_DIR = Path.home() / "Downloads"
API_URL = "http://127.0.0.1:8000/chat"
async def read_upload_file_async(uploaded_file: UploadFile):
    """Async helper to read uploaded file content"""
    try:
        content = await uploaded_file.read()
        # Reset file pointer for potential re-reading
        await uploaded_file.seek(0)
        return content
    except Exception as e:
        print(f"Error reading uploaded file: {e}")
        return None
# Use a more robust tracking system
processed_files = set()  # Track by file path + creation time
file_creation_times = {} 

openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')
playwright_key=os.getenv('PLAYWRIGHT')
client = OpenAI()

# MCP Server configurations
fetch_params = {"command": "uvx", "args": ["mcp-server-fetch"]}

playwright_params = {
    "command": "npx",
    "args": [
        "@playwright/mcp@latest",
        "--browser", "chrome",
        "--extension",
        "--shared-browser-context",
    ],
    "env": {
        "PLAYWRIGHT_MCP_EXTENSION_TOKEN": playwright_key
    }
}

# Global variables
playwright_server = None
web_agent = None
file_agent = None

# Storage setup
STORAGE_ROOT = Path(os.getenv("STORAGE_ROOT", "../../storage")).resolve()
METADATA_FILE = STORAGE_ROOT / os.getenv("METADATA_FILE", "file_metadata.json")
UPLOADS_DIR = STORAGE_ROOT / "uploads"

STORAGE_ROOT.mkdir(parents=True, exist_ok=True)
UPLOADS_DIR.mkdir(exist_ok=True)
if not METADATA_FILE.exists():
    with open(METADATA_FILE, "w") as f:
        json.dump([], f)

def load_metadata():
    if METADATA_FILE.exists():
        with open(METADATA_FILE, "r") as f:
            try:
                data = json.load(f)
                return data if isinstance(data, list) else []
            except json.JSONDecodeError:
                return []
    return []

def save_metadata(data):
    with open(METADATA_FILE, "w") as f:
        json.dump(data, f, indent=4)

def get_current_time():
    tz = pytz.timezone("Asia/Kolkata")
    return datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")

# Web Agent Tools
@function_tool
def get_credentials(site_name: str):
    """Takes site name as input and gives username and password credentials of the site"""
    creds = dotenv_values(".env")
    username_key = f"{site_name.upper()}_USERNAME"
    password_key = f"{site_name.upper()}_PASSWORD"

    if username_key in creds and password_key in creds:
        return creds[username_key], creds[password_key]
    else:
        raise ValueError(f"Credentials for {site_name}")

@function_tool
def wait_for_user(message: str = "Solve CAPTCHA in the browser and press Enter to continue"):
    """Pause execution until the user confirms."""
    print("Hello")
    time.sleep(4)
    return "Resumed after user solved CAPTCHA"

# File Management Tools
@function_tool
def update_file_tool(file_name: str, new_category: str = None, new_tag: str = None):
    """Updates file metadata and optionally moves file between categories."""
    metadata = load_metadata()
    entry = next((m for m in metadata if m["name"] == file_name), None)

    if not entry:
        return {"error": f"File '{file_name}' not found in metadata."}

    src = Path(entry["path"])
    old_category_dir = src.parent

    if new_category:
        new_category_dir = STORAGE_ROOT / new_category
        new_category_dir.mkdir(parents=True, exist_ok=True)

        dst = new_category_dir / src.name
        if dst.exists():
            return {"error": f"File already exists in destination: {dst}"}

        shutil.move(str(src), dst)
        entry["path"] = str(dst)
        entry["category"] = new_category

        try:
            if old_category_dir.exists() and not any(old_category_dir.iterdir()):
                old_category_dir.rmdir()
        except Exception as e:
            print(f"Warning: Could not delete empty folder {old_category_dir}: {e}")

    if new_tag is not None:
        entry["tag"] = new_tag

    entry["last_accessed"] = get_current_time()
    save_metadata(metadata)

    return {
        "success": True,
        "file": entry["name"],
        "new_category": entry["category"],
        "new_tag": entry["tag"],
        "path": entry["path"]
    }

@function_tool
def add_file_tool(file_path: str, category: str, tag: str = None, name: str = None, overwrite: bool = False, description: str = None):
    """Uploads a file into STORAGE_ROOT/category/tag/ and records metadata."""
    src = Path(file_path).resolve()
    if not src.exists():
        return {"error": f"File not found: {src}"}

    if src.stat().st_size == 0:
        return {"error": f"File is empty: {src}"}

    if tag:
        target_dir = STORAGE_ROOT / category / tag
    else:
        target_dir = STORAGE_ROOT / category

    target_dir.mkdir(parents=True, exist_ok=True)

    if name:
        name = Path(name).stem
        final_name = f"{name}{src.suffix}"
    else:
        final_name = src.name

    dst = target_dir / final_name

    # Check if file already exists in metadata with same content
    metadata = load_metadata()
    existing_file = None
    for entry in metadata:
        if entry["name"] == final_name:
            existing_path = Path(entry["path"])
            if existing_path.exists():
                try:
                    # Compare file sizes and modification times
                    src_stat = src.stat()
                    dst_stat = existing_path.stat()
                    if (src_stat.st_size == dst_stat.st_size and 
                        abs(src_stat.st_mtime - dst_stat.st_mtime) < 5): 
                        existing_file = entry
                        break
                except:
                    pass

    if existing_file and not overwrite:
        print(f"📋 File already exists and appears identical: {final_name}")
        existing_file["last_accessed"] = get_current_time()
        save_metadata(metadata)
        return {
            "success": True,
            "stored_file": final_name,
            "category": existing_file["category"],
            "tag": existing_file["tag"],
            "description": existing_file["description"],
            "status": "already_exists"
        }

    # Handle naming conflicts for new files
    
    if dst.exists() and not overwrite:
        base, ext = dst.stem, dst.suffix
        i = 1
        while True:
            new_name = f"{base}_{i}{ext}"
            new_dst = target_dir / new_name
            if not new_dst.exists():
                dst = new_dst
                final_name = new_name
                break
            i += 1

    try:
        shutil.copy2(src, dst)
        if not dst.exists() or dst.stat().st_size == 0:
            return {"error": f"Failed to copy file properly: {dst}"}
    except Exception as e:
        return {"error": f"Failed to copy file: {e}"}

    entry = {
        "name": final_name,
        "path": str(dst),
        "category": category,
        "tag": tag,
        "description": description if description else "No description provided",
        "last_accessed": get_current_time(),
        "file_size": dst.stat().st_size
    }
    metadata.append(entry)
    save_metadata(metadata)

    return {
        "success": True,
        "stored_file": final_name,
        "category": category,
        "tag": tag,
        "description": entry["description"],
        "file_size": entry["file_size"],
        "status": "newly_added"
    }
@function_tool
def get_files_tool(name: str=None, category: str=None, tag: str=None, query: str=None):
    """Fetches files based on filters OR semantic query."""
    metadata = load_metadata()
    results = []
    if query:
        for entry in metadata:
            if query.lower() in entry.get("description","").lower():
                entry["last_accessed"] = datetime.now().isoformat()
                results.append(entry)
    else:
        for entry in metadata:
            match = True
            if name and entry["name"] != name:
                match = False
            if category and entry["category"] != category:
                match = False
            if tag and entry["tag"] != tag:
                match = False
            if match:
                entry["last_accessed"] = datetime.now().isoformat()
                results.append(entry)
    save_metadata(metadata)
    return {"files": results}

@function_tool
def load_metadata_tool():
    """Returns the current metadata.json content as a list of file entries."""
    return load_metadata()

@function_tool
def delete_files_tool(name: str=None, category: str=None, tag: str=None):
    """Deletes files and updates metadata based on filters."""
    metadata = load_metadata()
    updated_metadata = []
    deleted = []

    for entry in metadata:
        match = True
        if name and entry["name"] != name:
            match = False
        if category and entry["category"] != category:
            match = False
        if tag and entry["tag"] != tag:
            match = False

        if match:
            file_path = Path(entry["path"])
            if file_path.exists():
                try:
                    file_path.unlink()
                except Exception as e:
                    return {"error": f"Could not delete {file_path}: {e}"}
            deleted.append(entry)
        else:
            updated_metadata.append(entry)

    save_metadata(updated_metadata)

    def cleanup_empty_folders(root: Path):
        """Recursively remove empty folders under root, ignoring hidden files."""
        for folder in sorted(root.glob("**/*"), key=lambda x: len(x.parts), reverse=True):
            if folder.is_dir():
                if not any(f for f in folder.iterdir() if not f.name.startswith('.')):
                    try:
                        folder.rmdir()
                    except Exception:
                        pass

    cleanup_empty_folders(STORAGE_ROOT)

    return {
        "deleted": [d["name"] for d in deleted],
        "remaining_files": [f["name"] for f in updated_metadata]
    }

@function_tool
def read_contents(file_path: str, max_pages: int = 20, max_chars: int = 10000):
    """Reads the contents of a given file and returns a text preview."""
    path = Path(file_path).resolve()  # Use resolve() to get absolute path
    
    if not path.exists():
        return {"error": f"File not found: {file_path} (resolved: {path})"}

    # Check if file is empty
    try:
        file_size = path.stat().st_size
        if file_size == 0:
            return {"error": f"File is empty: {path}"}
        print(f"Reading file: {path} (size: {file_size} bytes)")
    except Exception as e:
        return {"error": f"Cannot access file: {path} - {e}"}

    text_content = ""

    try:
        if path.suffix.lower() == ".pdf":
            try:
                # Add more verbose error handling for PDF
                print(f"Attempting to read PDF: {path}")
                reader = PdfReader(str(path))
                print(f"PDF has {len(reader.pages)} pages")
                
                for i, page in enumerate(reader.pages[:max_pages]):
                    text_content += f"\n--- Page {i+1} ---\n"
                    extracted = page.extract_text()
                    text_content += extracted if extracted else "[No extractable text]\n"
                    
                if not text_content.strip():
                    return {"error": "PDF contains no extractable text"}
                    
            except Exception as e:
                return {"error": f"PDF reading failed: {e}"}

        elif path.suffix.lower() in [".pptx", ".ppt"]:
            try:
                text_content = ""
                with tempfile.TemporaryDirectory() as tmpdir:
                    tmpdir_path = Path(tmpdir)
                    pdf_path = tmpdir_path / (path.stem + ".pdf")

                    subprocess.run([
                        "/Applications/LibreOffice.app/Contents/MacOS/soffice",
                        "--headless",
                        "--convert-to", "pdf",
                        str(path),
                        "--outdir", str(tmpdir_path)
                    ], check=True)

                    if pdf_path.exists():
                        reader = PdfReader(str(pdf_path))
                        for i, page in enumerate(reader.pages[:max_pages]):
                            text_content += f"\n--- Page {i+1} ---\n"
                            text_content += page.extract_text() or ""
                    else:
                        return {"error": "PowerPoint to PDF conversion failed"}

            except Exception as e:
                return {"error": f"PowerPoint PDF extraction failed: {e}"}

        elif path.suffix.lower() == ".json":
            try:
                data = json.loads(path.read_text(encoding="utf-8", errors="ignore"))
                text_content = json.dumps(data, indent=2)[:max_chars]
            except Exception as e:
                return {"error": f"JSON reading failed: {e}"}

        elif path.suffix.lower() == ".csv":
            try:
                with open(path, newline="", encoding="utf-8", errors="ignore") as csvfile:
                    reader = csv.reader(csvfile)
                    rows = []
                    for i, row in enumerate(reader):
                        if i >= max_pages:
                            break
                        rows.append(", ".join(row))
                    text_content = "\n".join(rows)[:max_chars]
            except Exception as e:
                return {"error": f"CSV reading failed: {e}"}

        elif path.suffix.lower() in [".jpg", ".jpeg", ".png"]:
            try:
                with open(path, "rb") as img_file:
                    img_bytes = img_file.read()
                    if len(img_bytes) == 0:
                        return {"error": "Image file is empty"}
                    img_b64 = base64.b64encode(img_bytes).decode("utf-8")

                response = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[
                        {"role": "system", "content": "You are an assistant that describes images in plain language."},
                        {"role": "user", "content": f"Describe this image:\n[base64 image data]\n{img_b64}"}
                    ],
                    max_tokens=300
                )

                text_content = response.choices[0].message.content.strip()

            except Exception as e:
                return {"error": f"Image description failed: {e}"}

        else:
            # Try to read as text file
            try:
                text_content = path.read_text(encoding="utf-8", errors="ignore")[:max_chars]
            except Exception as e:
                return {"error": f"Text file reading failed: {e}"}

        if len(text_content) > max_chars:
            text_content = text_content[:max_chars] + "... [truncated]"

        return {
            "file_name": path.name,
            "path": str(path),
            "content_preview": text_content if text_content.strip() else "[No readable content]",
            "file_size": file_size
        }

    except Exception as e:
        return {"error": f"Unexpected error: {e}"}

# Create File Reader Agent
file_reader_agent = Agent(
    name="FileReaderAgent",
    instructions=(
        "You are a file classifier. "
        "1. Always read the file content using `read_contents(file_path)` and summarize it in 1-2 sentences. "
        "2. Use `load_metadata_tool()` to inspect existing files' descriptions, categories, and tags. "
        "3. Recommend the most appropriate **category** and **tag** for this file: "
        "   - The **category** should be a broad, high-level classification (e.g., 'reference_materials', 'novels', 'reports') in snake_case, lowercase, no spaces. "
        "   - The **tag** should NOT be the fine-grained topic (e.g., 'subnetting', 'normalization'). "
        "     Instead, it should be the broader subject-area that the file belongs to (e.g., 'networks', 'databases', 'operating_systems'). "
        "   - Let the **description** capture the finer details (e.g., 'This file explains subnetting in IPv4'). "
        "   - Reuse an existing category/tag if it already fits the content; create a new one only if necessary. "
        "4. Only output the recommended category, tag, and 1-2 sentence description. "
        "   Do not modify metadata.json directly."
    ),
    tools=[read_contents, load_metadata_tool],
    model="gpt-4o-mini"
)


reader_tool = file_reader_agent.as_tool(
    tool_name="file_reader",
    tool_description="Reads a file, generates a short description, and suggests the most appropriate category and tag for it."
)

# Create File Manager Agent
file_agent = Agent(
    name="FileManagerAgent",
    instructions=(
        "You are a file manager agent. "
        "Your main responsibility is to manage files: add, update, fetch metadata, and delete files. "
        "\n\n"
        "For file additions: "
        "- Always keep the original 'name' field exactly as the uploaded filename. "
        "- By default, call the `file_reader` tool to get the recommended description, category, and tag. "
        "- If the user explicitly provides a category and/or tag, use those values instead of calling `file_reader` for them. "
        "- If the user provides a description, use it; otherwise, use the one from `file_reader`. "
        "- Ensure that category and tag values are stored in snake_case (e.g., 'reference_materials', 'compiler_design'). "
        "- Never invent categories, tags, or descriptions yourself. "
        "\n\n"
        "For updating or moving files: "
        "- Only modify category, tag, or description if explicitly requested by the user. "
        "- Do not change the filename unless explicitly instructed. "
        "- Do not reclassify the file with `file_reader` unless the user requests reclassification."
        "\n\n"
        "For fetching files: "
        "- Analyze the metadata's description field to determine relevance. "
        "- Return files with all metadata fields (name, category, tag, path, description). "
        "- Never invent or assume content."
        "\n\n"
        "Always ground all decisions in metadata and outputs from `file_reader`. "
        "Never attempt to classify or describe files on your own."
    ),
    tools=[add_file_tool, get_files_tool, delete_files_tool, update_file_tool, reader_tool],
    model="gpt-4o-mini"
)

# Web Agent instructions
web_instructions = """
You are an autonomous browsing agent.

General Browsing Instructions:
- Use ONE browser tab/page for all navigation. Do not open multiple tabs.
- When navigating to websites, use the full URL including https:// (e.g., https://youtube.com)
- Wait for pages to fully load before taking actions.
- Always accept cookies and dismiss pop-ups (e.g., click "Accept", "Not Now") when prompted.
- Close any unnecessary modals, banners, or pop-ups that appear while browsing.

Login Instructions:
- Use the get_credentials tool to retrieve the correct username and password for the website.
- Enter credentials in their respective fields (do not input both in the same box).
- After filling out username and password click on login button (Button might be such as Submit, Log in, etc.).
- If captcha is present do not click on login button. Call the 'wait_for_user' tool.

Website Navigation & Search Instructions:
- Navigate the internet autonomously to complete the users instructions.
- If one website fails to provide the required content, try alternative approaches or websites until the task is completed.
- Follow the users instructions precisely while searching or interacting with the website content.
- Be specific with URLs - for YouTube, use https://www.youtube.com
"""

# OpenAI Routing Function
def get_routing_decision(user_input: str):
    """Use OpenAI API to determine which agent to route to"""
    
    system_prompt = """
You are a routing system that decides which specialized agent should handle user requests.

Classification Rules:
- investigator: Any request involving websites, web browsing, online login, web search, scraping, or internet-based tasks
  Examples: "login vtop", "search google", "browse youtube", "scrape website", "check social media"
  
- FileManagerAgent: Any request involving local file/folder operations 
  Examples: "organize files", "delete documents", "move photos", "search local files", file uploads

Input Processing Rules:
FOR INVESTIGATOR:
- Extract ONLY the "User message" part from the input
- Look for the line that starts with "User message:" and extract everything after it
- Ignore all file metadata, uploaded files info, and system context

FOR FileManagerAgent:
- Pass the COMPLETE original input including all metadata and context

Instructions:
1. Analyze the user's request (look for the "User message:" part)
2. Determine which agent should handle it based on the classification rules
3. Return a JSON response with exactly this format:
{
    "selected_agent": "investigator" or "FileManagerAgent",
    "input": "the appropriate input for the selected agent"
}
If no user input is present default fallback value for selected_agent is FileManagerAgent and input is Store all the files.
Make sure the User input is present in the input section of the JSON
Always return valid JSON. Do not include any other text or explanations outside the JSON.
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input}
            ],
            temperature=0.1,
            max_tokens=1000
        )
        
        routing_response = response.choices[0].message.content.strip()
        print(routing_response)
        return json.loads(routing_response)
        
    except Exception as e:
        print(f"Error in routing decision: {e}")
        # Fallback to file agent for safety
        return {
            "selected_agent": "FileManagerAgent",
            "input": "Store the files"
        }

def run_agent_sync(user_input: str, uploaded_files=None):
    """Run the agent routing system using OpenAI for routing decisions"""
    from agents import Runner
    from agents.items import ToolCallOutputItem
    
    # Get routing decision from OpenAI
    routing_decision = get_routing_decision(user_input)
    selected_agent = routing_decision["selected_agent"]
    agent_input = routing_decision["input"]
    
    print(f"Routing to: {selected_agent}")
    
    if selected_agent == "investigator":
        try:
            loop = asyncio.get_running_loop()
            result = loop.run_until_complete(Runner.run(web_agent, agent_input, max_turns=20))
        except RuntimeError:
            result = asyncio.run(Runner.run(web_agent, agent_input, max_turns=20))
            
    elif selected_agent == "FileManagerAgent":
        file_paths = []

        # Save uploaded files with better handling
        if uploaded_files:
            for uploaded_file in uploaded_files:
                if hasattr(uploaded_file, 'filename') and uploaded_file.filename:
                    file_path = UPLOADS_DIR / Path(uploaded_file.filename).name
                    
                    # Check if file already exists
                    file_already_exists = False
                    if file_path.exists():
                        try:
                            existing_size = file_path.stat().st_size
                            # For UploadFile objects, we need to read content to get size
                            if hasattr(uploaded_file, 'size') and uploaded_file.size == existing_size:
                                print(f"⏭️ File with same name and size already exists: {file_path}")
                                file_already_exists = True
                        except Exception as e:
                            print(f"Error checking existing file: {e}")
                    
                    if not file_already_exists:
                        try:
                            # Handle FastAPI UploadFile objects properly
                            with open(file_path, "wb") as f:
                                if hasattr(uploaded_file, 'file'):
                                    # Reset file pointer to beginning
                                    uploaded_file.file.seek(0)
                                    shutil.copyfileobj(uploaded_file.file, f)
                                else:
                                    # Fallback for other file-like objects
                                    if hasattr(uploaded_file, 'read'):
                                        uploaded_file.seek(0)  # Reset if possible
                                        content = uploaded_file.read()
                                    else:
                                        content = uploaded_file
                                    f.write(content)
                            
                            # Verify file was written correctly
                            if file_path.exists() and file_path.stat().st_size > 0:
                                file_paths.append(file_path)
                                print(f"💾 Saved new file: {file_path} (size: {file_path.stat().st_size})")
                            else:
                                print(f"⚠️ Failed to save file properly: {file_path}")
                                
                        except Exception as e:
                            print(f"⚠️ Error saving file {uploaded_file.filename}: {e}")
                    else:
                        # Still add to file_paths if it exists
                        file_paths.append(file_path)
        
        metadata = load_metadata()
        metadata_json = json.dumps(metadata, indent=2)

        user_input_formatted = f"Here is the current file metadata (name, category, tag, description, path):\n{metadata_json}\n\n"
        
        if agent_input:
            user_input_formatted += f"User message: {agent_input}\n"
        if file_paths:
            user_input_formatted += "Files to process:\n" + "\n".join(str(p) for p in file_paths)

        try:
            loop = asyncio.get_running_loop()
            result = loop.run_until_complete(Runner.run(file_agent, user_input_formatted))
        except RuntimeError:
            result = asyncio.run(Runner.run(file_agent, user_input_formatted))
    else:
        return f"Unknown agent: {selected_agent}", []
    
    # Extract files to show
    files_to_show = []
    for item in result.new_items:
        if isinstance(item, ToolCallOutputItem) and isinstance(item.output, dict):
            if "files" in item.output:
                files_to_show.extend([Path(f["path"]) for f in item.output["files"] if "path" in f])

    return result.final_output, files_to_show
def fetch_files_for_api(name: str=None, category: str=None, tag: str=None):
    """Fetches a file asked by the user and sends it back to him."""
    metadata = load_metadata()
    results = []
    for entry in metadata:
        match = True
        if name and entry["name"] != name:
            match = False
        if category and entry["category"] != category:
            match = False
        if tag and entry["tag"] != tag:
            match = False
        if match:
            results.append(entry)
    return {
        "files": [
            {"name": f["name"], "category": f["category"], "tag": f.get("tag"), "path": f["path"]}
            for f in results
        ]
    }
#Fix for duplicate file processing issues

import time
import threading
import requests
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

DOWNLOADS_DIR = Path.home() / "Downloads"
API_URL = "http://127.0.0.1:8000/chat"

# Use a more robust tracking system
processed_files = set()  # Track by file path + creation time
file_creation_times = {}  # Track when files were first seen

def wait_for_file_ready(file_path: Path, timeout=30, poll_interval=1):
    """Wait for file to be completely downloaded and stable"""
    start_time = time.time()
    last_size = -1
    stable_count = 0
    min_stable_checks = 3
    
    while time.time() - start_time < timeout:
        if not file_path.exists():
            time.sleep(poll_interval)
            continue
            
        try:
            current_size = file_path.stat().st_size
            
            if current_size == last_size and current_size > 0:
                stable_count += 1
                if stable_count >= min_stable_checks:
                    print(f"✅ File ready: {file_path} (size: {current_size} bytes)")
                    return True
            else:
                stable_count = 0
                
            last_size = current_size
            print(f"⏳ Waiting for file to stabilize: {current_size} bytes")
            
        except Exception as e:
            print(f"⚠️ Error checking file: {e}")
            
        time.sleep(poll_interval)
    
    print(f"⚠️ Timeout waiting for file: {file_path}")
    return False

def get_file_signature(file_path: Path):
    """Create a unique signature for a file based on path, size, and modification time"""
    try:
        stat = file_path.stat()
        return f"{file_path.name}_{stat.st_size}_{stat.st_mtime}"
    except:
        return str(file_path)

def is_file_already_processed(file_path: Path):
    """Check if this specific file has already been processed"""
    signature = get_file_signature(file_path)
    return signature in processed_files

def mark_file_as_processed(file_path: Path):
    """Mark this specific file as processed"""
    signature = get_file_signature(file_path)
    processed_files.add(signature)
    print(f"📝 Marked as processed: {signature}")

def cleanup_old_processed_files():
    """Clean up old entries from processed_files set to prevent memory bloat"""
    # Keep only the last 100 processed files to prevent unlimited growth
    if len(processed_files) > 100:
        # Convert to list, keep last 50, convert back to set
        recent_files = list(processed_files)[-50:]
        processed_files.clear()
        processed_files.update(recent_files)
        print("🧹 Cleaned up old processed files cache")

class DownloadHandler(FileSystemEventHandler):
    def on_created(self, event):
        if event.is_directory:
            return
            
        file_path = Path(event.src_path)

        # Ignore temp files and hidden files
        if (file_path.suffix in [".crdownload", ".part", ".tmp"] or 
            file_path.name.startswith('.') or
            file_path.name.startswith('~')):
            return

        print(f"[NEW FILE DETECTED] {file_path}")
        time.sleep(0.5)

        if is_file_already_processed(file_path):
            print(f"⏭️ File already processed, skipping: {file_path}")
            return

        file_creation_times[str(file_path)] = time.time()

        if not wait_for_file_ready(file_path, timeout=30, poll_interval=1):
            print(f"⚠️ File not ready: {file_path}")
            file_creation_times.pop(str(file_path), None)
            return

        # Check file is not empty
        try:
            file_size = file_path.stat().st_size
            if file_size == 0:
                print(f"⚠️ File is empty, waiting more...")
                time.sleep(2)
                if file_path.stat().st_size == 0:
                    print(f"⚠️ File still empty, skipping: {file_path}")
                    file_creation_times.pop(str(file_path), None)
                    return
        except Exception as e:
            print(f"⚠️ Error checking file size: {e}")
            file_creation_times.pop(str(file_path), None)
            return

        if is_file_already_processed(file_path):
            print(f"⏭️ File processed while waiting, skipping: {file_path}")
            return

        mark_file_as_processed(file_path)

        try:
            print(f"🚀 Processing file: {file_path} (size: {file_size})")
            
            # Determine content type
            content_type, _ = guess_type(str(file_path))
            if not content_type:
                content_type = "application/octet-stream"
            
            with open(file_path, "rb") as f:
                files = {"uploaded_files": (file_path.name, f, content_type)}
                data = {"message": "Process this downloaded file"}
                
                response = requests.post(
                    API_URL, 
                    files=files, 
                    data=data, 
                    timeout=120,  # Longer timeout for large files
                    headers={"User-Agent": "DownloadMonitor/1.0"}
                )

            if response.status_code == 200:
                try:
                    resp_json = response.json()
                    print(f"[AGENT RESPONSE] {resp_json.get('chat_reply', 'No response')}")
                except json.JSONDecodeError:
                    print(f"[AGENT RESPONSE] Non-JSON response: {response.text[:200]}")
                
                # Wait before deleting
                time.sleep(1)
                try:
                    if file_path.exists():
                        file_path.unlink()
                        print(f"✅ Deleted original from Downloads: {file_path}")
                    else:
                        print(f"ℹ️ File already removed: {file_path}")
                except Exception as delete_error:
                    print(f"⚠️ Could not delete file {file_path}: {delete_error}")
            else:
                print(f"⚠️ API call failed: {response.status_code} - {response.text[:200]}")
                # Remove from processed files so it can be retried
                signature = get_file_signature(file_path)
                processed_files.discard(signature)

        except requests.exceptions.Timeout:
            print(f"⚠️ API call timed out for {file_path}")
            signature = get_file_signature(file_path)
            processed_files.discard(signature)
        except requests.exceptions.ConnectionError:
            print(f"⚠️ Could not connect to API for {file_path}")
            signature = get_file_signature(file_path)
            processed_files.discard(signature)
        except Exception as e:
            print(f"[ERROR processing file] {e}")
            signature = get_file_signature(file_path)
            processed_files.discard(signature)
        
        finally:
            file_creation_times.pop(str(file_path), None)
            cleanup_old_processed_files()

    def on_modified(self, event):
        pass
    
    def on_moved(self, event):
        pass

def start_download_monitor():
    if not DOWNLOADS_DIR.exists():
        print(f"Downloads folder not found: {DOWNLOADS_DIR}")
        return None

    print(f"🚀 Starting fresh download monitor for {DOWNLOADS_DIR}")
    
    # Clear any existing processed files when starting
    processed_files.clear()
    file_creation_times.clear()
    
    # Scan existing files in downloads folder and mark them as already processed
    # to avoid processing old files
    existing_files = list(DOWNLOADS_DIR.glob("*"))
    for existing_file in existing_files:
        if existing_file.is_file() and not existing_file.name.startswith('.'):
            mark_file_as_processed(existing_file)
            print(f"📋 Marked existing file as processed: {existing_file.name}")

    event_handler = DownloadHandler()
    observer = Observer()
    observer.schedule(event_handler, str(DOWNLOADS_DIR), recursive=False)
    observer.start()
    print(f"👀 Monitoring {DOWNLOADS_DIR} for NEW downloads...")

    def _keep_alive():
        try:
            while True:
                time.sleep(5)  # Check every 5 seconds instead of 2
        except KeyboardInterrupt:
            print("🛑 Stopping download monitor...")
            observer.stop()
        observer.join()

    thread = threading.Thread(target=_keep_alive, daemon=True)
    thread.start()
    return observer

# Start monitoring
observer = start_download_monitor()


# FastAPI Application
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup logic
    global playwright_server, web_agent
    
    playwright_server = MCPServerStdio(
        params=playwright_params, 
        client_session_timeout_seconds=60
    )
    await playwright_server.connect()
    
    # Create the web_agent with connected MCP server
    web_agent = Agent(
        name="investigator",
        instructions=web_instructions, 
        model="gpt-5-mini",
        mcp_servers=[playwright_server],
        tools=[get_credentials, wait_for_user]
    )
    
    yield  # This is where the app runs
    
    # Shutdown logic
    if playwright_server:
        await playwright_server.close()

app = FastAPI(title="File Manager Agent", lifespan=lifespan)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],       
    allow_credentials=True,
    allow_methods=["*"],       
    allow_headers=["*"], 
)

@app.post("/chat")
async def chat_endpoint(
    message: Optional[str] = Form(None),
    uploaded_files: list[UploadFile] = File(None)
):
    if message == None:
        message = ""
    
    # Don't save files here - let run_agent_sync handle it
    metadata = load_metadata()
    
    # Prepare the input for routing
    user_input = message
    if uploaded_files and any(f.filename for f in uploaded_files if f.filename):
        filenames = [f.filename for f in uploaded_files if f.filename]
        user_input += f"\nFiles uploaded: {', '.join(filenames)}"

    # Run agent - pass the uploaded_files directly
    reply, files_to_show = run_agent_sync(user_input, uploaded_files)

    # Prepare clickable URLs
    files_info = []
    for f in files_to_show:
        meta = next((m for m in metadata if Path(m["path"]) == f), None)
        if meta:
            category = meta.get("category")
            tag = meta.get("tag")
            files_info.append({
                "name": meta["name"],
                "category": category,
                "tag": tag,
                "url": f"/download/{meta['name']}?category={category}&tag={tag}"
            })

    return JSONResponse({
        "chat_reply": reply,
        "files": files_info
    })
@app.get("/files")
def get_files():
    try:
        with open(METADATA_FILE, "r") as f:
            data = json.load(f)
        return {"files": data}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

@app.get("/download/{file_name}")
async def download_file(file_name: str, category: str=None, tag: str=None):
    files = fetch_files_for_api(name=file_name, category=category, tag=tag).get("files", [])
    if not files:
        return JSONResponse({"error": "File not found"}, status_code=404)
    
    file_path = Path(files[0]["path"])
    if not file_path.exists():
        return JSONResponse({"error": "File not found on disk"}, status_code=404)

    return FileResponse(path=file_path, filename=files[0]["name"])

@app.get("/", response_class=HTMLResponse)
async def home():
    return """
    <!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>File Manager Agent</title>
<style>
  body { font-family: Arial, sans-serif; padding: 20px; }
  .chat { margin-bottom: 10px; }
  .file-links { margin-top: 10px; }
  .uploaded-list { margin-top: 10px; color: green; }
</style>
</head>
<body>

<h2>🤖 File Manager Agent</h2>

<form id="chatForm">
  <div class="chat">
    <label>Message:</label><br>
    <input type="text" id="message" size="50">
  </div>
  <div class="chat">
    <label>Upload Files (optional):</label><br>
    <input type="file" id="file" multiple>
  </div>
  <button type="submit">Send</button>
</form>

<h3>Uploaded Files:</h3>
<div id="uploadedFiles" class="uploaded-list"></div>

<h3>Reply:</h3>
<div id="reply"></div>

<h3>Files:</h3>
<div id="files" class="file-links"></div>

<script>
const form = document.getElementById('chatForm');
const fileInput = document.getElementById('file');
const uploadedFilesDiv = document.getElementById('uploadedFiles');

fileInput.addEventListener('change', () => {
  uploadedFilesDiv.innerHTML = '';
  if (fileInput.files.length > 0) {
    uploadedFilesDiv.innerHTML = `<strong>${fileInput.files.length} file(s) selected:</strong><br>`;
    for (let i = 0; i < fileInput.files.length; i++) {
      uploadedFilesDiv.innerHTML += `• ${fileInput.files[i].name}<br>`;
    }
  } else {
    uploadedFilesDiv.innerHTML = 'No files selected';
  }
});

form.addEventListener('submit', async (e) => {
  e.preventDefault();

  const formData = new FormData();
  formData.append('message', document.getElementById('message').value);

  if (fileInput.files.length > 0) {
    for (let i = 0; i < fileInput.files.length; i++) {
      formData.append('uploaded_files', fileInput.files[i]);
    }
  }

  const response = await fetch('/chat', {
    method: 'POST',
    body: formData
  });

  const data = await response.json();
  document.getElementById('reply').innerText = data.chat_reply;

  const filesDiv = document.getElementById('files');
  filesDiv.innerHTML = '';
  if (data.files && data.files.length > 0) {
    data.files.forEach(f => {
      const link = document.createElement('a');
      link.href = f.url;
      link.innerText = f.name;
      link.target = '_blank';
      filesDiv.appendChild(link);
      filesDiv.appendChild(document.createElement('br'));
    });
  }

  // Clear file input, uploaded files list, and optionally text
  fileInput.value = "";
  uploadedFilesDiv.innerHTML = 'No files selected';
  document.getElementById('message').value = "";
});
</script>

</body>
</html>
    """

# Start monitoring downloads
observer = start_download_monitor()

# Main execution
async def main():
    config = uvicorn.Config(app=app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()

if __name__ == "__main__":
    asyncio.run(main())

Unhandled exception in FSEventsEmitter
Traceback (most recent call last):
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/watchdog/observers/fsevents.py", line 307, in run
    _fsevents.add_watch(self, self.watch, self.events_callback, self.pathnames)
RuntimeError: Cannot add watch <ObservedWatch: path='/Users/vishwajithp/Downloads', is_recursive=False> - it is already scheduled
INFO:     Started server process [1807]
INFO:     Waiting for application startup.


🚀 Starting fresh download monitor for /Users/vishwajithp/Downloads
📝 Marked as processed: Discrete Mathematics Lattices - javatpoint.pdf_1119857_1728901136.3970156
📋 Marked existing file as processed: Discrete Mathematics Lattices - javatpoint.pdf
📝 Marked as processed: download (1).png_244438_1743201260.0
📋 Marked existing file as processed: download (1).png
📝 Marked as processed: anydesk.dmg_26020146_1758627988.7091336
📋 Marked existing file as processed: anydesk.dmg
📝 Marked as processed: copy_of_tb_dataset.py_38223_1743257595.653002
📋 Marked existing file as processed: copy_of_tb_dataset.py
📝 Marked as processed: confusion_matrix.png_23356_1742625937.99556
📋 Marked existing file as processed: confusion_matrix.png
📝 Marked as processed: airport1.webp_79394_1718691678.8371398
📋 Marked existing file as processed: airport1.webp
📝 Marked as processed: shel.webp_168484_1706338621.1562645
📋 Marked existing file as processed: shel.webp
📝 Marked as processed: IMG_0010.HEIC_820826_1708238383

INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:63641 - "GET / HTTP/1.1" 200 OK
{
    "selected_agent": "investigator",
    "input": "open youtube and play baby by justin bieber"
}
Routing to: investigator
{
    "selected_agent": "investigator",
    "input": "open youtube and play baby by justin bieber"
}
Routing to: investigator


Error invoking MCP tool browser_navigate: Timed out while waiting for response to ClientRequest. Waited 60.0 seconds.


INFO:     127.0.0.1:63646 - "POST /chat HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/streams/memory.py", line 111, in receive
    return self.receive_nowait()
           ^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/streams/memory.py", line 106, in receive_nowait
    raise WouldBlock
anyio.WouldBlock

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/_core/_tasks.py", line 115, in fail_after
    yield cancel_scope
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/mcp/shared/session.py", line 272, in send_request
    response_or_error = await response_stream_reader.receive()
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/streams/memory.py", line 119, in receive
    await r

INFO:     127.0.0.1:63644 - "POST /chat HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
ERROR:    Traceback (most recent call last):
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/starlette/routing.py", line 692, in lifespan
    async with self.lifespan_context(app) as maybe_state:
  File "/opt/anaconda3/envs/llms/lib/python3.11/contextlib.py", line 217, in __aexit__
    await anext(self.gen)
  File "/var/folders/z_/z67qkp956931m3s2bm941x8r0000gn/T/ipykernel_1807/450753137.py", line 992, in lifespan
    await playwright_server.close()
          ^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'MCPServerStdio' object has no attribute 'close'

ERROR:    Application shutdown failed. Exiting.
INFO:     Finished server process [1807]


KeyboardInterrupt: 