In [2]:
import hashlib
import json
import logging
import os
import re
import sqlite3
import time
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, Optional, Union

from groq import Groq

# ----------- CONFIGURATION ------------
# Root logging setup: WARNING and above only, timestamped one-line records.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.WARNING,
)
logger = logging.getLogger(__name__)

# The API key is read from the environment. When it is missing the module
# still loads, but `client` is left as None.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY:
    client = Groq(api_key=GROQ_API_KEY)
else:
    logger.error("GROQ_API_KEY not set in environment variables.")
    client = None

class SharedMemory:
    """SQLite-backed log of per-conversation activity.

    Every entry is stored as a JSON document in the ``conversation_logs``
    table together with its conversation id and a timestamp.

    Args:
        use_file_db: When true, store data in ``shared_memory.db`` in the
            current working directory; otherwise use an in-memory database
            that is private to this connection.
    """

    def __init__(self, use_file_db=False):
        db_path = 'shared_memory.db' if use_file_db else ':memory:'
        self.conn = sqlite3.connect(db_path)
        self._init_db()

    def _init_db(self):
        """Create the ``conversation_logs`` table if it does not exist yet."""
        cursor = self.conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS conversation_logs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                conversation_id TEXT NOT NULL,
                timestamp TEXT NOT NULL,
                data TEXT NOT NULL
            )
        ''')
        self.conn.commit()

    def log(self, conversation_id: str, data: Dict[str, Any]) -> None:
        """Append one entry to a conversation's log.

        The stored entry is ``data`` plus a ``timestamp`` key holding the
        current UTC time in ISO-8601 form (a ``timestamp`` key already in
        ``data`` takes precedence).

        Args:
            conversation_id: Conversation the entry belongs to.
            data: JSON-serializable mapping to store.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            **data
        }
        cursor = self.conn.cursor()
        cursor.execute('''
            INSERT INTO conversation_logs (conversation_id, timestamp, data)
            VALUES (?, ?, ?)
        ''', (conversation_id, entry["timestamp"], json.dumps(entry)))
        self.conn.commit()

    def get_context(self, conversation_id: str) -> list:
        """Return all entries of a conversation, oldest first.

        Entries with identical timestamps are returned in insertion order.
        """
        cursor = self.conn.cursor()
        # `id` breaks ties between rows written within the same clock tick.
        cursor.execute('''
            SELECT data FROM conversation_logs
            WHERE conversation_id = ?
            ORDER BY timestamp ASC, id ASC
        ''', (conversation_id,))
        return [json.loads(row[0]) for row in cursor.fetchall()]

    def get_last_extraction(self, conversation_id: str) -> dict:
        """Return the most recent ``extracted_values`` of a conversation.

        Returns:
            The value stored under the top-level ``extracted_values`` key of
            the newest entry that has one, or ``{}`` when there is none.
        """
        cursor = self.conn.cursor()
        cursor.execute('''
            SELECT data FROM conversation_logs
            WHERE conversation_id = ?
            AND data LIKE '%extracted_values%'
            ORDER BY timestamp DESC, id DESC
        ''', (conversation_id,))
        # The LIKE clause is only a cheap prefilter: it also matches entries
        # that merely mention the text inside some value, so the key is
        # checked on the parsed entry before it is used.
        for (raw,) in cursor.fetchall():
            entry = json.loads(raw)
            if isinstance(entry, dict) and "extracted_values" in entry:
                return entry["extracted_values"]
        return {}

    def clear_conversation(self, conversation_id: str) -> None:
        """Delete every logged entry of the given conversation."""
        cursor = self.conn.cursor()
        cursor.execute('DELETE FROM conversation_logs WHERE conversation_id = ?', (conversation_id,))
        self.conn.commit()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

# Module-level store shared by the agent and orchestrator functions in this
# file. Built with the default arguments, so it uses an in-memory SQLite
# database rather than the shared_memory.db file.
shared_memory = SharedMemory()

# ----------- GROQ COMPLETION UTILS ------------

def groq_completion(
    prompt: str,
    system: str = "",
    model: str = "llama3-70b-8192",
    response_format: Optional[str] = None,
    max_retries: int = 3,
    timeout: int = 30,
) -> Any:
    """Request a chat completion from the module-level Groq client.

    Args:
        prompt: User message. When ``response_format`` is ``"json"`` and the
            prompt does not mention "json", a short instruction asking for
            JSON output is appended.
        system: Optional system message; omitted from the request when empty.
        model: Model identifier passed to the API.
        response_format: ``"json"`` to request a JSON object response; any
            other value leaves the response format unset.
        max_retries: Maximum number of attempts.
        timeout: Per-request timeout passed to the client.

    Returns:
        The message content of the first choice in the response.

    Raises:
        RuntimeError: If the client was never initialized, or if every
            attempt failed (chained to the last underlying error).
    """
    if client is None:
        logger.error("Groq client not initialized.")
        raise RuntimeError("Groq client not available.")

    want_json = response_format == "json"
    if want_json and "json" not in prompt.lower():
        prompt += "\n\nPlease respond in JSON format."

    messages = ([{"role": "system", "content": system}] if system else []) + [
        {"role": "user", "content": prompt}
    ]

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.3,
                response_format={"type": "json_object"} if want_json else None,
                timeout=timeout,
            )
            return response.choices[0].message.content
        except Exception as e:
            last_error = e
            logger.warning("Groq completion failed (attempt %d): %s", attempt, e)
            # Exponential backoff between attempts only; sleeping after the
            # final failure would just delay the error.
            if attempt < max_retries:
                time.sleep(2 ** attempt)
    logger.error("Groq completion failed after retries.")
    raise RuntimeError("Groq completion failed.") from last_error

# ----------- AGENTS ------------

def classify_agent(input_data: Union[str, bytes]) -> dict:
    """Classifier Agent - determines the format and intent of an input.

    Bytes starting with the ``%PDF`` magic number are reported as PDF without
    calling the model. Everything else is converted to text, truncated to
    2000 characters and classified by the model.

    Args:
        input_data: Raw input as text or bytes.

    Returns:
        The JSON object produced by the model (expected keys: ``format`` and
        ``intent``), ``{"format": "PDF", "intent": "Unknown"}`` for PDF
        bytes, or ``{"format": "Email", "intent": "Other"}`` when
        classification fails or the reply is not a JSON object.
    """
    if isinstance(input_data, bytes):
        # Check for PDF magic number
        if input_data.startswith(b'%PDF'):
            return {"format": "PDF", "intent": "Unknown"}
        # Undecodable bytes become U+FFFD instead of falling back to the
        # "b'...'" repr, which would pollute the prompt with escape noise.
        content = input_data.decode('utf-8', errors='replace')[:2000]
    else:
        content = str(input_data)[:2000]

    system_prompt = (
        "Classify input into format (PDF, JSON, Email) and intent "
        "(Invoice, RFQ, Complaint, Regulation, Other). Output JSON with keys: 'format', 'intent'."
    )
    prompt = f"Input:\n{content}\n\nAnalysis:"
    fallback = {"format": "Email", "intent": "Other"}
    try:
        raw = groq_completion(prompt, system_prompt, response_format="json")
        result = json.loads(raw)
    except Exception as e:
        logger.error(f"Classification error: {e}")
        return fallback

    # Callers use dict access on the result; reject lists, strings, etc.
    if not isinstance(result, dict):
        logger.error(
            "Classification error: expected a JSON object, got %s",
            type(result).__name__,
        )
        return fallback
    return result

def json_agent(payload: dict, conversation_id: str, intent: str) -> dict:
    """JSON Agent - asks the model to extract data from a JSON payload.

    The prompt contains the conversation's last extraction (truncated to
    1000 characters), the intent, and the payload (truncated to 3000
    characters). A successful reply is parsed, logged to shared memory under
    the ``JSON_Agent`` name and returned.

    Returns:
        The parsed model reply, or ``{"extracted_data": {}, "anomalies":
        [<error text>]}`` when the completion, parsing or logging fails.
    """
    previous = shared_memory.get_last_extraction(conversation_id)
    previous_text = json.dumps(previous, indent=2)[:1000]
    payload_text = json.dumps(payload, indent=2)[:3000]

    system_prompt = "".join([
        "You are a JSON processing expert. Extract data to match the target schema. ",
        f"Current intent: {intent}. Flag anomalies/errors in 'anomalies' array.",
    ])

    prompt = "\n\n".join([
        f"Previous Context: {previous_text}",
        f"Target Schema:\n- Fields vary based on intent: {intent}",
        f"Input JSON Payload:\n{payload_text}",
        "Output JSON with keys: 'extracted_data' and 'anomalies'.",
    ])

    try:
        reply = groq_completion(prompt, system_prompt, response_format="json")
        data = json.loads(reply)

        # Record what this agent produced for later turns.
        activity = {
            "agent": "JSON_Agent",
            "input_intent": intent,
            "output": data,
        }
        shared_memory.log(conversation_id, activity)
    except Exception as err:
        logger.error(f"JSON agent error: {err}")
        return {"extracted_data": {}, "anomalies": [str(err)]}

    return data

def email_agent(content: str, conversation_id: str, intent: str) -> dict:
    """Email Agent - asks the model to summarize email/text content.

    Whitespace runs in ``content`` are collapsed to single spaces and the
    text is cut to 5000 characters before it is sent, together with the
    conversation's last extraction (truncated to 1000 characters). A
    successful reply is parsed, logged to shared memory under the
    ``Email_Agent`` name and returned.

    Returns:
        The parsed model reply, or ``{"error": <error text>}`` when the
        completion, parsing or logging fails.
    """
    # Normalize whitespace first, then truncate.
    collapsed = re.sub(r'\s+', ' ', content)
    cleaned_content = collapsed[:5000]
    previous = shared_memory.get_last_extraction(conversation_id)
    previous_text = json.dumps(previous, indent=2)[:1000]

    system_prompt = "".join([
        "You are an email processing expert. Extract key information. ",
        f"Current intent: {intent}. Output JSON with: sender, intent, urgency, topics, summary.",
    ])

    prompt = "\n\n".join([
        f"Previous Context: {previous_text}",
        f"Email Content:\n{cleaned_content}",
        "Output JSON with keys: sender, intent, urgency, topics, summary.",
    ])

    try:
        reply = groq_completion(
            prompt,
            system_prompt,
            model="llama3-70b-8192",
            response_format="json",
        )
        data = json.loads(reply)

        # Record what this agent produced for later turns.
        activity = {
            "agent": "Email_Agent",
            "input_intent": intent,
            "output": data,
        }
        shared_memory.log(conversation_id, activity)
    except Exception as err:
        logger.error(f"Email agent error: {err}")
        return {"error": str(err)}

    return data

def pdf_agent(pdf_bytes: bytes, conversation_id: str) -> dict:
    """PDF Agent - extracts text from a PDF and hands it to the email agent.

    Pages are read in order, each followed by a newline, until the collected
    text exceeds 10000 characters. The first 2000 characters are classified
    to obtain an intent, and the full collected text is then processed by
    ``email_agent``.

    Returns:
        The ``email_agent`` result, or ``{"error": ...}`` when PyPDF2 cannot
        be imported or any other exception occurs.
    """
    try:
        from io import BytesIO
        from PyPDF2 import PdfReader

        pieces = []
        collected = 0
        for page in PdfReader(BytesIO(pdf_bytes)).pages:
            piece = (page.extract_text() or "") + "\n"
            pieces.append(piece)
            collected += len(piece)
            if collected > 10000:  # Limit extraction
                break
        text = "".join(pieces)

        # The intent comes from classifying the extracted text itself.
        intent = classify_agent(text[:2000]).get("intent", "Unknown")

        # The email agent doubles as the generic text extractor.
        return email_agent(text, conversation_id, intent)

    except ImportError:
        logger.error("PyPDF2 required for PDF processing")
        return {"error": "PyPDF2 not installed"}
    except Exception as err:
        logger.exception("PDF processing failed")
        return {"error": str(err)}

# ----------- MAIN ORCHESTRATOR ------------

def _as_text(input_data: Any) -> str:
    """Return input as text; bytes are decoded as UTF-8 with replacement."""
    if isinstance(input_data, (bytes, bytearray)):
        return bytes(input_data).decode("utf-8", errors="replace")
    return str(input_data)


def process_input(input_data: Any, conversation_id: Optional[str] = None, clear_memory: bool = False) -> dict:
    """Main orchestrator: classify the input, route it to an agent, log it.

    Args:
        input_data: Raw input (text, bytes, or an already-parsed JSON value).
        conversation_id: Conversation to attach the result to. A random
            12-character hex id is generated when omitted or empty.
        clear_memory: When true, delete the conversation's existing log
            entries before processing.

    Returns:
        ``{"conversation_id", "classification", "result"}`` on success, or
        ``{"error", "conversation_id"}`` when processing raised.
    """
    if not conversation_id:
        # Random id: a hash of the clock repeats when two calls land in the
        # same clock tick, which would merge unrelated conversations.
        conversation_id = uuid.uuid4().hex[:12]

    if clear_memory:
        shared_memory.clear_conversation(conversation_id)

    try:
        # Step 1: Classify input
        classification = classify_agent(input_data)
        fmt = classification.get("format", "Email")
        intent = classification.get("intent", "Other")

        # Step 2: Route to appropriate agent. The format label comes from a
        # model, so compare it case-insensitively.
        route = str(fmt).strip().upper()
        if route == "JSON":
            if isinstance(input_data, (str, bytes, bytearray)):
                try:
                    # json.loads accepts str as well as UTF-8/16/32 bytes.
                    payload = json.loads(input_data)
                except ValueError:  # JSONDecodeError and UnicodeDecodeError
                    payload = {"raw_data": _as_text(input_data)[:1000]}
            else:
                payload = input_data
            result = json_agent(payload, conversation_id, intent)
        elif route == "PDF":
            pdf_bytes = input_data if isinstance(input_data, bytes) else input_data.encode()
            result = pdf_agent(pdf_bytes, conversation_id)
        else:  # Email or fallback
            result = email_agent(_as_text(input_data), conversation_id, intent)

        # Step 3: Log to shared memory
        shared_memory.log(conversation_id, {
            "source": "user_input",
            "type": fmt,
            "intent": intent,
            "extracted_values": result
        })

        # Return concise result
        return {
            "conversation_id": conversation_id,
            "classification": classification,
            "result": result
        }

    except Exception as e:
        logger.exception(f"Processing failed: {e}")
        # Include the id so the caller can still inspect the conversation.
        return {"error": str(e), "conversation_id": conversation_id}

def get_conversation_history(conversation_id: str) -> list:
    """Return the entries shared memory holds for ``conversation_id``."""
    history = shared_memory.get_context(conversation_id)
    return history

def print_result(result: dict, title: str = "RESULT"):
    """Print ``result`` as indented JSON under a ``=== title ===`` banner.

    The banner is preceded by a blank line and the JSON is followed by one.
    """
    banner = f"\n=== {title} ==="
    body = json.dumps(result, indent=2)
    # The trailing empty item yields the closing blank line.
    print(banner, body, "", sep="\n")

# ----------- EXAMPLE USAGE ------------

if __name__ == "__main__":
    # Demo run: push one email, one JSON document and (if present) one PDF
    # through the orchestrator and print each result.

    # Example 1: Process Email
    test_email = """
    From: customer@example.com
    Subject: Product Complaint - Urgent

    Dear Support Team,

    I recently purchased a laptop from your store (order ID: 45321). 
    The screen started flickering within 3 days of use. This is unacceptable for a new product.

    Please resolve this issue urgently.

    Regards,
    John Doe
    """

    result = process_input(test_email, clear_memory=True)
    print_result(result, "EMAIL PROCESSING")

    # Example 2: Process JSON
    test_json = {
        "invoice_id": "INV-2024-001",
        "customer": "Tech Corp",
        "amount": 15000,
        "items": ["Laptops", "Monitors", "Keyboards"]
    }

    result = process_input(json.dumps(test_json), clear_memory=True)
    print_result(result, "JSON PROCESSING")

    # Example 3: Process PDF (if the file exists)
    pdf_path = "regulation.pdf"
    try:
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
        result = process_input(pdf_bytes, clear_memory=True)
        print_result(result, "PDF PROCESSING")
    except FileNotFoundError:
        print("=== PDF PROCESSING ===")
        # Name the file that was actually looked for.
        print(f"No {pdf_path} file found - skipping PDF test")
        print()


=== EMAIL PROCESSING ===
{
  "conversation_id": "3022f0e749f4",
  "classification": {
    "format": "Email",
    "intent": "Complaint"
  },
  "result": {
    "sender": "customer@example.com",
    "intent": "Complaint",
    "urgency": "Urgent",
    "topics": [
      "Laptop",
      "Screen Flickering",
      "Product Defect"
    ],
    "summary": "Customer is complaining about a laptop screen flickering within 3 days of purchase and wants urgent resolution."
  }
}


=== JSON PROCESSING ===
{
  "conversation_id": "bba78d1def59",
  "classification": {
    "format": "JSON",
    "intent": "Invoice"
  },
  "result": {
    "extracted_data": {
      "invoice_id": "INV-2024-001",
      "customer": "Tech Corp",
      "amount": 15000,
      "items": [
        "Laptops",
        "Monitors",
        "Keyboards"
      ]
    },
    "anomalies": []
  }
}


=== PDF PROCESSING ===
{
  "conversation_id": "830092628fb1",
  "classification": {
    "format": "PDF",
    "intent": "Unknown"
  },
  "result": 