# In [None]:  (notebook cell prompt left over from the Jupyter export)
# Cell 1: Core Imports and Base Classes (updated with timeouts)
import os
import pickle
import time
import json
import requests
import numpy as np
from collections import defaultdict
from dataclasses import dataclass
from typing import List, Optional, Dict, Tuple
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
import logging
from functools import lru_cache

# Root logging configuration for the notebook: INFO and above, each record
# prefixed with a timestamp and its level. `logger` is the module-level
# logger used by the classes defined below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

@dataclass
class Response:
    """Outcome of a single generation request, successful or not.

    ``BaseAgent.safe_generate`` builds these; ``text`` carries either the
    generated text or an error message, and ``error`` tells the two apart.
    """
    # Generated text, or a human-readable error message when `error` is True.
    text: str
    # `time.time()` value captured when the request started.
    timestamp: float
    # True when generation failed (invalid prompt, client error, timeout).
    error: bool = False
    # Seconds elapsed between the start of the request and its completion.
    processing_time: float = 0.0
    # Description of the failure; empty string on success.
    error_details: str = ""
    # True when a wait on the generation call timed out.
    timeout: bool = False
    # The three fields below are not populated by any code visible in this
    # file; they default to "unset".
    critique: Optional[str] = None
    refinement: Optional[str] = None
    iterations: int = 0

class OllamaClient:
    """HTTP client for an Ollama server with retries and request timeouts.

    On construction the client checks ``/api/tags`` for the configured model
    and pulls it through ``/api/pull`` when it is not listed. Text generation
    goes through ``/api/generate`` with streaming disabled.
    """

    # Prompts longer than this are cut before being sent to the server.
    MAX_PROMPT_CHARS = 4000

    def __init__(self, model_name: str = "llama2:13b", base_url: str = "http://localhost:11434"):
        """Store the connection settings and verify the model is available.

        Args:
            model_name: Model tag to use for generation.
            base_url: Root URL of the Ollama HTTP API.

        Raises:
            ConnectionError: If the model could not be verified after
                ``max_retries`` attempts.
        """
        self.model_name = model_name
        self.base_url = base_url
        self.max_retries = 5  # Increased from 3
        self.request_timeout = 300  # 5 minutes
        self._verify_model()

    def _parse_json_safe(self, text) -> dict:
        """Parse ``text`` as a JSON object without ever raising.

        Args:
            text: ``str`` or ``bytes`` holding JSON, possibly surrounded by
                other text (the outermost ``{...}`` span is tried as a
                fallback).

        Returns:
            The parsed object when it is a JSON object (``dict``); otherwise
            a dict with a single ``"error"`` key describing the problem, so
            callers can always use ``.get`` / ``in`` on the result.
        """
        if text is None:
            return {"error": "Empty response"}
        # Streamed lines arrive as bytes; decode once so error messages and
        # slicing behave the same as for str input.
        if isinstance(text, (bytes, bytearray)):
            text = bytes(text).decode("utf-8", errors="replace")
        if not isinstance(text, str):
            return {"error": f"Unsupported response type: {type(text).__name__}"}

        clean_text = text.strip()
        if not clean_text:
            return {"error": "Empty response"}

        invalid = {"error": f"Invalid JSON format: {clean_text[:200]}..."}
        try:
            parsed = json.loads(clean_text)
        except (ValueError, RecursionError):
            # Fall back to the outermost brace-delimited substring.
            start = clean_text.find('{')
            end = clean_text.rfind('}')
            if start == -1 or end <= start:
                return invalid
            try:
                parsed = json.loads(clean_text[start:end + 1])
            except (ValueError, RecursionError):
                return invalid

        if not isinstance(parsed, dict):
            return {"error": f"Expected a JSON object, got {type(parsed).__name__}"}
        return parsed

    def _verify_model(self):
        """Ensure the configured model is available, pulling it if needed.

        Retries with exponential backoff on any failure, including a failed
        pull and a non-200 status from ``/api/tags``.

        Raises:
            ConnectionError: When every attempt failed.
        """
        for attempt in range(self.max_retries):
            try:
                resp = requests.get(f"{self.base_url}/api/tags", timeout=10)
                if resp.status_code == 200:
                    data = self._parse_json_safe(resp.text)
                    models = [
                        m.get('name', '')
                        for m in (data.get('models') or [])
                        if isinstance(m, dict)
                    ]
                    if not any(self.model_name in m for m in models):
                        self._pull_model()
                    return
                logger.warning(f"Model check failed (status {resp.status_code})")
            except Exception as e:
                logger.warning(f"Model check attempt {attempt+1} failed: {e}")
            # Back off before the next attempt, but not after the last one.
            if attempt < self.max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
        raise ConnectionError(f"Couldn't connect to Ollama after {self.max_retries} attempts")

    def _pull_model(self):
        """Pull the configured model, logging each streamed status line.

        Raises:
            Exception: Any request failure (including a non-success HTTP
                status) is logged and re-raised.
        """
        try:
            # The context manager releases the streamed connection even when
            # iteration stops early.
            with requests.post(
                f"{self.base_url}/api/pull",
                json={"name": self.model_name},
                stream=True,
                timeout=600
            ) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines():
                    if not line:
                        continue
                    payload = self._parse_json_safe(line)
                    if 'error' in payload:
                        logger.warning(f"Pull reported a problem: {payload['error']}")
                        continue
                    logger.info(f"Pull progress: {payload.get('status', '')}")
        except Exception as e:
            logger.error(f"Model pull failed: {e}")
            raise

    def generate(self, prompt: str) -> Tuple[str, bool]:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Prompt text; only the first ``MAX_PROMPT_CHARS``
                characters are sent.

        Returns:
            ``(text, error)``. On success ``text`` is the model's response
            and ``error`` is False. On failure ``text`` is an
            ``"Error: ..."`` message and ``error`` is True.
        """
        if len(prompt) > self.MAX_PROMPT_CHARS:
            logger.warning(
                f"Prompt truncated from {len(prompt)} to {self.MAX_PROMPT_CHARS} characters"
            )
        payload = {
            "model": self.model_name,
            "prompt": prompt[:self.MAX_PROMPT_CHARS],  # Input safety
            "stream": False,
            "options": {"temperature": 0.5}
        }

        for attempt in range(self.max_retries):
            try:
                # requests enforces the timeout itself; wrapping the call in
                # a thread pool added nothing because leaving the pool's
                # `with` block waits for the worker anyway.
                resp = requests.post(
                    f"{self.base_url}/api/generate",
                    json=payload,
                    timeout=self.request_timeout
                )
            except requests.exceptions.Timeout:
                # A timed-out generation is not retried: another attempt
                # would most likely just wait the same amount of time again.
                logger.warning(f"Generation timed out (attempt {attempt+1})")
                return f"Error: Timeout after {self.request_timeout}s", True
            except Exception as e:
                logger.warning(f"Attempt {attempt+1} failed: {e}")
            else:
                data = self._parse_json_safe(resp.text)
                if resp.status_code == 200 and "error" not in data:
                    return data.get("response", ""), False
                # Error payloads and bad statuses are failures, not empty
                # successful generations.
                reason = data.get("error", f"HTTP {resp.status_code}")
                logger.warning(f"Attempt {attempt+1} failed: {reason}")
            if attempt < self.max_retries - 1:
                time.sleep(1)
        return f"Error: Failed after {self.max_retries} attempts", True

# Cell 2: Enhanced Base Agent Framework
class BaseAgent:
    """Base class for agents that call the model under a wall-clock budget."""

    def __init__(self, client: OllamaClient):
        """Keep the shared client and set the retry and wait limits.

        Args:
            client: Client whose ``generate(prompt)`` returns
                ``(text, error)``.
        """
        self.client = client
        self.retry_count = 3
        self.max_wait = 300  # 5 minutes

    def safe_generate(self, prompt: str) -> Response:
        """Run ``client.generate`` and always return a ``Response``.

        Exceptions raised by the client are retried up to ``retry_count``
        times. A timeout ends the attempt loop immediately: the abandoned
        request may still be running, and queueing another one behind it
        would only add load.

        Args:
            prompt: Prompt text; must be a non-blank string.

        Returns:
            A ``Response`` with ``error`` and ``timeout`` set accordingly.
            This method does not raise for generation failures.
        """
        start_time = time.time()

        if not isinstance(prompt, str) or not prompt.strip():
            return Response(
                text="Error: Invalid input prompt",
                timestamp=start_time,
                error=True,
                error_details="Empty or non-string prompt",
                processing_time=0.0
            )

        last_error = None
        timeout_occurred = False

        for attempt in range(self.retry_count):
            # The executor is managed by hand: leaving a `with` block would
            # join the worker thread and so defeat `max_wait`.
            executor = ThreadPoolExecutor(max_workers=1)
            try:
                future = executor.submit(self.client.generate, prompt)
                text, error = future.result(timeout=self.max_wait)
            except FutureTimeoutError:
                timeout_occurred = True
                last_error = f"Generation timed out after {self.max_wait}s"
                logger.error(last_error)
                break
            except Exception as e:
                last_error = str(e)
                logger.error(f"Generation error: {e}")
            else:
                return Response(
                    text=text,
                    timestamp=start_time,
                    error=error,
                    processing_time=time.time() - start_time,
                    error_details=text if error else "",
                    timeout=False
                )
            finally:
                # Do not wait for a worker that may still be blocked on the
                # HTTP call.
                executor.shutdown(wait=False)

        return Response(
            text=f"Final error: {last_error}" if last_error is not None else "Unknown error",
            timestamp=start_time,
            error=True,
            error_details=last_error or "",
            processing_time=time.time() - start_time,
            timeout=timeout_occurred
        )

# Cell 3: Specialized Cover Letter Agents
class CoverLetterContextAgent(BaseAgent):
    """Job-specific context analyzer"""

    # The client sends at most 4000 prompt characters and the template below
    # adds about 130. Capping each field at 1900 keeps the closing
    # "Extract Key Matching Points:" instruction from being cut off.
    MAX_JOB_DESC_CHARS = 1900
    MAX_RESUME_CHARS = 1900

    def enrich_context(self, job_desc: str, resume_data: str) -> Tuple[str, float]:
        """Ask the model for the points where the resume matches the job.

        Args:
            job_desc: Job description text; a placeholder is used when empty.
            resume_data: Resume text; a placeholder is used when empty.

        Returns:
            ``(text, processing_time)`` taken from the generation response.
            ``text`` is whatever ``safe_generate`` produced, including an
            error message when generation failed.
        """
        safe_job_desc = (
            job_desc[:self.MAX_JOB_DESC_CHARS] if job_desc else "No job description provided"
        )
        safe_resume = (
            resume_data[:self.MAX_RESUME_CHARS] if resume_data else "No resume data provided"
        )

        prompt = f"""Analyze job requirements and resume:
        Job Description: {safe_job_desc}
        Applicant Background: {safe_resume}
        Extract Key Matching Points:"""

        response = self.safe_generate(prompt)
        return response.text, response.processing_time

class CoverLetterAnalyzer(BaseAgent):
    """Cover letter quality control"""

    # The client sends at most 4000 prompt characters and the instructions
    # below take roughly 250. These caps keep the "Output JSON with" section
    # inside the part of the prompt that is actually sent.
    MAX_JOB_DESC_CHARS = 1200
    MAX_LETTER_CHARS = 2500

    def analyze_letter(self, letter: str, job_desc: str) -> Dict[str, str]:
        """Ask the model to score a cover letter against a job posting.

        Args:
            letter: Cover letter text; a placeholder is used when empty.
            job_desc: Job posting text; a placeholder is used when empty.

        Returns:
            The parsed JSON object from the model, or a dict with an
            ``"error"`` key when generation failed or the reply was not
            valid JSON.
        """
        posting = job_desc[:self.MAX_JOB_DESC_CHARS] if job_desc else 'Generic position'
        content = letter[:self.MAX_LETTER_CHARS] if letter else 'Empty draft'

        prompt = f"""Evaluate cover letter completeness:
        Job Posting: {posting}
        Letter Content: {content}
        Output JSON with:
        - missing_skills: List[str]
        - score: float (0-1)
        - strength_areas: List[str]
        - improvement_suggestions: List[str]"""

        response = self.safe_generate(prompt)
        if response.error:
            # Report the real failure instead of trying to parse an error
            # message as JSON.
            return {"error": response.error_details or response.text}
        return self.client._parse_json_safe(response.text)

class ATSOptimizer(BaseAgent):
    """Agent that asks the model to review a document for ATS compliance."""

    def check_ats(self, text: str) -> Dict[str, str]:
        """Request an ATS review of ``text`` and return the parsed reply.

        Only the first 3000 characters of ``text`` are included in the
        prompt; a placeholder is sent when ``text`` is empty. The return
        value is whatever the client's safe JSON parser yields for the
        model's reply.
        """
        document = text[:3000] if text else 'Empty document'
        prompt = f"""Optimize document for Applicant Tracking Systems:
        {document}
        Check for:
        - Keyword matching
        - Section headers (Contact, Introduction, Skills, Experience, Closing)
        - Readability metrics
        - File format considerations
        Output JSON with:
        - compliance_score: float (0-1)
        - missing_keywords: List[str]
        - formatting_issues: List[str]
        - optimization_tips: List[str]"""

        reply = self.safe_generate(prompt)
        parse = self.client._parse_json_safe
        return parse(reply.text)

# Cell 4: Robust Cover Letter System
class CoverLetterSystem:
    """End-to-end cover letter optimization system"""

    # Field caps for the draft prompt. The client sends at most 4000 prompt
    # characters and the draft template adds close to 300, so these keep the
    # trailing requirements and the "Cover Letter:" cue from being cut off.
    _DRAFT_CONTEXT_CHARS = 1400
    _DRAFT_RESUME_CHARS = 2200

    def __init__(self):
        """Create the shared client and the three specialised agents."""
        self.client = OllamaClient(model_name="llama2:13b")
        self.agents = {
            'context': CoverLetterContextAgent(self.client),
            'analyzer': CoverLetterAnalyzer(self.client),
            'ats': ATSOptimizer(self.client)
        }
        self.history = []
        self.metrics = defaultdict(lambda: {'count': 0, 'errors': 0, 'timeouts': 0})

    def improve_letter(self, resume: str, job_desc: str, draft: str = "") -> Dict:
        """Main optimization pipeline with fallback handling.

        Steps: context analysis, draft generation (only when no draft was
        supplied), content analysis, ATS check. A failing step is recorded
        in ``errors`` or ``warnings`` and the pipeline carries on; when no
        draft is available the analysis and ATS steps are skipped.

        Args:
            resume: Resume text; coerced with ``str`` and capped at 5000
                characters.
            job_desc: Job description; coerced and capped at 5000
                characters.
            draft: Optional existing draft; coerced and capped at 10000
                characters.

        Returns:
            Dict with keys ``draft``, ``analysis``, ``ats_check``,
            ``errors``, ``warnings`` and ``metrics``.
        """
        result = {
            'draft': '',
            'analysis': {},
            'ats_check': {},
            'errors': [],
            'warnings': [],
            'timings': {}
        }

        # Input validation
        valid_resume = str(resume)[:5000] if resume else ""
        valid_job_desc = str(job_desc)[:5000] if job_desc else ""
        valid_draft = str(draft)[:10000] if draft else ""

        pipeline_start = time.time()

        # Context analysis
        try:
            context_text, context_time = self.agents['context'].enrich_context(
                valid_job_desc, valid_resume
            )
            result['analysis']['context'] = context_text
            result['timings']['context'] = context_time
        except Exception as e:
            result['errors'].append(f"Context analysis failed: {str(e)}")

        # Draft generation/improvement
        draft_start = time.time()
        if not valid_draft:
            try:
                valid_draft = self._generate_initial_draft(
                    result['analysis'].get('context', ""),
                    valid_resume
                )
            except Exception as e:
                # Leave the draft empty so a failure message is never
                # returned, or analysed, as if it were a letter.
                valid_draft = ""
                result['errors'].append(f"Draft generation failed: {str(e)}")
        result['draft'] = valid_draft
        result['timings']['draft'] = time.time() - draft_start

        if not valid_draft:
            result['warnings'].append("Analysis and ATS check skipped: no draft available")
            result['timings']['analysis'] = 0.0
            result['timings']['ats'] = 0.0
        else:
            # Content analysis
            analysis_start = time.time()
            try:
                analysis = self.agents['analyzer'].analyze_letter(valid_draft, valid_job_desc)
                if not isinstance(analysis, dict):
                    result['errors'].append(
                        f"Analysis failed: unexpected result type {type(analysis).__name__}"
                    )
                elif 'error' in analysis:
                    result['errors'].append(f"Analysis failed: {analysis.get('error')}")
                else:
                    # Merge rather than replace, so the context stored
                    # above survives next to the analyzer's fields.
                    result['analysis'] = {**analysis, **result['analysis']}
            except Exception as e:
                result['errors'].append(f"Analysis crashed: {str(e)}")
            result['timings']['analysis'] = time.time() - analysis_start

            # ATS check
            ats_start = time.time()
            try:
                ats_check = self.agents['ats'].check_ats(valid_draft)
                if not isinstance(ats_check, dict):
                    result['warnings'].append(
                        f"ATS check incomplete: unexpected result type {type(ats_check).__name__}"
                    )
                elif 'error' in ats_check:
                    result['warnings'].append(f"ATS check incomplete: {ats_check.get('error')}")
                else:
                    result['ats_check'] = ats_check
            except Exception as e:
                result['warnings'].append(f"ATS check failed: {str(e)}")
            result['timings']['ats'] = time.time() - ats_start

        # Final compilation
        result['timings']['total'] = time.time() - pipeline_start
        return self._compile_final_result(result)

    def _generate_initial_draft(self, context: str, resume: str) -> str:
        """Generate a first draft from the context summary and the resume.

        Args:
            context: Output of the context analysis step (may be empty).
            resume: Validated resume text.

        Returns:
            The generated letter text.

        Raises:
            RuntimeError: When generation failed or produced only
                whitespace.
        """
        prompt = f"""Generate cover letter draft:
        Job Requirements: {context[:self._DRAFT_CONTEXT_CHARS]}
        Applicant Background: {resume[:self._DRAFT_RESUME_CHARS]}
        Requirements:
        - 3-4 professional paragraphs
        - Specific role references
        - Quantified achievements
        - Proper business letter format

        Cover Letter:"""

        response = self.agents['context'].safe_generate(prompt)
        if response.error or not response.text.strip():
            detail = getattr(response, 'error_details', '') or "empty response"
            raise RuntimeError(f"Could not generate initial draft due to errors: {detail}")
        return response.text

    def _compile_final_result(self, raw_data: dict) -> dict:
        """Project the working dict onto the public result structure.

        Missing keys, including ``timings``, fall back to empty defaults.
        The ``total`` timing becomes ``metrics['processing_time']`` and
        every other timing is listed under ``metrics['steps']``.
        """
        timings = raw_data.get('timings', {})
        return {
            'draft': raw_data.get('draft', ''),
            'analysis': raw_data.get('analysis', {}),
            'ats_check': raw_data.get('ats_check', {}),
            'errors': raw_data.get('errors', []),
            'warnings': raw_data.get('warnings', []),
            'metrics': {
                'processing_time': timings.get('total', 0),
                'steps': {k: v for k, v in timings.items() if k != 'total'}
            }
        }

# Cell 5: Robust Testing Harness
def stress_test():
    """Run the pipeline over a fixed set of edge-case inputs.

    Each case runs in a worker thread with a five-minute wait limit.
    Results, errors and timings are printed; nothing is returned. A case
    that times out is reported and abandoned, so its worker may still be
    running while the next case starts.
    """
    system = CoverLetterSystem()
    test_resume = (
        "Experienced software engineer with 5+ years in Python development. "
        "Led team of 8 developers to deliver 3 major fintech applications. "
        "AWS Certified Developer, expertise in cloud architecture."
    )

    # Each tuple is (job_desc, resume, draft).
    test_cases = [
        ("Senior Python Developer position at FinTech startup requiring cloud experience", test_resume, ""),
        ("", test_resume, "Existing draft content..."),  # Empty job description
        ("Machine Learning Engineer role at AI research lab", "", "Sample draft..."),  # Empty resume
        ("X"*10000, "Short resume", "Y"*5000),  # Long inputs
        (12345, {"invalid": "resume"}, 54321),  # Wrong data types
        ("Normal valid job description", test_resume, "Good draft text...")
    ]

    for idx, (job_desc, resume, draft) in enumerate(test_cases):
        print(f"\n=== Test Case {idx+1} ===")
        start_time = time.time()
        # Managed by hand: a `with` block would wait for the worker on exit
        # and make the per-case timeout ineffective.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(system.improve_letter, resume, job_desc, draft)
            result = future.result(timeout=300)  # 5 minute timeout per test

            print(f"Status: {'Success' if not result['errors'] else 'Partial success'}")
            print(f"Time: {result['metrics']['processing_time']:.2f}s")

            if result['draft']:
                print(f"Draft length: {len(result['draft'])} chars")
                print("First 200 chars:", result['draft'][:200])

            if result['errors']:
                print("Errors:", result['errors'][:3])

            if result['warnings']:
                print("Warnings:", result['warnings'][:3])

        except FutureTimeoutError:
            print(f"Test {idx+1} timed out after 5 minutes")
        except Exception as e:
            print(f"Test {idx+1} crashed: {str(e)}")
        finally:
            executor.shutdown(wait=False)

        print(f"Completed in {time.time()-start_time:.2f}s")

if __name__ == "__main__":
    # Run the whole stress test three times in a row, announcing each pass.
    for run_number in range(1, 4):
        print(f"\n===== Validation Run {run_number} =====")
        stress_test()
        print(f"===== Run {run_number} Complete =====\n")

# Usage Example with Error-Proof Access
system = CoverLetterSystem()
result = system.improve_letter(
    resume="Your resume text...",
    job_desc="Job posting content...",
    draft="Existing draft (optional)..."
)

# Safe output access
final_draft = result.get('draft', 'No draft generated')
analysis = result.get('analysis', {})
ats = result.get('ats_check', {})

print("\nFinal Draft Preview:")
print(final_draft[:500] + "..." if final_draft else "No draft available")

# The model may return an empty list (or omit the key), so never index the
# suggestions without checking first.
suggestions = analysis.get('improvement_suggestions') or ['None']
top_suggestion = suggestions[0] if isinstance(suggestions, (list, tuple)) else suggestions

print("\nKey Improvements:")
print("- Missing skills:", analysis.get('missing_skills', []))
print("- ATS score:", ats.get('compliance_score', 0))
print("- Top suggestion:", top_suggestion)

print("\nProcessing Time:", result.get('metrics', {}).get('processing_time', 0))



# Recorded output from a run of this notebook:
# {
#   "steps": [],
#   "errors": [],
#   "processing_time": 100.94699907302856,
#   "metrics": {}
# }
#
# IndexError: list index out of range