In [1]:
import pdfplumber

# Read the text layer of every page of resume.pdf into `text`, appending one
# newline per page so page boundaries stay visible.
with pdfplumber.open("resume.pdf") as pdf:
    # extract_text() can be falsy for a page with no extractable text (the
    # PDF helper further down this notebook checks for that as well), so
    # substitute "" instead of letting `None + "\n"` raise TypeError.
    text = "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

In [8]:
"""
Resume Parser using AWS Bedrock Claude with Pydantic Validation
"""

import json
import boto3
from pydantic import BaseModel, Field, EmailStr, ValidationError
from typing import List, Optional, Literal
from datetime import date

# =============================================================================
# PYDANTIC MODELS FOR RESUME STRUCTURE
# =============================================================================

# One education entry. Nested in ParsedResume.education, so this model is part
# of the JSON schema that BedrockResumeParser._create_prompt embeds in the prompt.
class Education(BaseModel):
    """Education entry in resume"""
    # Required, non-empty strings (min_length=1).
    degree: str = Field(min_length=1, description="Degree or certification name")
    institution: str = Field(min_length=1, description="School/University name")
    field_of_study: Optional[str] = None
    # Optional; when present it must fall within 1950..2030 inclusive.
    graduation_year: Optional[int] = Field(None, ge=1950, le=2030)
    # Optional; when present it must fall within 0.0..4.0 inclusive.
    # NOTE(review): a GPA reported on another scale (e.g. out of 10) would fail
    # validation - confirm the 4.0 ceiling is intended.
    gpa: Optional[float] = Field(None, ge=0.0, le=4.0)
    location: Optional[str] = None

# One job entry. Nested in ParsedResume.work_experience.
class WorkExperience(BaseModel):
    """Work experience entry"""
    # Required, non-empty strings (min_length=1).
    job_title: str = Field(min_length=1, description="Job title/position")
    company: str = Field(min_length=1, description="Company name")
    location: Optional[str] = None
    # Dates are kept as free-form strings; no date parsing or ordering check
    # is applied by this model.
    start_date: Optional[str] = Field(None, description="Start date (e.g., 'Jan 2020' or '2020-01')")
    end_date: Optional[str] = Field(None, description="End date or 'Present'")
    duration: Optional[str] = None
    # Defaults to a fresh empty list when the key is absent.
    responsibilities: List[str] = Field(default_factory=list, description="Key responsibilities and achievements")
    
# One skill. Nested in ParsedResume.skills.
class Skill(BaseModel):
    """Skill with optional proficiency level"""
    # Required, non-empty.
    name: str = Field(min_length=1)
    # Both classifiers are optional, but when given they must be exactly one of
    # the listed literal strings; any other value fails validation.
    category: Optional[Literal["technical", "soft", "language", "tool", "framework", "other"]] = None
    proficiency: Optional[Literal["beginner", "intermediate", "advanced", "expert"]] = None

# One certification. Nested in ParsedResume.certifications.
class Certification(BaseModel):
    """Professional certification"""
    # The only required field; must be non-empty.
    name: str = Field(min_length=1)
    issuing_organization: Optional[str] = None
    # Dates are plain strings; the model does not parse or compare them.
    issue_date: Optional[str] = None
    expiry_date: Optional[str] = None
    credential_id: Optional[str] = None

# One project/portfolio item. Nested in ParsedResume.projects.
class Project(BaseModel):
    """Project or portfolio item"""
    # Both title and description are required and must be non-empty.
    title: str = Field(min_length=1)
    description: str = Field(min_length=1)
    # Defaults to a fresh empty list when the key is absent.
    technologies: List[str] = Field(default_factory=list)
    # Stored as a plain string; no URL validation is applied.
    url: Optional[str] = None
    # NOTE(review): this field shares its name with the `date` imported from
    # datetime at the top of the cell; no annotation in this class refers to
    # that import, so the overlap looks harmless - confirm.
    date: Optional[str] = None

# Top-level result model. BedrockResumeParser._create_prompt serializes its
# JSON schema into the prompt, and parse_resume validates the model's reply
# against it with model_validate_json.
class ParsedResume(BaseModel):
    """Complete structured resume data"""
    # Personal Information
    # full_name is the only required field of the whole resume.
    full_name: str = Field(min_length=1, description="Candidate's full name")
    # Validated as an e-mail address when present.
    email: Optional[EmailStr] = None
    phone: Optional[str] = None
    location: Optional[str] = Field(None, description="City, State/Country")
    # Profile links are plain strings; no URL validation is applied.
    linkedin_url: Optional[str] = None
    github_url: Optional[str] = None
    portfolio_url: Optional[str] = None
    
    # Professional Summary
    summary: Optional[str] = Field(None, description="Professional summary or objective")
    
    # Experience and Education
    # Every list below defaults to a fresh empty list when the key is absent.
    work_experience: List[WorkExperience] = Field(default_factory=list)
    education: List[Education] = Field(default_factory=list)
    
    # Skills and Certifications
    skills: List[Skill] = Field(default_factory=list)
    certifications: List[Certification] = Field(default_factory=list)
    
    # Additional
    projects: List[Project] = Field(default_factory=list)
    languages: List[str] = Field(default_factory=list, description="Spoken languages")
    
    # Metadata
    # Whole years only; values outside 0..50 fail validation.
    years_of_experience: Optional[int] = Field(None, ge=0, le=50)
    current_job_title: Optional[str] = None


# =============================================================================
# AWS BEDROCK CLAUDE CLIENT
# =============================================================================

class BedrockResumeParser:
    """Resume parser using AWS Bedrock Claude with Pydantic validation.

    Builds a schema-guided prompt, sends it to the configured model through the
    ``bedrock-runtime`` client and validates the reply against ``ParsedResume``,
    retrying when the call, the JSON decoding or the validation fails.
    """

    def __init__(self, region_name: str = "eu-west-2", model_id: str = "anthropic.claude-3-7-sonnet-20250219-v1:0"):
        """
        Initialize Bedrock client

        Args:
            region_name: AWS region where Bedrock is available
            model_id: Bedrock model ID to use
        """
        self.client = boto3.client(
            service_name='bedrock-runtime',
            region_name=region_name
        )
        self.model_id = model_id

    def _create_prompt(self, resume_text: str) -> str:
        """
        Create structured prompt with JSON schema

        Args:
            resume_text: Raw resume text

        Returns:
            Formatted prompt with schema
        """
        schema = json.dumps(ParsedResume.model_json_schema(), indent=2)

        prompt = f"""Extract information from this resume and return as JSON.

IMPORTANT:
1. Be CONCISE - use short descriptions and summaries
2. For responsibilities, extract only key points (max 3-5 per job)
3. Combine similar skills into categories
4. Return ONLY valid JSON - no markdown, no preamble
5. Ensure the JSON is complete and properly closed

JSON SCHEMA:
{schema}

RESUME TEXT:
{resume_text}

Return the complete JSON object:"""

        return prompt

    @staticmethod
    def _clean_llm_output(raw_output: str) -> str:
        """
        Isolate the JSON object inside a model reply

        Keeps everything from the first '{' to the last '}', which drops
        markdown code fences as well as any sentence the model adds before or
        after the object.

        Args:
            raw_output: Text returned by the model

        Returns:
            The outermost brace-delimited span, or the stripped input when no
            such span exists (the caller then reports it as truncated)
        """
        cleaned = raw_output.strip()
        first_brace = cleaned.find("{")
        last_brace = cleaned.rfind("}")
        if first_brace == -1 or last_brace < first_brace:
            return cleaned
        return cleaned[first_brace:last_brace + 1]

    def _invoke_model(self, prompt: str) -> str:
        """
        Send one user message to the model and return the reply text

        Args:
            prompt: Full prompt to send as the single user message

        Returns:
            Text of the first content block of the response

        Raises:
            ValueError: If the response reports stop_reason "max_tokens",
                i.e. the reply was cut off before the model finished
        """
        response = self.client.invoke_model(
            modelId=self.model_id,
            contentType="application/json",
            accept="application/json",
            body=json.dumps({
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 8192,  # Increased for longer resumes
                "temperature": 0.0,  # Deterministic for structured output
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            })
        )

        response_body = json.loads(response['body'].read())
        # The last character alone cannot prove completeness, so trust the
        # stop reason reported by the service first.
        if response_body.get("stop_reason") == "max_tokens":
            raise ValueError("JSON output appears incomplete/truncated (max_tokens reached)")
        return response_body['content'][0]['text']

    def parse_resume(self, resume_text: str, max_retries: int = 2) -> "tuple[Optional[ParsedResume], Optional[str]]":
        """
        Parse resume text using Claude and validate with Pydantic

        Args:
            resume_text: Raw resume text to parse
            max_retries: Number of additional attempts after the first call

        Returns:
            Tuple of (ParsedResume object, error_message)
            - On success: (ParsedResume, None)
            - On failure: (None, error_message)
        """
        # Do not spend a model call on input that cannot contain a resume.
        if not resume_text or not resume_text.strip():
            return None, "Resume text is empty"

        base_prompt = self._create_prompt(resume_text)
        prompt = base_prompt
        total_attempts = max_retries + 1

        for attempt in range(total_attempts):
            try:
                # Call Claude via Bedrock
                print(f"🔄 Calling AWS Bedrock Claude (attempt {attempt + 1}/{total_attempts})...")
                raw_output = self._invoke_model(prompt)

                print("✅ Received response from Claude")
                print(f"📝 Raw output length: {len(raw_output)} characters")

                llm_output = self._clean_llm_output(raw_output)

                # Check if JSON is complete (basic validation)
                if not llm_output.endswith('}'):
                    print("⚠️  Warning: JSON appears truncated (doesn't end with '}')")
                    print(f"   Last 100 chars: ...{llm_output[-100:]}")
                    raise ValueError("JSON output appears incomplete/truncated")

                # Decode once up front so a syntax error is reported with
                # surrounding context before Pydantic gets involved.
                try:
                    json.loads(llm_output)
                except json.JSONDecodeError as je:
                    print(f"⚠️  JSON decode error: {je}")
                    print(f"   Error at position {je.pos}")
                    context_start = max(je.pos - 50, 0)
                    print(f"   Context: ...{llm_output[context_start:je.pos + 50]}...")
                    raise

                # Validate with Pydantic
                print("🔍 Validating output with Pydantic...")
                parsed_resume = ParsedResume.model_validate_json(llm_output)

                print("✅ Validation successful!")
                print(f"📊 Extracted: {parsed_resume.full_name}")
                print(f"   - Education entries: {len(parsed_resume.education)}")
                print(f"   - Work experiences: {len(parsed_resume.work_experience)}")
                print(f"   - Skills: {len(parsed_resume.skills)}")
                print(f"   - Certifications: {len(parsed_resume.certifications)}")

                return parsed_resume, None

            except ValidationError as e:
                error_msg = f"Validation failed on attempt {attempt + 1}"
                print(f"❌ {error_msg}")
                for error in e.errors():
                    print(f"   - {error['loc']}: {error['msg']}")

                if attempt < max_retries:
                    print("🔄 Retrying with error feedback...")
                    # Rebuild from the base prompt so only the most recent
                    # errors are attached and the prompt does not keep growing.
                    prompt = (
                        base_prompt
                        + f"\n\nPREVIOUS ATTEMPT FAILED WITH ERRORS:\n{str(e)}\n\nPlease fix these issues and return valid JSON:"
                    )
                else:
                    return None, f"Validation failed after {max_retries + 1} attempts: {str(e)}"

            except Exception as e:
                # Boundary handler: service errors, truncated replies and
                # malformed JSON are all retried the same way.
                error_msg = f"Error calling Bedrock or parsing response: {str(e)}"
                print(f"❌ {error_msg}")
                if attempt < max_retries:
                    print("🔄 Retrying...")
                else:
                    return None, error_msg

        # Only reachable when max_retries is negative (the loop never runs).
        return None, "Max retries exceeded"


# =============================================================================
# EXAMPLE USAGE
# =============================================================================

def main():
    """Example usage of the resume parser.

    Parses the module-level ``text`` (produced by the PDF extraction cell) when
    it exists as a non-empty string; otherwise falls back to a built-in sample
    resume so the example does not fail with NameError when run on its own.
    Prints the validated result as JSON followed by a short structured summary,
    or the error message when parsing fails.
    """
    import textwrap  # local import: only this example needs it

    extracted = globals().get("text")
    if isinstance(extracted, str) and extracted.strip():
        sample_resume = extracted
    else:
        # Sample resume text
        sample_resume = textwrap.dedent("""\
            John Doe
            Email: john.doe@email.com | Phone: (555) 123-4567
            Location: San Francisco, CA | LinkedIn: linkedin.com/in/johndoe

            PROFESSIONAL SUMMARY
            Senior Software Engineer with 8+ years of experience in full-stack development,
            cloud architecture, and team leadership. Proven track record of delivering scalable
            solutions and mentoring junior developers.

            WORK EXPERIENCE

            Senior Software Engineer | Tech Corp Inc. | San Francisco, CA
            January 2020 - Present
            - Led development of microservices architecture serving 5M+ daily users
            - Reduced API response time by 40% through optimization and caching strategies
            - Mentored team of 5 junior engineers and conducted code reviews
            - Implemented CI/CD pipeline reducing deployment time from 2 hours to 15 minutes

            Software Engineer | StartUp XYZ | San Francisco, CA
            June 2017 - December 2019
            - Developed RESTful APIs using Python Flask and PostgreSQL
            - Built responsive frontend using React and Redux
            - Collaborated with product team on feature specifications

            Junior Developer | Web Solutions Ltd. | Austin, TX
            July 2015 - May 2017
            - Maintained legacy PHP applications and migrated to modern framework
            - Fixed bugs and implemented new features based on client requirements

            EDUCATION

            Bachelor of Science in Computer Science
            University of California, Berkeley | 2011 - 2015
            GPA: 3.8/4.0

            SKILLS

            Programming Languages: Python, JavaScript, TypeScript, Java, SQL
            Frameworks: React, Node.js, Django, Flask, Spring Boot
            Cloud & DevOps: AWS (EC2, S3, Lambda), Docker, Kubernetes, Jenkins
            Databases: PostgreSQL, MongoDB, Redis
            Other: Git, Agile/Scrum, System Design, Mentoring

            CERTIFICATIONS
            - AWS Certified Solutions Architect - Associate (2022)
            - Certified Scrum Master (CSM) (2021)

            PROJECTS
            - Open Source Contributor to React ecosystem (2020-Present)
            - Built personal portfolio website using Next.js and deployed on Vercel
            """)

    # Initialize parser with your region and model
    parser = BedrockResumeParser(
        region_name="eu-west-2",
        model_id="anthropic.claude-3-7-sonnet-20250219-v1:0"
    )

    # Parse resume
    print("=" * 80)
    print("PARSING RESUME")
    print("=" * 80)

    parsed_resume, error = parser.parse_resume(sample_resume)

    if not parsed_resume:
        print(f"\n❌ Parsing failed: {error}")
        return

    # Get JSON output
    json_output = parsed_resume.model_dump_json(indent=2)

    print("\n" + "=" * 80)
    print("PARSED RESUME (JSON)")
    print("=" * 80)
    # Emit in chunks, but without print's default newline: a line break after
    # every chunk would split tokens and corrupt the JSON text.
    chunk_size = 1000
    for i in range(0, len(json_output), chunk_size):
        print(json_output[i:i + chunk_size], end="")
    print()

    # Access structured data
    print("\n" + "=" * 80)
    print("STRUCTURED DATA ACCESS")
    print("=" * 80)
    print(f"Name: {parsed_resume.full_name}")
    print(f"Email: {parsed_resume.email}")
    print(f"Phone: {parsed_resume.phone}")
    print(f"Location: {parsed_resume.location}")
    print(f"Years of Experience: {parsed_resume.years_of_experience}")

    print(f"\nEducation ({len(parsed_resume.education)} entries):")
    for edu in parsed_resume.education:
        print(f"  - {edu.degree} in {edu.field_of_study or 'N/A'}")
        print(f"    {edu.institution} ({edu.graduation_year or 'N/A'})")

    print(f"\nWork Experience ({len(parsed_resume.work_experience)} entries):")
    for exp in parsed_resume.work_experience:
        print(f"  - {exp.job_title} at {exp.company}")
        print(f"    Duration: {exp.start_date} to {exp.end_date}")
        print(f"    Responsibilities: {len(exp.responsibilities)} items")

    print(f"\nSkills ({len(parsed_resume.skills)} total):")
    for skill in parsed_resume.skills[:10]:  # Show first 10
        category = f" ({skill.category})" if skill.category else ""
        print(f"  - {skill.name}{category}")
    if len(parsed_resume.skills) > 10:
        print(f"  ... and {len(parsed_resume.skills) - 10} more")

    print(f"\nCertifications ({len(parsed_resume.certifications)} total):")
    for cert in parsed_resume.certifications:
        org = f" - {cert.issuing_organization}" if cert.issuing_organization else ""
        # Named `issued` rather than `date` so the datetime.date import is not shadowed.
        issued = f" ({cert.issue_date})" if cert.issue_date else ""
        print(f"  - {cert.name}{org}{issued}")


if __name__ == "__main__":
    main()

PARSING RESUME
🔄 Calling AWS Bedrock Claude (attempt 1/3)...
✅ Received response from Claude
📝 Raw output length: 4300 characters
🔍 Validating output with Pydantic...
✅ Validation successful!
📊 Extracted: Hargurjeet Singh Ganger
   - Education entries: 3
   - Work experiences: 3
   - Skills: 8
   - Certifications: 1

PARSED RESUME (JSON)
{
  "full_name": "Hargurjeet Singh Ganger",
  "email": "gurjeet333@gmail.com",
  "phone": "+91 9035828125",
  "location": "Bangalore, India",
  "linkedin_url": "linkedin.com/in/hargurjeet/",
  "github_url": "github.com/hargurjeet",
  "portfolio_url": "gurjeet333.medium.com",
  "summary": "Experienced IT professional with 15+ years in the industry, specializing in data science, statistical analysis, machine learning and Generative AI. Expert in LLMs, AI model development. Proficient in Python, SQL, and cloud platforms like AWS and GCP.",
  "work_experience": [
    {
      "job_title": "Senior Data Scientist",
      "company": "British Te

In [13]:
"""
Resume Parser using AWS Bedrock Claude with Pydantic Validation
Accepts PDF files only; the text is extracted with pdfplumber before parsing
"""

import json
import boto3
import pdfplumber
from pathlib import Path
from pydantic import BaseModel, Field, EmailStr, ValidationError
from typing import List, Optional, Literal, Union
from datetime import date

# =============================================================================
# PYDANTIC MODELS FOR RESUME STRUCTURE
# =============================================================================

# One education entry. Nested in ParsedResume.education, so this model is part
# of the JSON schema that BedrockResumeParser._create_prompt embeds in the prompt.
class Education(BaseModel):
    """Education entry in resume"""
    # Required, non-empty strings (min_length=1).
    degree: str = Field(min_length=1, description="Degree or certification name")
    institution: str = Field(min_length=1, description="School/University name")
    field_of_study: Optional[str] = None
    # Optional; when present it must fall within 1950..2030 inclusive.
    graduation_year: Optional[int] = Field(None, ge=1950, le=2030)
    # Optional; when present it must fall within 0.0..4.0 inclusive.
    # NOTE(review): a GPA reported on another scale (e.g. out of 10) would fail
    # validation - confirm the 4.0 ceiling is intended.
    gpa: Optional[float] = Field(None, ge=0.0, le=4.0)
    location: Optional[str] = None

# One job entry. Nested in ParsedResume.work_experience.
class WorkExperience(BaseModel):
    """Work experience entry"""
    # Required, non-empty strings (min_length=1).
    job_title: str = Field(min_length=1, description="Job title/position")
    company: str = Field(min_length=1, description="Company name")
    location: Optional[str] = None
    # Dates are kept as free-form strings; no date parsing or ordering check
    # is applied by this model.
    start_date: Optional[str] = Field(None, description="Start date (e.g., 'Jan 2020' or '2020-01')")
    end_date: Optional[str] = Field(None, description="End date or 'Present'")
    duration: Optional[str] = None
    # Defaults to a fresh empty list when the key is absent.
    responsibilities: List[str] = Field(default_factory=list, description="Key responsibilities and achievements")
    
# One skill. Nested in ParsedResume.skills.
class Skill(BaseModel):
    """Skill with optional proficiency level"""
    # Required, non-empty.
    name: str = Field(min_length=1)
    # Both classifiers are optional, but when given they must be exactly one of
    # the listed literal strings; any other value fails validation.
    category: Optional[Literal["technical", "soft", "language", "tool", "framework", "other"]] = None
    proficiency: Optional[Literal["beginner", "intermediate", "advanced", "expert"]] = None

# One certification. Nested in ParsedResume.certifications.
class Certification(BaseModel):
    """Professional certification"""
    # The only required field; must be non-empty.
    name: str = Field(min_length=1)
    issuing_organization: Optional[str] = None
    # Dates are plain strings; the model does not parse or compare them.
    issue_date: Optional[str] = None
    expiry_date: Optional[str] = None
    credential_id: Optional[str] = None

# One project/portfolio item. Nested in ParsedResume.projects.
class Project(BaseModel):
    """Project or portfolio item"""
    # Both title and description are required and must be non-empty.
    title: str = Field(min_length=1)
    description: str = Field(min_length=1)
    # Defaults to a fresh empty list when the key is absent.
    technologies: List[str] = Field(default_factory=list)
    # Stored as a plain string; no URL validation is applied.
    url: Optional[str] = None
    # NOTE(review): this field shares its name with the `date` imported from
    # datetime at the top of the cell; no annotation in this class refers to
    # that import, so the overlap looks harmless - confirm.
    date: Optional[str] = None

# Top-level result model. BedrockResumeParser._create_prompt serializes its
# JSON schema into the prompt, and parse_resume validates the model's reply
# against it with model_validate_json.
class ParsedResume(BaseModel):
    """Complete structured resume data"""
    # Personal Information
    # full_name is the only required field of the whole resume.
    full_name: str = Field(min_length=1, description="Candidate's full name")
    # Validated as an e-mail address when present.
    email: Optional[EmailStr] = None
    phone: Optional[str] = None
    location: Optional[str] = Field(None, description="City, State/Country")
    # Profile links are plain strings; no URL validation is applied.
    linkedin_url: Optional[str] = None
    github_url: Optional[str] = None
    portfolio_url: Optional[str] = None
    
    # Professional Summary
    summary: Optional[str] = Field(None, description="Professional summary or objective")
    
    # Experience and Education
    # Every list below defaults to a fresh empty list when the key is absent.
    work_experience: List[WorkExperience] = Field(default_factory=list)
    education: List[Education] = Field(default_factory=list)
    
    # Skills and Certifications
    skills: List[Skill] = Field(default_factory=list)
    certifications: List[Certification] = Field(default_factory=list)
    
    # Additional
    projects: List[Project] = Field(default_factory=list)
    languages: List[str] = Field(default_factory=list, description="Spoken languages")
    
    # Metadata
    # Whole years only; values outside 0..50 fail validation.
    years_of_experience: Optional[int] = Field(None, ge=0, le=50)
    current_job_title: Optional[str] = None


# =============================================================================
# AWS BEDROCK CLAUDE CLIENT
# =============================================================================

class BedrockResumeParser:
    """Resume parser using AWS Bedrock Claude with Pydantic validation.

    Extracts the text of a PDF resume with pdfplumber, builds a schema-guided
    prompt, sends it to the configured model through the ``bedrock-runtime``
    client and validates the reply against ``ParsedResume``, retrying when the
    call, the JSON decoding or the validation fails.
    """

    def __init__(self, region_name: str = "eu-west-2", model_id: str = "anthropic.claude-3-7-sonnet-20250219-v1:0"):
        """
        Initialize Bedrock client

        Args:
            region_name: AWS region where Bedrock is available
            model_id: Bedrock model ID to use
        """
        self.client = boto3.client(
            service_name='bedrock-runtime',
            region_name=region_name
        )
        self.model_id = model_id

    def extract_text_from_pdf(self, pdf_path: Union[str, Path]) -> str:
        """
        Extract text from PDF file using pdfplumber

        Args:
            pdf_path: Path to PDF file

        Returns:
            Extracted text from all pages, one newline after each page that
            yielded text, with surrounding whitespace stripped

        Raises:
            Exception: Any error raised while opening or reading the PDF is
                printed and re-raised unchanged
        """
        print(f"📄 Extracting text from PDF: {pdf_path}")

        page_texts = []
        try:
            with pdfplumber.open(pdf_path) as pdf:
                print(f"   Total pages: {len(pdf.pages)}")
                for i, page in enumerate(pdf.pages, 1):
                    page_text = page.extract_text()
                    if page_text:
                        page_texts.append(page_text + "\n")
                        print(f"   ✓ Extracted page {i}")
                    else:
                        print(f"   ⚠ Page {i} has no extractable text")
        except Exception as e:
            print(f"❌ Error extracting text from PDF: {e}")
            raise

        # Join once instead of growing a string page by page.
        text = "".join(page_texts)
        print(f"✅ Extracted {len(text)} characters from PDF")
        return text.strip()

    def _create_prompt(self, resume_text: str) -> str:
        """
        Create structured prompt with JSON schema

        Args:
            resume_text: Raw resume text

        Returns:
            Formatted prompt with schema
        """
        schema = json.dumps(ParsedResume.model_json_schema(), indent=2)

        prompt = f"""Extract information from this resume and return as JSON.

IMPORTANT:
1. Be CONCISE - use short descriptions and summaries
2. For responsibilities, extract only key points (max 3-5 per job)
3. Combine similar skills into categories
4. Return ONLY valid JSON - no markdown, no preamble
5. Ensure the JSON is complete and properly closed

JSON SCHEMA:
{schema}

RESUME TEXT:
{resume_text}

Return the complete JSON object:"""

        return prompt

    @staticmethod
    def _clean_llm_output(raw_output: str) -> str:
        """
        Isolate the JSON object inside a model reply

        Keeps everything from the first '{' to the last '}', which drops
        markdown code fences as well as any sentence the model adds before or
        after the object.

        Args:
            raw_output: Text returned by the model

        Returns:
            The outermost brace-delimited span, or the stripped input when no
            such span exists (the caller then reports it as truncated)
        """
        cleaned = raw_output.strip()
        first_brace = cleaned.find("{")
        last_brace = cleaned.rfind("}")
        if first_brace == -1 or last_brace < first_brace:
            return cleaned
        return cleaned[first_brace:last_brace + 1]

    def _invoke_model(self, prompt: str) -> str:
        """
        Send one user message to the model and return the reply text

        Args:
            prompt: Full prompt to send as the single user message

        Returns:
            Text of the first content block of the response

        Raises:
            ValueError: If the response reports stop_reason "max_tokens",
                i.e. the reply was cut off before the model finished
        """
        response = self.client.invoke_model(
            modelId=self.model_id,
            contentType="application/json",
            accept="application/json",
            body=json.dumps({
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 8192,  # Increased for longer resumes
                "temperature": 0.0,  # Deterministic for structured output
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            })
        )

        response_body = json.loads(response['body'].read())
        # The last character alone cannot prove completeness, so trust the
        # stop reason reported by the service first.
        if response_body.get("stop_reason") == "max_tokens":
            raise ValueError("JSON output appears incomplete/truncated (max_tokens reached)")
        return response_body['content'][0]['text']

    def _parse_text(self, resume_text: str, max_retries: int) -> "tuple[Optional[ParsedResume], Optional[str]]":
        """
        Run the prompt/validate/retry loop on already extracted resume text

        Args:
            resume_text: Raw resume text to parse
            max_retries: Number of additional attempts after the first call

        Returns:
            Tuple of (ParsedResume object, error_message), as for parse_resume
        """
        base_prompt = self._create_prompt(resume_text)
        prompt = base_prompt
        total_attempts = max_retries + 1

        for attempt in range(total_attempts):
            try:
                # Call Claude via Bedrock
                print(f"🔄 Calling AWS Bedrock Claude (attempt {attempt + 1}/{total_attempts})...")
                raw_output = self._invoke_model(prompt)

                print("✅ Received response from Claude")
                print(f"📝 Raw output length: {len(raw_output)} characters")

                llm_output = self._clean_llm_output(raw_output)

                # Check if JSON is complete (basic validation)
                if not llm_output.endswith('}'):
                    print("⚠️  Warning: JSON appears truncated (doesn't end with '}')")
                    print(f"   Last 100 chars: ...{llm_output[-100:]}")
                    raise ValueError("JSON output appears incomplete/truncated")

                # Decode once up front so a syntax error is reported with
                # surrounding context before Pydantic gets involved.
                try:
                    json.loads(llm_output)
                except json.JSONDecodeError as je:
                    print(f"⚠️  JSON decode error: {je}")
                    print(f"   Error at position {je.pos}")
                    context_start = max(je.pos - 50, 0)
                    print(f"   Context: ...{llm_output[context_start:je.pos + 50]}...")
                    raise

                # Validate with Pydantic
                print("🔍 Validating output with Pydantic...")
                parsed_resume = ParsedResume.model_validate_json(llm_output)

                print("✅ Validation successful!")
                print(f"📊 Extracted: {parsed_resume.full_name}")
                print(f"   - Education entries: {len(parsed_resume.education)}")
                print(f"   - Work experiences: {len(parsed_resume.work_experience)}")
                print(f"   - Skills: {len(parsed_resume.skills)}")
                print(f"   - Certifications: {len(parsed_resume.certifications)}")

                return parsed_resume, None

            except ValidationError as e:
                error_msg = f"Validation failed on attempt {attempt + 1}"
                print(f"❌ {error_msg}")
                for error in e.errors():
                    print(f"   - {error['loc']}: {error['msg']}")

                if attempt < max_retries:
                    print("🔄 Retrying with error feedback...")
                    # Rebuild from the base prompt so only the most recent
                    # errors are attached and the prompt does not keep growing.
                    prompt = (
                        base_prompt
                        + f"\n\nPREVIOUS ATTEMPT FAILED WITH ERRORS:\n{str(e)}\n\nPlease fix these issues and return valid JSON:"
                    )
                else:
                    return None, f"Validation failed after {max_retries + 1} attempts: {str(e)}"

            except Exception as e:
                # Boundary handler: service errors, truncated replies and
                # malformed JSON are all retried the same way.
                error_msg = f"Error calling Bedrock or parsing response: {str(e)}"
                print(f"❌ {error_msg}")
                if attempt < max_retries:
                    print("🔄 Retrying...")
                else:
                    return None, error_msg

        # Only reachable when max_retries is negative (the loop never runs).
        return None, "Max retries exceeded"

    def parse_resume(self, pdf_path: Union[str, Path], max_retries: int = 2) -> "tuple[Optional[ParsedResume], Optional[str]]":
        """
        Parse resume from PDF file only

        Args:
            pdf_path: Path to PDF file
            max_retries: Number of additional attempts after the first call

        Returns:
            Tuple of (ParsedResume object, error_message)
            - On success: (ParsedResume, None)
            - On failure: (None, error_message)
        """
        # Validate input is a PDF file
        path = Path(pdf_path)

        if not path.exists():
            return None, f"File not found: {pdf_path}"

        if not path.is_file():
            return None, f"Path is not a file: {pdf_path}"

        if path.suffix.lower() != '.pdf':
            return None, f"Only PDF files are supported. Got: {path.suffix}"

        # Extract text from PDF; extraction failures become an error result
        # rather than an exception, matching the rest of this method.
        try:
            resume_text = self.extract_text_from_pdf(path)
        except Exception as e:
            return None, f"Failed to extract text from PDF: {str(e)}"

        if not resume_text or len(resume_text.strip()) < 10:
            return None, "Extracted text is empty or too short. PDF may be image-based or corrupted."

        # Now parse the extracted text
        return self._parse_text(resume_text, max_retries)


# =============================================================================
# EXAMPLE USAGE
# =============================================================================

def main(pdf_path: str = "/home/ec2-user/SageMaker/resume_parser/resume.pdf"):
    """Example usage of the resume parser.

    Parses the PDF at ``pdf_path`` and prints the validated result as JSON
    followed by a short structured summary, or the error message when parsing
    fails.

    Args:
        pdf_path: Location of the PDF resume to parse; defaults to the path
            this example was originally hard-coded to
    """

    # Initialize parser with your region and model
    parser = BedrockResumeParser(
        region_name="eu-west-2",
        model_id="anthropic.claude-3-7-sonnet-20250219-v1:0"
    )

    print("=" * 80)
    print("RESUME PARSER - PDF ONLY")
    print("=" * 80)

    # Parse from PDF file
    print("\n### Parsing PDF Resume ###\n")

    print(f"💡 To use: parser.parse_resume('{pdf_path}')")
    print("   Only PDF files are accepted.\n")

    parsed_resume, error = parser.parse_resume(pdf_path)

    if not parsed_resume:
        print(f"\n❌ Parsing failed: {error}")
        return

    # Get JSON output
    json_output = parsed_resume.model_dump_json(indent=2)

    print("\n" + "=" * 80)
    print("PARSED RESUME (JSON)")
    print("=" * 80)
    # Emit in chunks, but without print's default newline: a line break after
    # every chunk would split tokens and corrupt the JSON text.
    chunk_size = 1000
    for i in range(0, len(json_output), chunk_size):
        print(json_output[i:i + chunk_size], end="")
    print()

    # Access structured data
    print("\n" + "=" * 80)
    print("STRUCTURED DATA ACCESS")
    print("=" * 80)
    print(f"Name: {parsed_resume.full_name}")
    print(f"Email: {parsed_resume.email}")
    print(f"Phone: {parsed_resume.phone}")
    print(f"Location: {parsed_resume.location}")
    print(f"Years of Experience: {parsed_resume.years_of_experience}")

    print(f"\nEducation ({len(parsed_resume.education)} entries):")
    for edu in parsed_resume.education:
        print(f"  - {edu.degree} in {edu.field_of_study or 'N/A'}")
        print(f"    {edu.institution} ({edu.graduation_year or 'N/A'})")

    print(f"\nWork Experience ({len(parsed_resume.work_experience)} entries):")
    for exp in parsed_resume.work_experience:
        print(f"  - {exp.job_title} at {exp.company}")
        print(f"    Duration: {exp.start_date} to {exp.end_date}")
        print(f"    Responsibilities: {len(exp.responsibilities)} items")

    print(f"\nSkills ({len(parsed_resume.skills)} total):")
    for skill in parsed_resume.skills[:10]:  # Show first 10
        category = f" ({skill.category})" if skill.category else ""
        print(f"  - {skill.name}{category}")
    if len(parsed_resume.skills) > 10:
        print(f"  ... and {len(parsed_resume.skills) - 10} more")

    print(f"\nCertifications ({len(parsed_resume.certifications)} total):")
    for cert in parsed_resume.certifications:
        org = f" - {cert.issuing_organization}" if cert.issuing_organization else ""
        # Named `issued` rather than `date` so the datetime.date import is not shadowed.
        issued = f" ({cert.issue_date})" if cert.issue_date else ""
        print(f"  - {cert.name}{org}{issued}")


if __name__ == "__main__":
    main()

RESUME PARSER - PDF ONLY

### Parsing PDF Resume ###

💡 To use: parser.parse_resume('/home/ec2-user/SageMaker/resume_parser/resume.pdf')
   Only PDF files are accepted.

📄 Extracting text from PDF: /home/ec2-user/SageMaker/resume_parser/resume.pdf
   Total pages: 2
   ✓ Extracted page 1
   ✓ Extracted page 2
✅ Extracted 6948 characters from PDF
🔄 Calling AWS Bedrock Claude (attempt 1/3)...
✅ Received response from Claude
📝 Raw output length: 4570 characters
🔍 Validating output with Pydantic...
✅ Validation successful!
📊 Extracted: Hargurjeet Singh Ganger
   - Education entries: 3
   - Work experiences: 3
   - Skills: 8
   - Certifications: 1

PARSED RESUME (JSON)
{
  "full_name": "Hargurjeet Singh Ganger",
  "email": "gurjeet333@gmail.com",
  "phone": "+91 9035828125",
  "location": "Bangalore, India",
  "linkedin_url": "linkedin.com/in/hargurjeet/",
  "github_url": "github.com/hargurjeet",
  "portfolio_url": "gurjeet333.medium.com",
  "summary": "Experience