In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import END, StateGraph, START

In [2]:
import getpass
import os

# Prompt for the Google AI key (via getpass, so it is not hard-coded in the
# notebook) only when the environment does not already define GOOGLE_API_KEY.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [3]:
# Gemini chat model client, configured with temperature=0 and up to 2 retries.
# NOTE(review): `llm` is assigned again in a later cell (without the explicit
# timeout / max_retries arguments); on a top-to-bottom run that later instance
# is the one the parser node ends up using.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [46]:
import operator
from typing import List
from typing_extensions import TypedDict
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, START, END

# Initialize LLM
# NOTE(review): this rebinds the `llm` created in an earlier cell, this time
# without the explicit timeout=None / max_retries=2 arguments, so those two
# settings fall back to the library defaults — confirm that is intended.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
)

# Define Project schema
class Project(BaseModel):
    # One project entry from a resume; used as the element type of Resume.projects.
    # Both fields have no default, so both must be present in the parsed data.
    name: str = Field(description="Name of the project")
    description: str = Field(description="Description of the project")

class Experience(BaseModel):
    # One work/research experience entry; used as the element type of Resume.experience.
    # Note that `name` holds the company name (see its description), not the person's name.
    name: str = Field(description="Name of the company")
    role: str = Field(description="Role or position held")
    description: str = Field(description="Description of the work done/ experience at this company")

# Define Resume schema using BaseModel (Pydantic v2)
class Resume(BaseModel):
    # Structured result of parsing one resume; this is the schema handed to
    # llm.with_structured_output(...) in parse_resume_node.
    #
    # Fields declared without a default (name, email, phone, skills, major_name,
    # usc_college_name) must be present in the parsed data. projects and
    # experience default to empty lists, and linkedin defaults to "".
    name: str = Field(description="Full name")
    email: str = Field(description="Email address")
    phone: str = Field(description="Phone number")
    skills: List[str] = Field(description="List of technical and soft skills")
    projects: List[Project] = Field(default_factory=list, description="List of projects with name and description")
    experience: List[Experience] = Field(default_factory=list, description="List of company experiences with name, role, and description")
    major_name: str = Field(description="Major or field of study")
    # Despite the `usc_` prefix, the description asks for any college or university.
    usc_college_name: str = Field(description="Name of the college or university")
    linkedin: str = Field(default="", description="LinkedIn profile URL")

# Define State using TypedDict
class ResumeParserState(TypedDict):
    """State dictionary used as the schema of the resume-parser graph.

    Keys:
        resume_text: Raw resume text; read by parse_resume_node.
        parsed_resume: Structured result; written by parse_resume_node. It is
            not part of the initial state that parse_resume passes to the graph.
    """
    resume_text: str
    parsed_resume: Resume

# Define extraction instructions
# Template for the system message; `{resume_text}` is the only placeholder and
# is filled in with str.format() by parse_resume_node.
# The key names spelled out for nested objects must match the Pydantic models
# (Project: name/description, Experience: name/role/description), because the
# model's JSON reply is validated against those field names.
extraction_prompt = """You are an expert resume parser. 
Extract ALL the following information from the resume and return it in the specified JSON format.

Resume:
{resume_text}

IMPORTANT: 
- For projects: Extract EVERY project mentioned. Create a list where each project has a "name" and "description".
- Include all details about what was done in each project.
- Do not skip any projects.
- STICK TO WHAT IS MENTIONED IN THE RESUME. DO NOT MAKE UP ANY INFORMATION.

Extract:
- name: Full name
- email: Email address
- phone: Phone number
- skills: List of technical and soft skills (as a list)
- projects: List of projects. EACH project should have "name" (project title) and "description" (detailed description of what was done)
- experience: List of professional or research experiences. EACH experience should have "name" (name of the company), "role" (role or position held) and "description" (brief description of the work done)
- major_name: Major or field of study
- usc_college_name: Name of the college or university
- linkedin: LinkedIn profile URL (if available, otherwise empty string)

Return valid JSON matching the Resume schema."""

# Define nodes
def parse_resume_node(state: ResumeParserState):
    """Graph node that turns the raw resume text into structured data.

    Args:
        state: Current graph state; only its "resume_text" entry is read.

    Returns:
        A partial state update ``{"parsed_resume": ...}`` holding whatever the
        structured-output LLM call returned for the ``Resume`` schema.
    """
    raw_text = state["resume_text"]

    # Wrap the shared LLM so its reply is requested in JSON mode for the Resume schema.
    resume_extractor = llm.with_structured_output(Resume, method="json_mode")

    # The resume itself travels inside the system message, embedded in the
    # extraction instructions; the human message only triggers the extraction.
    conversation = [
        SystemMessage(content=extraction_prompt.format(resume_text=raw_text)),
        HumanMessage(content="Please parse this resume and extract all information. Make sure to extract all projects with their descriptions!"),
    ]

    extracted = resume_extractor.invoke(conversation)
    return {"parsed_resume": extracted}

# Build graph
def create_resume_parser_graph():
    """Assemble and compile the single-node resume-parsing graph.

    The graph is linear: START -> "parse_resume" -> END, with
    ``parse_resume_node`` as the only node and ``ResumeParserState`` as the
    state schema.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    node_name = "parse_resume"

    workflow = StateGraph(ResumeParserState)
    workflow.add_node(node_name, parse_resume_node)

    # Wire the only node between the graph's entry and exit points.
    workflow.add_edge(START, node_name)
    workflow.add_edge(node_name, END)

    compiled_graph = workflow.compile()
    return compiled_graph

# Main function
def parse_resume(resume_text: str) -> Resume:
    """Parse raw resume text by running it through the resume-parser graph.

    A fresh graph is built on every call, invoked with ``resume_text`` as the
    only initial state entry, and the "parsed_resume" entry of the final state
    is returned.

    Args:
        resume_text: The full text of the resume to parse.

    Returns:
        The value stored under "parsed_resume" in the graph's final state.
    """
    initial_state = {
        "resume_text": resume_text,
    }
    final_state = create_resume_parser_graph().invoke(initial_state)
    return final_state["parsed_resume"]

In [None]:
# Example usage
if __name__ == "__main__":
    # Sample resume used to exercise parse_resume end to end.
    # The backslash in "CI\\CD" is escaped on purpose: a bare "\C" is an invalid
    # escape sequence in a normal string literal and triggers a SyntaxWarning.
    # The text sent to the model is unchanged (it still contains one backslash).
    sample_resume = """
    John Doe
    john.doe@email.com
    (555) 123-4567
    
    SKILLS
    Python, JavaScript, LangGraph, FastAPI, SQL
    
    EXPERIENCE
    Senior Developer at Tech Corp (2021-2023)
    - Led development of AI applications
    - Managed team of 5 engineers
    
    EDUCATION
    B.S. Computer Science
    Viterbi School of Engineering, University of Southern California

    EXPERIENCE
    IAMSEC VISION IT Pvt. Ltd. 
    Data Scientist 
    ● Deployed a CI\\CD agentic pipeline for social media analytics using Metas Graph API,LangGraph for engagement, competitor analysis.
    ● Defined a pipeline to extract data from unstructured documents using metadata enrichment, hierarchical chunking and graph relations. Subsequently stored the data in Neo4j and designed a GraphRAG, it improved the retrieval quality by 40 percent compared to regular RAG.
    ● Migrated 3 clients MySQL database on AWS RDS, reducing downtime by average of 35 percent and improving query performance by 30%.
    Wolters Kluwer Pvt Ltd India.
    Data Science Intern
    ● Played a key role in building a chatbot using LangChain. Designed evaluation framework using Mlflow-based logging system to capture LLM CoT, metrics, and user feedback,hallucination detection. Researched on advanced RAG techniques like-HyDe, Query decomposition, trained embedding models to improve the search and retrieval workflow.
    ● Streamlined client-specific ETL pipelines for data aggregation from multiple file formats, and devised usable 10k records. After hypothetical testing designed a custom median encoding strategy to train the ML models to predict multiple labels. Used the prediction to develope a law firm recommendation system tailored to client-specific needs, improving baseline accuracy to ~72% (an 8 percent gain).
    ● Optimized prompts for RAG, agent handling SQL DB by trying methods like: 1. ReAct, 2. Chain-Of-Thoughts, 3. Multimodal CoT.
    Dr. Devika Verma
    Natural Language Processing Research Intern 
    ● Implemented kāraka extraction methods: 1. data-driven classifier and 2. Universal Dependency parser with UD to Kāraka mappings.
    ● Engineered feature vectors for (question, candidate sentence) pairs based on verb alignment, kāraka arguments, and post-positions.
    ● Achieved 82.7 percent accuracy in Hindi (data-driven annotator) and 68.7 percent in Marathi, with MRR of 0.71 and 0.64, respectively

    PROJECTS
    1. Multi-Modal Summarization of MRI Brain Reports and Images
    - Created a multi-modal deep learning pipeline to generate summaries of MRI brain scans and corresponding radiology reports.
    - Processed MRI images using a CNN-based feature extractor (ResNet50) and combined image embeddings with text embeddings

    2. LegalAI
    - Developed a conversational law firm recommendation system across 45+ firms integrating the clustering and ML models for predictions.
    - The agent handles case registration, provide predictive insights with interactive Q&A, leveraging ML models with 79 percent accuracy. Implemented Human-in-the-loop to add other matter details before making new predictions and storing them in the DB.
    - Architected full-stack microservices platform with Node.js/Express backend, React/WebSocket frontend and MySQL database
    """
    
    # Runs the whole graph (one LLM call) and prints the resulting Resume object.
    parsed = parse_resume(sample_resume)
    print(parsed)

  sample_resume = """


name='John Doe' email='john.doe@email.com' phone='(555) 123-4567' skills=['Python', 'JavaScript', 'LangGraph', 'FastAPI', 'SQL'] projects=[Project(name='Multi-Modal Summarization of MRI Brain Reports and Images', description='- Created a multi-modal deep learning pipeline to generate summaries of MRI brain scans and corresponding radiology reports.\n- Processed MRI images using a CNN-based feature extractor (ResNet50) and combined image embeddings with text embeddings'), Project(name='LegalAI', description='- Developed a conversational law firm recommendation system across 45+ firms integrating the clustering and ML models for predictions.\n- The agent handles case registration, provide predictive insights with interactive Q&A, leveraging ML models with 79 percent accuracy. Implemented Human-in-the-loop to add other matter details before making new predictions and storing them in the DB.\n- Architected full-stack microservices platform with Node.js/Express backend, React/WebSocket fron