In [26]:
from typing import TypedDict, List, Annotated, Optional, Dict
from pydantic import BaseModel, Field
from langgraph.graph import StateGraph, START, END
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage

In [6]:
# Load variables from a local .env file before the chat client is constructed.
# NOTE(review): the Gemini client presumably reads its API key from the
# environment populated here (e.g. GOOGLE_API_KEY) — confirm.
load_dotenv()
# Module-level chat model; the `missing` node calls `llm.invoke(...)` on it.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

In [27]:
from typing import TypedDict, Optional, List, Dict

class JobAgent(TypedDict, total=False):
    """Shared state dictionary passed from node to node in the resume-review graph.

    Declared with ``total=False``, so every key is optional: nodes add keys as
    the flow progresses, and a node must not assume a key exists before the
    node that writes it has run.
    """
    # ---- Initial user input ----
    name: str
    job_title: str                # read by `missing` when building the recruiter prompt
    experience: int               # NOTE(review): unit (presumably years) is not shown here — confirm
    pdf_path: str                 # path handed to PyPDFLoader in `extraction`
    jd: str                       # presumably the job-description text; not read by any node defined so far
    email: Optional[str]
    linkedin: Optional[str]

    # ---- Generated during flow ----
    resume_text: str              # written by `extraction`: text of all PDF pages joined together
    sections: Dict[str, str]      # written by `preprocess`: normalised heading -> section text
    missing_fields: List[str]     # written by `preprocess`: required keywords absent from the resume

    # ---- LLM Output ----
    # All keys below are written by `missing` (from the model's JSON reply,
    # or from its fail-safe defaults when evaluation raises).
    match: bool
    match_score: float            # the prompt asks the model for a score between 0 and 10
    reason: str
    missing_improvements: List[str]
    missing_field_explanations: Dict[str, str]
    feedback_summary: str

DEFINING ALL THE NODE FUNCTIONS USED TO BUILD THE GRAPH

STEP 1 = EXTRACTION
#Here we load the PDF, extract the text of every page,
#and join it into a single string.
#The function stores that string in the state under "resume_text" and returns the state.

In [19]:
from langchain_community.document_loaders import PyPDFLoader
def extraction(state : JobAgent) -> JobAgent:
    """Graph node: read the resume PDF and put its text into the state.

    Reads ``state["pdf_path"]``, loads the file with ``PyPDFLoader`` and
    concatenates the ``page_content`` of every returned document, each
    followed by a newline.

    Args:
        state: Graph state; must contain ``"pdf_path"``.

    Returns:
        The same ``state`` object, mutated in place, with ``"resume_text"`` set.

    Raises:
        KeyError: if ``"pdf_path"`` is not present in ``state``.
    """
    pages = PyPDFLoader(state["pdf_path"]).load()
    # One newline after every page, including the last one.
    state["resume_text"] = "".join(page.page_content + "\n" for page in pages)
    return state
    

STEP 2 = PREPROCESSING (check for missing profile links and split the resume into sections)

In [28]:
import re
def preprocess(state : JobAgent) -> JobAgent:
    """Graph node: flag missing profile keywords and split the resume into sections.

    Two things are derived from ``state["resume_text"]``:

    * ``missing_fields`` - every entry of ``required_words`` that does not
      occur (case-insensitively) anywhere in the resume text.
    * ``sections`` - a mapping from normalised heading (letters only,
      upper-cased, e.g. ``"Technical Skills:"`` -> ``"TECHNICALSKILLS"``) to
      the text that follows it. Lines seen before the first recognised
      heading are collected under ``"OTHER"``.

    Args:
        state: Graph state; ``"resume_text"`` is read if present.

    Returns:
        The same ``state`` object, mutated in place, with ``"missing_fields"``
        and ``"sections"`` set.
    """
    # A missing or None resume_text is treated as an empty resume instead of
    # crashing on `.lower()` / `.splitlines()`.
    text = state.get("resume_text") or ""
    headings = {
        "SUMMARY", "OBJECTIVE",
        "SKILLS", "TECHNICALSKILLS",
        "EDUCATION",
        "EXPERIENCE", "WORKEXPERIENCE",
        "PROJECTS", "PROJECT",
        "CERTIFICATIONS",
        "ACHIEVEMENTS",
        "INTERNSHIPS",
        "LANGUAGES"
    }
    required_words = ["github", "gmail.com", "linkedin", "leetcode"]

    # Required-keyword check (plain substring match on the lower-cased text).
    lower_text = text.lower()
    missing = [word for word in required_words if word not in lower_text]

    # Section splitting.
    sections = {"OTHER" : ""}
    current = "OTHER"

    # Split on real line breaks. The previous `split("/n")` used a forward
    # slash, so the whole resume was treated as one line and no heading was
    # ever recognised.
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue

        # Drop everything except letters so "Work Experience:" and
        # "WORK-EXPERIENCE" both compare equal to "WORKEXPERIENCE".
        normalised = re.sub(r"[^A-Za-z]", "", line).upper()

        if normalised in headings:
            current = normalised
            # setdefault keeps earlier content when a heading repeats
            # (for example, a section continued on the next page).
            sections.setdefault(current, "")
        else:
            sections[current] += line + "\n"

    state["missing_fields"] = missing
    state["sections"] = sections

    return state
    


In [30]:
import json
def _parse_llm_json(content: str) -> dict:
    """Parse the JSON object contained in an LLM reply.

    The reply is first parsed as-is. If that fails (for example because the
    model wrapped the JSON in a Markdown code fence or added prose around
    it), the span from the first ``{`` to the LAST ``}`` is parsed instead,
    so that nested objects are kept intact.

    Raises:
        json.JSONDecodeError: if no parseable JSON can be recovered.
    """
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find("{")
        # rfind, not find: the first "}" closes the nested
        # "missing_field_explanations" object, not the outer one.
        end = content.rfind("}") + 1
        return json.loads(content[start:end])


def missing(state : JobAgent) -> JobAgent:
    """Graph node: ask the LLM to judge the resume against the job title.

    Builds a recruiter-style prompt from ``state["job_title"]``, the
    ``SUMMARY`` and ``SKILLS`` (or ``TECHNICALSKILLS``) sections and the
    ``missing_fields`` list, sends it to the module-level ``llm`` and copies
    the fields of the JSON reply into the state.

    Args:
        state: Graph state; ``"job_title"``, ``"sections"`` and
            ``"missing_fields"`` are read if present.

    Returns:
        The same ``state`` object, mutated in place, with ``"match"``,
        ``"match_score"``, ``"reason"``, ``"missing_improvements"``,
        ``"missing_field_explanations"`` and ``"feedback_summary"`` set.
        This function never raises: on any exception the keys are filled with
        fail-safe defaults and ``"reason"`` carries the error message.
    """
    try:
        job_title = state.get("job_title", "")
        sections = state.get("sections", {})
        missing_fields = state.get("missing_fields", [])

        summary = sections.get("SUMMARY", "")
        # Fall back to the alternative heading when "SKILLS" is absent or empty.
        skills = sections.get("SKILLS", "") or sections.get("TECHNICALSKILLS", "")

        system_prompt = """
You are a strict and realistic technical recruiter.

Your responsibilities:

1. Compare resume summary and skills with the given job title.
2. Decide whether the resume matches the job.
3. Give a match score between 0 and 10.
4. Suggest practical improvements.
5. If any required field (like GitHub, LinkedIn, Email, LeetCode) is missing,
   explain why that field is important for recruiters.
6. Provide short structured feedback using:
   ✅ for strengths
   ❌ for weaknesses

Be concise and professional.

Return ONLY valid JSON.
Do not add any explanation outside JSON.
"""
        human_prompt = f"""
JOB TITLE:
{job_title}

RESUME SUMMARY:
{summary}

RESUME SKILLS:
{skills}

MISSING FIELDS DETECTED:
{missing_fields}

Instructions:
- If a required field is missing, clearly explain why it matters.
- Be recruiter-like and realistic.
- Keep explanations short but meaningful.

Return JSON in this exact structure:

{{
  "match": true/false,
  "match_score": number,
  "reason": "short explanation",
  "missing_improvements": ["point1", "point2"],
  "missing_field_explanations": {{
        "field_name": "why this field is important"
  }},
  "feedback_summary": "4-6 short lines using ✅ and ❌"
}}
"""
        response = llm.invoke([
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt)
        ])
        content = response.content.strip()

        parsed = _parse_llm_json(content)

        state["match"] = parsed.get("match", False)
        state["match_score"] = float(parsed.get("match_score", 0))
        state["reason"] = parsed.get("reason", "")
        state["missing_improvements"] = parsed.get("missing_improvements", [])
        state["missing_field_explanations"] = parsed.get("missing_field_explanations", {})
        state["feedback_summary"] = parsed.get("feedback_summary", "")
        return state
    except Exception as e:
        # ----------------------------
        # Fail-Safe (Graph Never Crashes)
        # ----------------------------
        # Deliberately broad: any failure (LLM call, parsing, bad field
        # types) is turned into a "no match" result instead of an exception.
        state["match"] = False
        state["match_score"] = 0.0  # float, same type as on the success path
        state["reason"] = f"Evaluation failed: {str(e)}"
        state["missing_improvements"] = []
        state["missing_field_explanations"] = {}
        state["feedback_summary"] = "❌ Evaluation failed due to system error."
        return state

NODES

We can store all of this information in a cache so that, if the user later
provides a job description (JD), we can report what percentage fit they are for the role.

In [None]:
# Build the graph over the shared JobAgent state and register one node per step.
graph = StateGraph(JobAgent)
# NODES

# Resume ingestion: PDF -> text -> sections / missing-field list.
graph.add_node('extraction', extraction)
graph.add_node('preprocess', preprocess)
# Main part: LLM evaluation and follow-up steps.
graph.add_node('missing', missing)
# NOTE(review): `assesment` is not defined in the cells shown above, so this
# line raises NameError on a fresh kernel until it is implemented. The node
# name 'assestment' (sic) is also used by the edges cell; rename both together.
graph.add_node('assestment', assesment)
# Planned, not wired yet:
#graph.add_node('voice', voice)
# NOTE(review): `interview` is not defined in the cells shown above.
graph.add_node('interview', interview)
# Resources
# NOTE(review): `dsa`, `job_sites` and `courses` are not defined in the cells shown above.
graph.add_node('dsa', dsa)
graph.add_node('job_sites', job_sites)
graph.add_node('courses', courses)

# Summary and feedback
# NOTE(review): `jd_feedback` is not defined in the cells shown above.
graph.add_node('jd_feedback', jd_feedback)

# TODO: if the feedback is good, email the recruiter and message them on LinkedIn,
# then update an Excel sheet so the candidate can track applications.

EDGES

In [None]:
# EDGES
# Wire the nodes into a single linear pipeline from START to END.
graph.add_edge(START, 'extraction')
graph.add_edge('extraction', 'preprocess')
graph.add_edge('preprocess', 'missing')
# 'assestment' (sic) must match the node name registered in the nodes cell.
graph.add_edge('missing', 'assestment')
# TODO: add a conditional edge here — if the candidate gets 70% right in the
# assessment, skip the interview questions and go directly to the interview and DSA steps.
graph.add_edge('assestment', 'interview')
graph.add_edge('interview', 'dsa')
graph.add_edge('dsa', 'job_sites')
graph.add_edge('job_sites', 'courses')
graph.add_edge('courses', 'jd_feedback')
graph.add_edge("jd_feedback", END)

# Compile the graph definition into the object stored as `workflow`.
workflow = graph.compile()