## Import Dependencies

In [1]:
%%capture
!pip install langgraph
!pip install -U langchain-google-genai

In [2]:
import requests
import os
from google.colab import userdata

from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import StateGraph, START, END

from typing_extensions import TypedDict
from pydantic import BaseModel, Field
from typing import Optional

from csv import DictWriter, writer

## Setup CSV file path and API keys

In [3]:
# Path of the CSV tracker that the workflow appends to
# (looks like a Google Drive location - Drive presumably has to be mounted first; verify).
csv_file_link = '/content/drive/MyDrive/Job Applications Tracker.csv'

# API keys are read from the Colab secret store instead of being hard-coded.
jina_api_key = userdata.get("JINA_API_KEY")
gemini_api_key = userdata.get("GEMINI_API_KEY")

# Expose the Gemini key as GOOGLE_API_KEY unless the environment already
# provides one; an existing value is never overwritten.
os.environ.setdefault("GOOGLE_API_KEY", gemini_api_key)

## AI Workflow

In [4]:
#-------------------------------Data Classes-------------------------------------
class Job(BaseModel):
    """Specific information extracted from job listing."""
    # This model is passed to `llm.with_structured_output(Job)` in
    # get_job_listing_details, and its field names match the CSV columns
    # written by update_csv (plus the extra 'job_link' column added there).
    # NOTE(review): the class docstring and the Field descriptions are
    # presumably forwarded to the model as part of the output schema - treat
    # them as prompt text and edit with care.
    #
    # Only `title` and `company` are required; every other field defaults to None.
    id: Optional[str] = Field(default=None, description="The unique identifier for the job posting.")
    title: str = Field(description="The official job title.")
    location: Optional[str] = Field(default=None, description="The primary work location(s).")
    company: str = Field(description="The name of the hiring company.")
    pay_range: Optional[str] = Field(default=None, description="The specified salary or compensation range (e.g., $100k - $120k, £50,000 per year).")
    about_company: Optional[str] = Field(default=None, description="A body of text describing the team/company culture and priorities.")
    roles_and_responsibilities: Optional[str] = Field(default=None, description="The main body of text that describes roles and responsibilities and type of candidate they are looking for.")
    keywords: Optional[str] = Field(default=None, description="Relevant skills, technologies, or terms associated with the job (often found in skills sections or throughout the description).")


class State(TypedDict):
    """Shared state passed between the nodes of the workflow graph."""
    # URL of the job listing, as typed in by the user.
    job_link: str
    # Text of the listing as returned by the scraping step (get_markdown).
    # The driver loop seeds this with None before the graph runs.
    markdown: str
    # Extracted Job fields as a plain dict (set by get_job_listing_details).
    # Also seeded with None by the driver loop.
    listing_details: dict


#-------------------------------Workflow Nodes-------------------------------------
#Scrape markdown data from Job Listing Link
def get_markdown(state: State) -> State:
    """Fetch the job listing through the Jina Reader proxy and store its text.

    The listing URL in ``state['job_link']`` is appended to
    ``https://r.jina.ai/`` and requested with the Jina API key.

    Args:
        state: Workflow state; ``state['job_link']`` must hold the listing URL.

    Returns:
        The same state object with ``state['markdown']`` set to the response body.

    Raises:
        requests.HTTPError: If the reader responds with a 4xx/5xx status, so an
            error page is never passed on as if it were the job listing.
        requests.RequestException: On connection problems or when the request
            exceeds the timeout.
    """
    request_timeout_s = 60  # without a timeout a stalled request blocks forever

    reader_url = "https://r.jina.ai/" + state['job_link']

    # Jina expects "Authorization: Bearer <key>". Accept a secret stored either
    # with or without the scheme prefix, and send no header if no key is set.
    headers = {}
    if jina_api_key:
        token = jina_api_key.strip()
        if not token.lower().startswith("bearer "):
            token = f"Bearer {token}"
        headers["Authorization"] = token

    response = requests.get(reader_url, headers=headers, timeout=request_timeout_s)
    response.raise_for_status()

    state['markdown'] = response.text
    print("Collected details from job listing!")
    return state

#Extract required details from Job Listing in JSON format
def get_job_listing_details(state: State) -> State:
    """Extract structured job details from the scraped listing text with Gemini.

    The text in ``state['markdown']`` is inserted into an extraction prompt and
    sent to a Gemini chat model that is constrained to the ``Job`` schema.

    Args:
        state: Workflow state; ``state['markdown']`` must hold the listing text.

    Returns:
        The same state object with ``state['listing_details']`` set to a dict
        of the extracted ``Job`` fields.

    Raises:
        ValueError: If the model does not return a structured result.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, max_retries=2)
    structured_llm = llm.with_structured_output(Job)

    # Literal braces in the JSON schema line are doubled ({{ }}) so the template
    # engine does not treat them as variables. The ```markdown fence is closed
    # after {content} so the listing is actually "enclosed" as the prompt says.
    prompt = ChatPromptTemplate.from_messages([
    ("system",
     """## Role:
        You are an expert Data Extraction AI. Your task is to meticulously analyze job descriptions and extract specific pieces of information.

        ## Task:
        From the provided job description text (in Markdown format), extract the following details:
        *   `ID`: The unique identifier for the job posting.
        *   `Title`: The official job title.
        *   `Location`: The primary work location(s).
        *   `Company`: The name of the hiring company.
        *   `Pay Range`: The specified salary or compensation range (e.g., "$100k - $120k", "£50,000 per year").
        *   `About Company`: A body of text describing the team/company culture and priorities.
        *   `Roles and Responsibilities`: The main body of text that describes roles and responsibilities and type of candidate they are looking for.
        *   `Keywords`: Relevant skills, technologies, or terms associated with the job (often found in skills sections or throughout the description).
        *   Use the below JSON schema to format the output, if required information is not found include 'NA' as the value.
            JSON Schema
            Job = {{'id': str, 'title': str, 'location': str, 'company': str, 'pay_range': str, 'about_company': str, 'roles_and_responsibilities': str, 'keywords': str}}
            Return: Job

        ## Input Data:
        The job description is provided below, enclosed in triple backticks:
        ```markdown
        {content}
        ```
     """)
    ])

    final_prompt = prompt.invoke({"content": state["markdown"]})
    # Only the rendered text of the template is sent, i.e. the model receives
    # one plain string prompt rather than a system message.
    result = structured_llm.invoke(final_prompt.messages[0].content)

    # Fail with a clear message instead of an opaque TypeError from dict(None).
    if result is None:
        raise ValueError("The model did not return structured job details for this listing.")

    state['listing_details'] = dict(result)
    print("Extracted relevant information from listing!")
    return state

#Update details gathered to CSV file
def update_csv(state: State):
    """Append the extracted job details as one row of the tracker CSV.

    A header row is written first when the file is missing or empty. Errors
    are reported with ``print`` rather than raised, so a failed write does not
    abort the workflow.

    Args:
        state: Workflow state; reads ``state['listing_details']`` (dict of Job
            fields) and ``state['job_link']``.

    Returns:
        None. The state is left unchanged.
    """
    field_names = ['job_link', 'id', 'title', 'location', 'company', 'pay_range', 'about_company', 'roles_and_responsibilities', 'keywords']

    # Work on a copy so the dict stored in the graph state is not mutated.
    row = dict(state['listing_details'] or {})
    row['job_link'] = state['job_link']

    file_exists = os.path.exists(csv_file_link)
    # An existing but empty file needs a header as well.
    if not file_exists or os.path.getsize(csv_file_link) == 0:
        if not file_exists:
            print("File not found. Creating it...")
        try:
            with open(csv_file_link, 'w', newline='', encoding='utf-8') as csvfile:
                DictWriter(csvfile, fieldnames=field_names).writeheader()
            print(f"File '{csv_file_link}' created successfully with header.")
        except Exception as e:
            print(f"Error creating file: {e}")
            # Do not fall through: appending now could leave a headerless file.
            return

    try:
        # Explicit UTF-8 so listings with non-ASCII text (e.g. "£") are written
        # the same way regardless of the platform's default encoding.
        with open(csv_file_link, 'a', newline='', encoding='utf-8') as csvfile:
            DictWriter(csvfile, fieldnames=field_names).writerow(row)
        print('Added details into the tracker file!')
    except Exception as e:
        print(f"Error appending to file: {e}")
    return

In [5]:
#Define structure of AI Workflow with Langgraph
graph_builder = StateGraph(State)

# Register every step under the same name as the function implementing it.
for _step in (get_markdown, get_job_listing_details, update_csv):
    graph_builder.add_node(_step.__name__, _step)

# Connect the steps into one linear chain: scrape -> extract -> save.
_chain = [START, "get_markdown", "get_job_listing_details", "update_csv", END]
for _src, _dst in zip(_chain, _chain[1:]):
    graph_builder.add_edge(_src, _dst)

graph = graph_builder.compile()

#Trigger the workflow for each job listing link
# Keeps prompting until the user types quit / exit / q (case-insensitive).
while True:
    # Strip stray whitespace so it ends up neither in the URL nor in the CSV.
    inp = input("Enter Job Listing Link: ").strip()
    if inp.lower() in ("quit", "exit", "q"):
        break
    if not inp:
        # Nothing was entered; ask again instead of running on an empty link.
        continue
    initial_state: State = {
        'job_link': inp,
        'markdown': None,
        'listing_details': None
    }
    try:
        graph.invoke(initial_state)
    except Exception as e:
        # One bad link should not end the whole session; report and move on.
        print(f"Failed to process '{inp}': {e}")

Enter Job Listing Link: https://boards.greenhouse.io/embed/job_app?token=6506012
Collected details from job listing!
Extracted relevant information from listing!
File not found. Creating it...
File '/content/drive/MyDrive/Job Applications Tracker.csv' created successfully with header.
Added details into the tracker file!
Enter Job Listing Link: https://www.linkedin.com/jobs/view/3679531830/
Collected details from job listing!
Extracted relevant information from listing!
Added details into the tracker file!
Enter Job Listing Link: quit
