In [1]:
import os 

In [2]:
# Show where the kernel started; the following chdir("..") cell assumes this
# is the project's notebook/ folder.
os.getcwd()

'/home/hasan/Artificial-Intelligence/projects/Resume-ATS-Score-Checker/notebook'

In [3]:
# Step up to the project root (presumably so the `src.*` imports and the .env
# lookup below resolve). Guarded on the current folder name so that re-running
# this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("..")

In [4]:
# Confirm the working directory is now the project root.
os.getcwd()

'/home/hasan/Artificial-Intelligence/projects/Resume-ATS-Score-Checker'

In [5]:
from dotenv import load_dotenv
load_dotenv()

from src.ats.components.schema import JobDescription
from src.ats import logging
from firecrawl import Firecrawl
import os
import asyncio

In [22]:
class JobDescriptionParser:
    
    def __init__(self, firecrawl_api_key: str = None) -> None:
        if not firecrawl_api_key:
            firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
        
        if not firecrawl_api_key:
            raise ValueError(f"argument and environment variable 'firecrawl_api_key' is having value '{firecrawl_api_key}'")
        
        self.firecrawl = Firecrawl(api_key=firecrawl_api_key)
    
    async def extract_job_description(self, url: str):
        """Extract job description using Firecrawl's AI-powered extraction"""
        loop = asyncio.get_running_loop()
        
        def _scrape():
            try:
                result = self.firecrawl.scrape(
                    url,
                    formats=[{
                        "type": "json",
                        "schema": JobDescription
                    }],
                    only_main_content=False,
                    timeout=120000
                )
                
                if result.metadata.status_code == 200:
                    return result.json
                else:
                    print(f"Firecrawl extraction failed: {result}")
                    return None
                    
            except Exception as e:
                print(f"Error with Firecrawl: {str(e)}")
                return None
        
        return await loop.run_in_executor(None, _scrape)
    
    async def extract_job_description_with_prompt(self, url: str):
        """Alternative method using natural language prompt"""
        loop = asyncio.get_running_loop()
        
        def _scrape_with_prompt():
            try:
                result = self.firecrawl.scrape(
                    url,
                    formats=[{
                        "type": "json",
                        "prompt": """Extract the following information from this job posting:
                        - Job title
                        - Company name
                        - Location
                        - Job type (full-time, part-time, etc.)
                        - Experience level required
                        - Complete job description
                        - Requirements and qualifications
                        - Key responsibilities
                        - Salary range (if mentioned)
                        - Posted date (if available)

                        Return as structured JSON."""
                    }],
                    only_main_content=False
                )
                
                if result.metadata.status_code == 200:
                    return result.json
                else:
                    return None
                    
            except Exception as e:
                print(f"Error: {str(e)}")
                return None
        
        return await loop.run_in_executor(None, _scrape_with_prompt)
    
    async def parse(self, url: str) -> JobDescription | None:
        logging.info("In JobDescriptionParser")
        method = "extract_job_description"
        job_data = await self.extract_job_description(url)
        
        if not job_data:
            job_data = await self.extract_job_description_with_prompt(url)
            method = "extract_job_description_with_prompt"
        logging.info(f"used method \'{method}\' to parse job description")
        logging.info("Out JobDescriptionParser")
        return job_data


In [23]:
# Build the parser without an explicit key, so it falls back to the
# FIRECRAWL_API_KEY environment variable.
parser = JobDescriptionParser()

In [24]:
# Show the job-posting URL that will be parsed; os.getenv returns None if
# JD_URL is not set.
os.getenv("JD_URL")

'https://www.naukri.com/job-listings-data-scientist-capgemini-technology-services-india-limited-bengaluru-4-to-7-years-230925920333?src=seo_srp&sid=17590531409038462&xp=1&px=1'

In [25]:
# Run the parser on the configured URL. Top-level `await` works here because
# Jupyter/IPython executes cells inside a running event loop.
result = await parser.parse(os.getenv("JD_URL"))

In [27]:
# Display the parsed job description returned by the parser.
result

{'job_type': 'Full Time, Permanent',
 'location': 'Bengaluru',
 'job_title': 'Data Scientist',
 'posted_date': '4 days ago',
 'company_name': 'Capgemini',
 'requirements': 'UG: Any Graduate, PG: Any Postgraduate. Key Skills: python, natural language processing, machine learning, iot, deep learning, c++, project management, software testing, plc, microsoft azure, artificial intelligence, sql, java, data science, predictive modeling, embedded systems, linux, embedded c, agile, aws.',
 'salary_range': 'Not Disclosed',
 'job_description': 'This role involves the development and application of engineering practice and knowledge in the following technologies: Standards and protocols, application software and embedded software for wireless and satellite networks, fixed networks and enterprise networks; connected devices (IOT and device engineering), connected applications (5G/ edge, B2X apps); and Telco Cloud, Automation and Edge Compute platforms. This role also involves the integration of n