In [59]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


def extract_job_details_from_card(job_element):
    """Read the listing fields out of a single job-card element.

    Args:
        job_element: Object exposing ``find_element`` / ``find_elements``
            (the caller passes the elements returned by
            ``driver.find_elements``).

    Returns:
        A dict with the keys ``Title``, ``Company``, ``Experience``,
        ``Location``, ``Description``, ``Posted``, ``Skills`` (tag texts
        joined with ``', '``) and ``Link``; or ``None`` when any lookup
        raises, in which case the error is printed.
    """
    # Output column -> CSS selector for the single-valued text fields,
    # listed in the order they are looked up and stored.
    text_fields = (
        ('Company', 'a.comp-name'),
        ('Experience', 'span.expwdth'),
        ('Location', 'span.locWdth'),
        ('Description', 'span.job-desc'),
        ('Posted', 'span.job-post-day'),
    )

    try:
        # The title anchor supplies both the visible title and the job URL.
        anchor = job_element.find_element(By.CSS_SELECTOR, 'h2 > a.title')
        details = {'Title': anchor.text}
        href = anchor.get_attribute('href')

        for column, selector in text_fields:
            details[column] = job_element.find_element(By.CSS_SELECTOR, selector).text

        # Skill tags are optional: find_elements yields an empty list
        # (and therefore an empty string) when the card has none.
        tag_nodes = job_element.find_elements(By.CSS_SELECTOR, 'ul.tags-gt > li')
        details['Skills'] = ', '.join([node.text for node in tag_nodes])
        details['Link'] = href
        return details
    except Exception as e:
        print(f"Error extracting job details: {e}")
        return None
        




def scrape_naukari_jobs(keywords, location="bengaluru", experience=5,
                        headless=False, wait_timeout=15):
    """Scrape the first Naukri results page for each keyword into one DataFrame.

    A single Chrome session is opened, one search URL per keyword is loaded,
    every ``srp-jobtuple-wrapper`` card found on the page is parsed with
    ``extract_job_details_from_card``, and the browser is always closed
    before returning.

    Args:
        keywords: Iterable of search keywords. A bare string is treated as
            a single keyword rather than iterated character by character.
        location: Location segment interpolated into the search URL.
        experience: Value of the ``experience`` query parameter.
        headless: When True, start Chrome with ``--headless=new``.
        wait_timeout: Seconds to wait for the first job card to appear.

    Returns:
        pandas.DataFrame with one row per successfully parsed card (columns
        are the keys returned by ``extract_job_details_from_card``). The
        frame is empty, with no columns, when nothing was parsed.

    Raises:
        Whatever the WebDriver raises other than the wait timing out; the
        browser is closed first.
    """
    if isinstance(keywords, str):
        keywords = [keywords]

    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless=new')
    options.add_argument('--disable-gpu')
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    # Collect plain dicts and build the frame once at the end instead of
    # concatenating a new DataFrame per keyword.
    job_records = []
    try:
        for keyword in keywords:
            # NOTE(review): the keyword is interpolated verbatim, so a value
            # such as "Gen AI" leaves a space in the URL path; confirm the
            # site resolves that as intended.
            url = f"https://www.naukri.com/{keyword}-jobs-in-{location}?experience={experience}"
            driver.get(url)

            # Wait until job elements are present. Only a timeout is
            # tolerated here; any other failure is a real error and must
            # not be silenced.
            try:
                WebDriverWait(driver, wait_timeout).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "srp-jobtuple-wrapper"))
                )
                print("Job elements loaded.")
            except TimeoutException:
                print("Job elements not found or took too long to load.")

            # Scroll to trigger more content load
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            # Now get job cards
            jobs = driver.find_elements(By.CLASS_NAME, 'srp-jobtuple-wrapper')
            print(f"Found {len(jobs)} job elements.")

            for job in jobs:
                job_data = extract_job_details_from_card(job)
                # The extractor reports its own errors and returns None for
                # cards it could not parse; keep those out of the table.
                if job_data is not None:
                    job_records.append(job_data)
    finally:
        # Always release the browser, even when a page load or lookup fails.
        driver.quit()

    return pd.DataFrame(job_records)

In [60]:
# Search terms passed to the scraper.
keywords=["NLP","LLM","Gen AI"]
# Bind the scraped frame to a name so later cells can reuse it without
# relying on `_` / `Out[n]`; the bare name on the last line still renders
# the table as this cell's output.
naukri_jobs_df = scrape_naukari_jobs(keywords=keywords,location = "bengaluru", experience = 5)
naukri_jobs_df

Job elements loaded.
Found 20 job elements.
Job elements loaded.
Found 20 job elements.
Job elements loaded.
Found 20 job elements.


Unnamed: 0,Title,Company,Experience,Location,Description,Posted,Skills,Link
0,Data Scientist (ML/NLP Developer),DIGITAP.AI,1-2 Yrs,Bengaluru,"BE, BTech, MTech, ME, MCA (minimum 4 years of ...",2 Days Ago,"deep learning, Computer vision, Risk analytics...",https://www.naukri.com/job-listings-data-scien...
1,Senior Machine Learning Engineer NLP/LLM,Avalara India,4-8 Yrs,"Bengaluru, Kolkata, Mumbai, New Delhi, Hyderab...",Bachelors / Masters degree in computer science...,1 Day Ago,"python, software development, microsoft azure,...",https://www.naukri.com/job-listings-senior-mac...
2,Looking For Senior Data Scientist || GEN AI | ...,Birdeye,5-9 Yrs,"Hybrid - Bengaluru, Hyderabad, Gurugram",Integrate models into real-time and batch data...,3 Days Ago,"Generative AI, Nltk, Enterprise AI, Bert, Spac...",https://www.naukri.com/job-listings-looking-fo...
3,Principal Data Scientist / Senior Data Scienti...,Benovymed Healthcare Private Limited,0-5 Yrs,Bengaluru,Job Position :Principal Data Scientist / Sr Da...,3 Days Ago,"Data Science, Tensorflow, NLP, Data Scientist,...",https://www.naukri.com/job-listings-principal-...
4,Senior AI / ML Engineer - NLP / Deep Learning,Leading Client,5-8 Yrs,"Bengaluru, Mumbai, Delhi / NCR",We are looking for a highly skilled Senior AI ...,3 Days Ago,"NLP, Data Science, Tensorflow, PyTorch, Artifi...",https://www.naukri.com/job-listings-senior-ai-...
5,NLP & LLM Engineer,Evalueserve,3-8 Yrs,Hybrid - Bangalore/ Bengaluru,Elevate Your Impact Through Innovation and Lea...,12 Days Ago,"NLP, Bert, Spacy, Large Language Models, Nltk,...",https://www.naukri.com/job-listings-nlp-llm-en...
6,Senior AI/ML Engineer - NLP & Generative AI,Siemens,5-10 Yrs,Bengaluru,Masters or Bachelors degree in Computer Scienc...,10 Days Ago,"python, natural language processing, aiml, art...",https://www.naukri.com/job-listings-senior-ai-...
7,Lead NLP/LLM Engineer,Uplers,5-9 Yrs,Remote,Senior Machine Learning EngineerExperience: 5 ...,6 Days Ago,"Pytorch, MLOps, LLM, Sagemaker, Vector Databas...",https://www.naukri.com/job-listings-lead-nlp-l...
8,Urgently hiring For AI/ML Engineer (LLM/NLP) M...,Kaizenit Infotech,12-19 Yrs,"Bengaluru, Mumbai, Pune","1.For NLP: Gen AI, Machine Learning, Technical...",6 Days Ago,"Data Science, Large Language Model, Natural La...",https://www.naukri.com/job-listings-urgently-h...
9,Machine Learning Data Scientist - NLP,Forecastera India,3-7 Yrs,"Bengaluru, Hyderabad",Education: Any graduate / postgraduate with a ...,30+ Days Ago,"Front end, Machine learning, Information retri...",https://www.naukri.com/job-listings-machine-le...
