In [200]:
import pandas as pd
import numpy as np
import glob
import os
import time
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup

In [13]:
# Naukri search-results URL for software-developer jobs in India.
link = 'https://www.naukri.com/software-developer-jobs-in-india'

# Placeholder: replace with the path to the ChromeDriver executable on this
# machine (kept as a raw string so Windows backslashes need no escaping).
chrome_path = r"your_chrome_driver_path"

In [612]:
# Launch a Chrome session through the driver executable at `chrome_path`.
# `browser` is the live WebDriver handle used to load pages.
service = Service(executable_path=chrome_path)
browser = webdriver.Chrome(service=service)

## Extracting job roles

In [710]:
def _optional(extract, default=np.nan):
    """Return ``extract()``, or ``default`` when the expected markup is absent.

    Only ``AttributeError`` (a ``find`` call returned ``None``) and
    ``IndexError`` (fewer ``<a>`` tags than expected) count as "field missing".
    Anything else -- including ``KeyboardInterrupt`` -- propagates instead of
    being silently swallowed.
    """
    try:
        return extract()
    except (AttributeError, IndexError):
        return default


def parse_job_card(card):
    """Convert one ``srp-jobtuple-wrapper`` element into a row of job fields.

    Parameters
    ----------
    card : bs4.element.Tag
        A single job card taken from a parsed search-results page.

    Returns
    -------
    list
        ``[title, link, location, required_experience, skills, company_name,
        reviews_count]``. A missing location, experience or skills entry
        becomes ``np.nan``; a missing review count becomes ``0``.

    Raises
    ------
    AttributeError, IndexError
        If the title, job link or company name cannot be found. These fields
        are treated as mandatory and are deliberately not defaulted.
    """
    def company_links():
        # All anchors inside the company-details wrapper; index 0 is read as
        # the company name and index 2 as the review count.
        return card.find("span", class_="comp-dtls-wrap").find_all("a")

    title = card.find("h2").text
    job_link = card.find("a", class_="title").get("href")
    location = _optional(lambda: card.find("span", class_="locWdth").text)
    # [:-4] drops the trailing four characters of the experience label
    # (presumably a " Yrs" suffix -- confirm against the live markup).
    required_experience = _optional(
        lambda: card.find("span", class_="expwdth").text[:-4]
    )
    skills = _optional(
        lambda: ", ".join(
            tag.text.strip()
            for tag in card.find("ul", class_="tags-gt").find_all("li")
        )
    )
    company_name = company_links()[0].text
    # [:-9] drops the trailing nine characters of the review label
    # (presumably a "Reviews" suffix plus spacing -- confirm against the markup).
    reviews_count = _optional(lambda: company_links()[2].text[:-9], default=0)

    return [
        title,
        job_link,
        location,
        required_experience,
        skills,
        company_name,
        reviews_count,
    ]


def scrape_job_listings(driver, role_slug, pages=range(1, 21), experience=0, render_wait=3):
    """Collect job rows for one role from paginated Naukri search results.

    Parameters
    ----------
    driver : selenium WebDriver
        An already-started browser session used to load each page.
    role_slug : str
        Role part of the URL, e.g. ``"ai-ml-engineer"``.
    pages : iterable of int, optional
        Page numbers to visit (default: pages 1 through 20).
    experience : int, optional
        Value sent as the ``experience`` query parameter (default 0).
    render_wait : float, optional
        Seconds to pause after each page load before reading the page source.

    Returns
    -------
    list of list
        One row per job card, in the order the cards appear.
    """
    rows = []
    for page in pages:
        driver.get(
            f"https://www.naukri.com/{role_slug}-jobs-in-india-{page}?experience={experience}"
        )
        # Fixed pause so the page has time to render before it is parsed.
        time.sleep(render_wait)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        rows.extend(
            parse_job_card(card)
            for card in soup.find_all("div", class_="srp-jobtuple-wrapper")
        )
    return rows


data = scrape_job_listings(browser, "ai-ml-engineer")

print(len(data))

382


In [712]:
# Column names, in the same order as the fields of each scraped row in `data`.
column_names = [
    "job_title",
    "link",
    "location",
    "required_experience",
    "skills",
    "company_name",
    "reviews_count",
]
df = pd.DataFrame(data, columns=column_names)

In [714]:
# Split each comma-separated location string into a list of trimmed names.
# Missing locations are first replaced by "", so they end up as a single
# empty-string entry rather than NaN.
location_lists = [
    [part.strip() for part in str(raw).split(",")]
    for raw in df["location"].fillna("")
]
df["location"] = pd.Series(location_lists, index=df.index, dtype=object)

# One output row per (job, location) pair, renumbered from 0.
df = df.explode("location").reset_index(drop=True)

In [716]:
# Display the current contents of `df`.
df

Unnamed: 0,job_title,link,location,required_experience,skills,company_name,reviews_count
0,AI/ML Engineer- Fresher,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-1,"Generative AI, Data Structures, Machine Learni...",OMFYS Technologies India Pvt. Ltd.,37
1,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Django, Fast Api, Flask, Python, Tens...",Acciojob,91
2,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Tensorflow, GenAI Tools, Scikit-Learn...",Acciojob,91
3,AI/ML Engineer Intern,https://www.naukri.com/job-listings-ai-ml-engi...,Bengaluru,0-1,"deep learning, Intern, Machine learning, Signa...",Rezlytix Technologies,0
4,Ai Ml Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Hyderabad,0,"Machine Learning, Python, SQL, Tensorflow, Pan...",Acciojob,91
...,...,...,...,...,...,...,...
396,Ai Ml Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Shenzhen,0-5,"Artificial Intelligence, Machine Learning, Dee...",Agilian Technology,193
397,Backend Engineer,https://www.naukri.com/job-listings-backend-en...,Bengaluru,0-2,"Unix, Computer science, Backend, Linux, Coding...",Sarvam,0
398,Backend Engineer,https://www.naukri.com/job-listings-backend-en...,Bengaluru,0-1,"Computer science, Backend, Version control, GI...",Sarvam,0
399,Software Engineer (AI/ML & Modern Technologies),https://www.naukri.com/job-listings-software-e...,Noida,0-2,"Maven, Core Java, Payroll, Front end, Developm...",Uzio Technology India,20


In [718]:
# Write `df` to AI-ML_data.csv in the working directory, without the index column.
df.to_csv("AI-ML_data.csv", index=False)

In [720]:
# Preview the first 20 rows of `df`.
df.head(20)

Unnamed: 0,job_title,link,location,required_experience,skills,company_name,reviews_count
0,AI/ML Engineer- Fresher,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-1,"Generative AI, Data Structures, Machine Learni...",OMFYS Technologies India Pvt. Ltd.,37
1,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Django, Fast Api, Flask, Python, Tens...",Acciojob,91
2,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Tensorflow, GenAI Tools, Scikit-Learn...",Acciojob,91
3,AI/ML Engineer Intern,https://www.naukri.com/job-listings-ai-ml-engi...,Bengaluru,0-1,"deep learning, Intern, Machine learning, Signa...",Rezlytix Technologies,0
4,Ai Ml Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Hyderabad,0,"Machine Learning, Python, SQL, Tensorflow, Pan...",Acciojob,91
5,AI / ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Surat,0-1,"Computer science, Computer vision, Automation,...",Canopas,2
6,Fresher Data Analyst & AI-ML Engineer,https://www.naukri.com/job-listings-fresher-da...,Surat,0-2,"Backend, Data analysis, Analytical, Web develo...",August Infotech,14
7,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Mumbai,0-3,"Pytorch, Model Fine Tuning, Artificial Intelli...",Innovizia Technologies,4
8,Technical Data Analyst,https://www.naukri.com/job-listings-technical-...,Remote,0,"Python, Performance Metrics, Analyze User Beha...",Turingsxyz,2
9,AI/ML ENGINEER,https://www.naukri.com/job-listings-ai-ml-engi...,Gurugram,0-2,"Computer vision, Sales, Statistical analysis, ...",Applicate,50


In [734]:
# Placeholder: replace with the folder that holds the per-role CSV files.
folder_path = r'folder_having_all_csv_related_to_respective_job_roles'

# os.listdir returns names in arbitrary order; sorting makes the row order of
# the merged frame reproducible from run to run.
csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.csv'))
if not csv_files:
    # Same exception type pd.concat would raise on an empty list, but with a
    # message that points at the actual cause.
    raise ValueError(f"No .csv files found in {folder_path!r}")

# Read every CSV and stack them into one frame with a fresh 0..n-1 index.
df_list = [pd.read_csv(os.path.join(folder_path, file)) for file in csv_files]
merged_df = pd.concat(df_list, ignore_index=True)

## Basic job details dataset

In [736]:
# Write the merged frame to JOBS_DATA.csv in the working directory, without the index column.
merged_df.to_csv("JOBS_DATA.csv", index=False)

In [738]:
# Display the current contents of `merged_df`.
merged_df

Unnamed: 0,job_title,link,location,required_experience,skills,company_name,reviews_count
0,AI/ML Engineer- Fresher,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-1,"Generative AI, Data Structures, Machine Learni...",OMFYS Technologies India Pvt. Ltd.,37
1,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Django, Fast Api, Flask, Python, Tens...",Acciojob,91
2,AI/ML Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Pune,0-3,"Pytorch, Tensorflow, GenAI Tools, Scikit-Learn...",Acciojob,91
3,AI/ML Engineer Intern,https://www.naukri.com/job-listings-ai-ml-engi...,Bengaluru,0-1,"deep learning, Intern, Machine learning, Signa...",Rezlytix Technologies,0
4,Ai Ml Engineer,https://www.naukri.com/job-listings-ai-ml-engi...,Hyderabad,0,"Machine Learning, Python, SQL, Tensorflow, Pan...",Acciojob,91
...,...,...,...,...,...,...,...
3855,Software Testing Engineer (Senior & Junior),https://www.naukri.com/job-listings-software-t...,Hyderabad,0-3,"Software testing, Software design, Linux, Codi...",Cal4care,5
3856,Software Testing Engineer (Senior & Junior),https://www.naukri.com/job-listings-software-t...,Pune,0-3,"Software testing, Software design, Linux, Codi...",Cal4care,5
3857,Software Testing Engineer (Senior & Junior),https://www.naukri.com/job-listings-software-t...,Chennai,0-3,"Software testing, Software design, Linux, Codi...",Cal4care,5
3858,Software Testing Engineer (Senior & Junior),https://www.naukri.com/job-listings-software-t...,Bengaluru,0-3,"Software testing, Software design, Linux, Codi...",Cal4care,5
