In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
from urllib.parse import urljoin

In [2]:
service_object = Service(r"C:\Users\misga\OneDrive\Documents\chromedriver-win64\chromedriver.exe")

# Initialize WebDriver with the service object
driver = webdriver.Chrome(service=service_object)
driver.implicitly_wait(10)

In [3]:

# Maximize the window and load the webpage
driver.maximize_window()



In [4]:

%%time

# Set the URL of the first page
url ='https://www.naukri.com/data-scientist-jobs?k=data%20scientist'  # Replace with the actual URL
driver.get(url)

# Initialize the DataFrame to store job data
columns = ['Job Title', 'Company Name', 'Location', 'Experience', 'Salary','Job Description', 'Skills']
df = pd.DataFrame(columns=columns)

# Set page counter

current_page = 1
last_page = 830 # Set the last page number
condition = True

while condition and current_page <= last_page:
    print(f"Scraping page {current_page}...")

    # Give time for the page to load fully
    time.sleep(2)  # Increased wait time for the page to load

    # Get the job listings on the current page
    job_listings = driver.find_elements(By.CSS_SELECTOR, ".cust-job-tuple")
    print(f"Found {len(job_listings)} job listings on page {current_page}.")
    
    
    for job in job_listings:
        try:
            # Extract job details
            job_title = job.find_element(By.CSS_SELECTOR, ".title").text
            company_name = job.find_element(By.CSS_SELECTOR, ".comp-name").text
            location = driver.find_element(By.CSS_SELECTOR, "span.loc-wrap span.loc span").text
            job_description = job.find_element(By.XPATH, "//span[contains(@class, 'job-desc')]").text
            experience = job.find_element(By.XPATH, "//span[contains(@class, 'expwdth')]").text
            salary = driver.find_element(By.CSS_SELECTOR, "span.sal-wrap span.sal span").text
            skill_elements = job.find_elements(By.CSS_SELECTOR, ".tags-gt .dot-gt")
            skills = ", ".join([skill.text for skill in skill_elements])
        

            # Append job data to DataFrame
            df.loc[len(df)] = [job_title, company_name, location, experience,salary, job_description, skills]
    

        except Exception as e:
            print(f"Error extracting job details: {e}")
            pass  # Handle exceptions if any job info is missing or any error occurs

    # Try to find the "Next" button and click it
    try:
        nxt = driver.find_element(By.XPATH, '//span[text()="Next"]/..')
        driver.execute_script("arguments[0].click();", nxt)
        
        # Wait for the page to load
        time.sleep(2)  # Adjust this time based on the page load speed
        
        # Increase the page counter
        current_page += 1
        print(f"Going to next page {current_page}...")
        
        
    except Exception as e:
        print(f"Error or last page reached: {e}")
        print("No 'Next' button found or last page reached. Exiting.")
          # Stop scraping if no next button is found (i.e., last page)
        condition = False

# Close the WebDriver after scraping is done
driver.quit()

# Save the DataFrame to a CSV file


# Optionally, display the DataFrame
print(df)


Scraping page 1...
Found 25 job listings on page 1.
Going to next page 2...
Scraping page 2...
Found 20 job listings on page 2.
Going to next page 3...
Scraping page 3...
Found 20 job listings on page 3.
Going to next page 4...
Scraping page 4...
Found 20 job listings on page 4.
Going to next page 5...
Scraping page 5...
Found 20 job listings on page 5.
Going to next page 6...
Scraping page 6...
Found 20 job listings on page 6.
Going to next page 7...
Scraping page 7...
Found 20 job listings on page 7.
Going to next page 8...
Scraping page 8...
Found 20 job listings on page 8.
Going to next page 9...
Scraping page 9...
Found 20 job listings on page 9.
Going to next page 10...
Scraping page 10...
Found 20 job listings on page 10.
Going to next page 11...
Scraping page 11...
Found 20 job listings on page 11.
Going to next page 12...
Scraping page 12...
Found 20 job listings on page 12.
Going to next page 13...
Scraping page 13...
Found 20 job listings on page 13.
Going to next page 14...

In [5]:
df.head(10)

Unnamed: 0,Job Title,Company Name,Location,Experience,Salary,Job Description,Skills
0,Data Scientist,Uplers,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,"Python, TensorFlow, Computer Vision, Flink, Re..."
1,Business Analyst / Data Scientist,CBRE,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,"Data Visualization, Tableau, Analytics, Busine..."
2,Data Scientist - AI/ML,Trent Limited,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,"Artificial Intelligence, Machine Learning, Dat..."
3,Data Scientist,Fortune 500 IT Services Company,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,
4,Data Scientist,Foreign IT Consulting MNC,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,
5,,,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,
6,,,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,
7,,,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,
8,Staff Data Scientist (AI/ML),Innovaccer,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,"CRO, Health insurance, Enterprise applications..."
9,Data Scientist,Ericsson,Hybrid - Bengaluru,0-4 Yrs,19-27.5 Lacs PA,Shift : 10:00AM to 7:00PM ISTBachelors / Maste...,"Wireless, Computer science, Networking, Linux,..."


In [8]:
df.to_csv('Data Science_Jobs .csv', index=False,encoding='utf-8')

In [7]:
df.tail(20)

Unnamed: 0,Job Title,Company Name,Location,Experience,Salary,Job Description,Skills
7485,Business Analyst,PSRTEK,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Analytical skills, Data analysis, Business Ana..."
7486,Analyst,Transique Corporate Advisors,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Senior Analyst, Analysis, Senior"
7487,Data Engineer,Drivetrain,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"SAN, Financial planning, Venture capital, Grow..."
7488,Data Analyst,Ganesh Housing Corporation Limited,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Real Estate, Data Analyst, Data Analytics, co-..."
7489,Data Visualisation Analyst,Pulsus Healthtech,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Data analysis, Automation, PDF, Data managemen..."
7490,Business Analyst - Transaction Analysts,Transaction Analysts,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Business Analyst, Business analysis, Project m..."
7491,Attack Surface Reduction Analyst,H&M,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Testing tools, Information security, Analytica..."
7492,Principal Analyst - FP&A,Anheuser Busch InBev,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Procurement, Supply chain, ERP, Data managemen..."
7493,Business Analyst,Gammastack,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Agile scrum, Business Analyst, Project managem..."
7494,Business Analyst,Caerus3 Advisors Think-Tank,Bengaluru,1-3 Yrs,Not disclosed,The Skills that are Key to this role . Functio...,"Enterprise risk management, Assurance, human c..."
