# Import important libraries

In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# URL to scrape

In [2]:
URL = "https://www.instahyre.com/python-jobs"

# Initialize WebDriver

In [3]:
# Open the URL and wait for 5 seconds to ensure the page is fully loaded
driver = webdriver.Chrome()
driver.maximize_window()  # Maximize the browser window to full screen
driver.get(URL)
time.sleep(5)

# Extract Links from the Job Listings Page

In [4]:
job_links = []
page_count = 0
total_pages = 50  # Set the number of pages to scrape

# Scroll through the page and click on the next button to load more job listings
while page_count < total_pages:
    # Scroll to the bottom of the page to load new jobs
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(3)
    
    try:
        # Find the next button and click it to load more jobs
        next_button = driver.find_element(By.XPATH, '//*[@id="job-function-page"]/div[2]/div/div[1]/div[1]/div[21]/li[12]')
        next_button.click()
        page_count += 1
        time.sleep(3)
        
        # Collect job listing links
        job_elements = driver.find_elements(By.XPATH, '//div[@class="opportunity-go col-xs-1"]/a[@id="employer-profile-opportunity"]')
        for element in job_elements:
            href_value = element.get_attribute('href')
            job_links.append(href_value)
    except:
        pass

driver.quit()
print(f"Total links collected: {len(job_links)}")

Total links collected: 1000


# Defining the Function to Extract Data from Each Link

In [5]:
# This function will extract the required job details from each job listing page
def extract_data(driver):
    try:
        # Extract job details from the page
        title = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div[2]/div/div/div[2]/div[1]/div[1]/h2').text
        job_position = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div[2]/div/div/div[2]/div[1]/div[1]/h1').text
        location = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div[2]/div/div/div[2]/div[1]/div[1]/div/span[1]').text
        founded = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div[2]/div/div/div[1]/div[2]/div[1]').text
        employees = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div[2]/div/div/div[1]/div[2]/div[2]').text
        about = driver.find_element(By.XPATH, '//*[@id="employer-summary"]/div[1]/div').text
        skills = driver.find_element(By.XPATH, '//*[@id="job-skills-description"]').text

        return {
            "Title": title,
            "Job Position": job_position,
            "Location": location,
            "Founded": founded,
            "Employees": employees,
            "About": about,
            "Skills": skills,
            "Link": driver.current_url
        }
    except:
        return None

# Initialize WebDriver and iterate through each job link to collect job details

In [6]:
driver = webdriver.Chrome()
driver.maximize_window()  # Maximize the browser window to full screen
extracted_data = []

for url in job_links:
    driver.get(url)
    time.sleep(3)  # Wait for the page to load
    data = extract_data(driver)
    if data:
        extracted_data.append(data)

driver.quit()
print("Data extraction completed")

Data extraction completed


# Convert to DataFrame and Save to CSV

In [7]:
# Convert to DataFrame
df = pd.DataFrame(extracted_data)
df

Unnamed: 0,Title,Job Position,Location,Founded,Employees,About,Skills,Link
0,Arcana,Senior Backend Engineer,"Bangalore, Coimbatore, Work From Home",Founded in 2022,10 - 50 employees,Arcana is on a mission to revolutionize the in...,Django\nPython,https://www.instahyre.com/job-300223-senior-ba...
1,AheadRace,Senior Software Developer,Mumbai,Founded in 2014,50 - 200 employees,"Better insight, faster response, and quicker e...",Python\nDjango\nFlask\nAWS\nData Structures,https://www.instahyre.com/job-329738-senior-so...
2,Nielsen,Data Engineer,Bangalore,Founded in 1923,More than 1000 employees,Nielsen Holdings plc (NYSE: NLSN) is a global ...,Python\nAmazon RDS\nPySpark\nAirflow\nCI - CD,https://www.instahyre.com/job-329760-data-engi...
3,NetSpring,Senior Fullstack Engineer,Work From Home,Founded in 2019,10 - 50 employees,NetSpring is a cloud application platform-as-a...,React.js\nPython\nNode.js\nJavaScript\nCSS,https://www.instahyre.com/job-326595-senior-fu...
4,AheadRace,Sr. DevOps Engineer,Mumbai,Founded in 2014,50 - 200 employees,"Better insight, faster response, and quicker e...",DevOps\nDocker\nKubernetes\nAWS\nPython,https://www.instahyre.com/job-327569-sr-devops...
...,...,...,...,...,...,...,...,...
992,Salarybox,Data Analyst,Gurgaon,Founded in 2020,0 - 10 employees,Salarybox is a YComibinator-backed startup fou...,SQL\nETL\nData Analysis\nPython\nPower BI,https://www.instahyre.com/job-323727-data-anal...
993,Tessell,Software Engineer - Postgres,Bangalore,Founded in 2021,10 - 50 employees,Tessell delivers highly differentiated DBaaS o...,Python\nPostgreSQL\nTerraform\nDistributed Sys...,https://www.instahyre.com/job-324586-software-...
994,Toplyne,Technical Lead,Bangalore,Founded in 2021,10 - 50 employees,We're building a platform that sits over most ...,Java\nPython\nData Structures\nMySQL\nLLD,https://www.instahyre.com/job-319654-technical...
995,DealShare.in,Backend Engineer,Pune,Founded in 2018,500 - 1000 employees,DealShare has a vision of enabling ecommerce f...,Java\nC++\nPython\nData Structures\nAlgorithms,https://www.instahyre.com/job-311943-backend-e...


In [8]:
# Save to CSV
df.to_csv('Instahyre_Project_Scraped_File.csv', index=False) 