In [1]:
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# --- Configuration ----------------------------------------------------------
# Listing URL template; `{page}` is filled with the page number being scraped.
LIST_URL = "https://www.ambitionbox.com/list-of-companies?campaign=desktop_nav&page={page}"
FIRST_PAGE, LAST_PAGE = 1, 26   # inclusive range of listing pages to visit
PAGE_LOAD_TIMEOUT_S = 20        # max seconds to wait for company cards to appear
MAX_SCROLL_STEPS = 10           # upper bound on scroll attempts per page
SCROLL_STEP_PX = 1000           # pixels scrolled per attempt
SCROLL_PAUSE_S = 1              # pause after each scroll so content can render
MISSING = "n/a"                 # placeholder stored when a field is absent on a card

options = Options()
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
# Presumably these two flags make the session look like a regular desktop
# browser rather than an automated one -- TODO confirm they are still needed.
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36")


def _text_or_missing(tag):
    """Return the stripped text of a BeautifulSoup tag, or MISSING if the tag is None."""
    return tag.text.strip() if tag else MISSING


def _count_at(tags, index):
    """Return the stripped text of tags[index], or MISSING if that position does not exist."""
    return tags[index].text.strip() if len(tags) > index else MISSING


def _split_industry_headquarters(tag):
    """Split the card's "<industry> | <city> +N more" text into (industry, headquarters).

    Either element is MISSING when the tag is None or the corresponding part is
    absent. Both parts are stripped so no separator whitespace leaks into the data.
    """
    if not tag:
        return MISSING, MISSING
    parts = tag.text.strip().split(" |")
    industry_name = parts[0].strip() if len(parts) > 0 else MISSING
    # Drop the trailing " +N more" style suffix and keep only the first location.
    headquarters = parts[1].split(" +")[0].strip() if len(parts) > 1 else MISSING
    return industry_name, headquarters


# Parallel column lists: every card appends exactly one value to each of them.
company, industry, headq = [], [], []
reviews, jobs, salaries, benefits, interviews = [], [], [], [], []

driver = webdriver.Chrome(options=options)
try:
    for i in range(FIRST_PAGE, LAST_PAGE + 1):
        url = LIST_URL.format(page=i)
        print(f"\nRunning Page: {i}")
        driver.get(url)

        # Wait for the company cards to be present; skip the page if they never
        # show up. Only the wait's own timeout is treated as "page did not load"
        # -- any other error (or a KeyboardInterrupt) propagates.
        try:
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "companyCardWrapper"))
            )
        except TimeoutException:
            print(f"Timed out waiting for page {i} to load.")
            continue

        # Scroll down in steps. NOTE: the loop stops as soon as one scroll
        # leaves document.body.scrollHeight unchanged, which can be right
        # after the first step.
        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(MAX_SCROLL_STEPS):
            driver.execute_script(f"window.scrollBy(0, {SCROLL_STEP_PX});")
            time.sleep(SCROLL_PAUSE_S)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Parse the rendered HTML rather than querying elements through Selenium.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        cards = soup.find_all("div", {"class": "companyCardWrapper"})
        print(f"Found {len(cards)} company cards.")

        for card in cards:
            company.append(
                _text_or_missing(card.find("h2", class_="companyCardWrapper__companyName"))
            )

            card_industry, card_headq = _split_industry_headquarters(
                card.find("span", class_="companyCardWrapper__interLinking")
            )
            industry.append(card_industry)
            headq.append(card_headq)

            reviews.append(
                _text_or_missing(card.find("span", class_="companyCardWrapper__companyRatingCount"))
            )

            # Positional lookup into the card's action counters. Index 0 is not
            # used here -- presumably it repeats the review count read above;
            # verify the index-to-column mapping against the page markup.
            action_counts = card.find_all("span", class_="companyCardWrapper__ActionCount")
            salaries.append(_count_at(action_counts, 1))
            interviews.append(_count_at(action_counts, 2))
            jobs.append(_count_at(action_counts, 3))
            benefits.append(_count_at(action_counts, 4))
finally:
    # Always release the browser, even when the scrape fails part-way through.
    driver.quit()

data = {
    "Company": company,
    "Industry": industry,
    "Headquarters": headq,
    "Reviews": reviews,
    "Jobs": jobs,
    "Salaries": salaries,
    "Benefits": benefits,
    "Interviews": interviews,
}

df = pd.DataFrame(data)


Running Page: 1
Found 20 company cards.

Running Page: 2
Found 20 company cards.

Running Page: 3
Found 20 company cards.

Running Page: 4
Found 20 company cards.

Running Page: 5
Found 20 company cards.

Running Page: 6
Found 20 company cards.

Running Page: 7
Found 20 company cards.

Running Page: 8
Found 20 company cards.

Running Page: 9
Found 20 company cards.

Running Page: 10
Found 20 company cards.

Running Page: 11
Found 20 company cards.

Running Page: 12
Found 20 company cards.

Running Page: 13
Found 20 company cards.

Running Page: 14
Found 20 company cards.

Running Page: 15
Found 20 company cards.

Running Page: 16
Found 20 company cards.

Running Page: 17
Found 20 company cards.

Running Page: 18
Found 20 company cards.

Running Page: 19
Found 20 company cards.

Running Page: 20
Found 20 company cards.

Running Page: 21
Found 20 company cards.

Running Page: 22
Found 20 company cards.

Running Page: 23
Found 20 company cards.

Running Page: 24
Found 20 company cards.



In [3]:
# Show the scraped companies table as this cell's output.
df

Unnamed: 0,Company,Industry,Headquarters,Reviews,Jobs,Salaries,Benefits,Interviews
0,TCS,IT Services & Consulting,Bangalore / Bengaluru,(95.7k),247,9L,11.9k,10.9k
1,Accenture,IT Services & Consulting,Bangalore / Bengaluru,(61.4k),10.1k,6L,7.4k,8.5k
2,Wipro,IT Services & Consulting,Bangalore / Bengaluru,(56.1k),4k,4.5L,5.3k,5.9k
3,Cognizant,IT Services & Consulting,Hyderabad / Secunderabad,(53.3k),273,5.8L,6.1k,5.8k
4,Capgemini,IT Services & Consulting,Bangalore / Bengaluru,(44.9k),1.4k,4.4L,4.2k,5k
...,...,...,...,...,...,...,...,...
515,Angel One,FinTech,Mumbai,(1.6k),25,6.8k,169,143
516,TVS Sundram Fasteners,Auto Components,Chennai,(1.6k),1,6.4k,217,74
517,Vedantu,EdTech,Bangalore / Bengaluru,(1.6k),3,9.1k,163,187
518,Torrent Power,Power,Ahmedabad,(1.6k),45,6.9k,170,132


In [5]:
# Write the scraped table to ambitionbox_data.csv (path is relative to the
# current working directory); index=False leaves the row index out of the file.
df.to_csv("ambitionbox_data.csv", index=False)