# Web Scraping Project
In this web scraping project I am going to scrape the website [ambitionbox](https://www.ambitionbox.com/list-of-companies) and collect information such as `company name`, `rating`, `company type`, `total employees`, `reviews`, and so on.

In this project I am going to use the BeautifulSoup and requests libraries to scrape the website, then use pandas to transform the data into a DataFrame and later save it as a CSV file.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_page(page_number):
    """Scrape one AmbitionBox company-listing page into a DataFrame.

    Args:
        page_number: Value substituted into the ``page`` query parameter of
            the listing URL.

    Returns:
        A pandas DataFrame with one row per ``companyCardWrapper`` card and
        the columns ``company_names``, ``ratings``, ``area``, ``employees``,
        ``public_private``, ``reviews`` and ``jobs``. All values are
        whitespace-stripped strings; a field whose element is absent from a
        card is ``None`` instead of aborting the whole page.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = f'https://www.ambitionbox.com/list-of-companies?page={page_number}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win 64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error page rather than returning a silently empty frame.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    columns = ['company_names', 'ratings', 'area', 'employees',
               'public_private', 'reviews', 'jobs']

    def node_text(node):
        """Return the stripped text of a parsed element, or None if it is missing."""
        return node.text.strip() if node is not None else None

    rows = []
    for card in soup.find_all('div', class_='companyCardWrapper'):
        # The detail line is '|'-separated; only its first three segments are
        # kept, and they are padded so short lines cannot raise IndexError.
        details_text = node_text(card.find('span', class_='companyCardWrapper__interLinking'))
        details = [part.strip() for part in details_text.split('|')[:3]] if details_text else []
        details += [None] * (3 - len(details))

        # Look the action counters up once; the first one is stored as
        # 'reviews' and the fourth as 'jobs'.
        counts = card.find_all('span', class_='companyCardWrapper__ActionCount')

        rows.append({
            'company_names': node_text(card.find('h2')),
            'ratings': node_text(card.find('span', class_='companyCardWrapper__companyRatingValue')),
            'area': details[0],
            'employees': details[1],
            'public_private': details[2],
            'reviews': node_text(counts[0]) if len(counts) > 0 else None,
            'jobs': node_text(counts[3]) if len(counts) > 3 else None,
        })

    # Passing explicit columns keeps the schema stable even for a page with no cards.
    return pd.DataFrame(rows, columns=columns)

# Scrape listing pages 1..num_pages and stack the per-page frames into one
# DataFrame with a fresh 0..n-1 index.
num_pages = 10
all_data = [scrape_page(page_number) for page_number in range(1, num_pages + 1)]
result_df = pd.concat(all_data, ignore_index=True)

In [2]:
# Show the combined DataFrame as the cell output.
result_df

Unnamed: 0,company_names,ratings,area,employees,public_private,reviews,jobs
0,TCS,3.8,IT Services & Consulting,1 Lakh+ Employees,Public,68.5k,532
1,Accenture,4.0,IT Services & Consulting,1 Lakh+ Employees,Public,43.4k,4.2k
2,Cognizant,3.9,IT Services & Consulting,1 Lakh+ Employees,Forbes Global 2000,39k,599
3,Wipro,3.8,IT Services & Consulting,1 Lakh+ Employees,Public,36.1k,364
4,ICICI Bank,4.0,Banking,1 Lakh+ Employees,Public,31.4k,222
...,...,...,...,...,...,...,...
195,Wipro Infotech,4.1,IT Services & Consulting,1 Lakh+ Employees (Global),MNC,2.4k,--
196,Access Healthcare,3.9,IT Services & Consulting,5k-10k Employees,12 years old,2.4k,37
197,Vishal Mega Mart,3.8,Retail,10k-50k Employees,15 years old,2.4k,10
198,ITC Infotech,3.5,IT Services & Consulting,10k-50k Employees,23 years old,2.4k,93


In [4]:
# Write the combined data to CSV without the index column.
# NOTE(review): the destination is an absolute, machine-specific Windows path;
# adjust it before running on another machine.
result_df.to_csv(r'C:\Users\LENOVO\Desktop\webscrapping\Ambition_box.csv', index=False)