In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Scrape Failory's list of Indian startups and tabulate one row per startup.
# Every field is pulled out of the flattened list text with its own regex, so
# the columns are matched independently of one another.

# Keep at most this many startups in the resulting table.
MAX_ROWS = 277

# Fetch the webpage
url = "https://www.failory.com/startups/india"
# Without a timeout, requests waits indefinitely on an unresponsive server.
page = requests.get(url, timeout=30)

if page.status_code == 200:
    # Parse the webpage content
    html_code = page.text
    soup = BeautifulSoup(html_code, 'html.parser')

    # Extract the relevant sections and flatten them into one searchable string
    sections = soup.find_all("ul", {"role": "list"})
    section_text = "".join(section.text for section in sections)

    # Extract company names (text of every <h3> heading on the page)
    company_names = [heading.text for heading in soup.find_all("h3")]

    # Extract cities using regex.
    # The capture group ends right before the literal "Started", so it already
    # holds only the city word; no further splitting is needed (splitting on
    # "S" would wipe out names such as "Surat").
    # NOTE(review): the group has no whitespace in it, so only the last word
    # of a multi-word city is captured -- confirm against the page.
    cities_pattern = r"\s([A-Za-z]+)Started"
    cities = re.findall(cities_pattern, section_text)

    # Extract starting years using regex
    starts_pattern = r"Started in: (\d+)"
    start_years = re.findall(starts_pattern, section_text)

    # Extract founders using regex.
    # NOTE(review): the character class only admits ASCII letters, whitespace
    # and commas; a founders entry with any other character will not match,
    # which shortens this list relative to the others -- confirm against the page.
    founders_pattern = r"Founders: ([A-Za-z\s,]+)Industries:"
    founders = [match.strip() for match in re.findall(founders_pattern, section_text)]

    # Extract industries using regex
    industries_pattern = r"Industries: (.+?)Number of"
    industries = re.findall(industries_pattern, section_text)

    # Extract number of employees using regex (only "low-high" ranges match)
    employees_pattern = r"Number of employees: (\d+-\d+)"
    number_of_employees = re.findall(employees_pattern, section_text)

    # Extract funding amounts using regex (digits and thousands separators after "$")
    funding_pattern = r"Funding: \$([\d,]+)"
    funding_amounts = re.findall(funding_pattern, section_text)

    # Extract funding rounds using regex
    funding_rounds_pattern = r"Funding rounds: (\d+)"
    funding_rounds = re.findall(funding_rounds_pattern, section_text)

    # Extract number of investors using regex
    investors_pattern = r"Number of investors: (\d+)"
    number_of_investors = re.findall(investors_pattern, section_text)

    # Collect the columns in display order.
    columns = {
        "Company": company_names,
        "Cities": cities,
        "Year of Starting": start_years,
        "Founders": founders,
        "Industries": industries,
        "Funding Amount": funding_amounts,
        "Funding Rounds": funding_rounds,
        "Number of Employees": number_of_employees,
        "Number of Investors": number_of_investors,
    }

    # pandas requires every column to have the same length. Because each list
    # comes from an independent regex, their lengths can differ, so trim all of
    # them to the shortest one (capped at MAX_ROWS) instead of letting the
    # DataFrame constructor raise ValueError.
    row_count = min(MAX_ROWS, *(len(values) for values in columns.values()))
    data = {name: values[:row_count] for name, values in columns.items()}

    # Create a DataFrame with the extracted data
    startup_df = pd.DataFrame(data)

    # Display the DataFrame
    print(startup_df.head())
else:
    # startup_df is not defined on this path; later cells that use it will fail.
    print("Failed to retrieve the webpage.")

            Company     Cities Year of Starting  \
0  1) Urban Company    Gurgaon             2014   
1      2) Classplus      Noida             2018   
2          3) Paytm      Noida             2010   
3           4) Apna     Mumbai             2019   
4       5) Razorpay  Bengaluru             2014   

                                            Founders  \
0  Abhiraj Singh Bhal, Raghav Chandra, Varun Khaitan   
1  Bhaswat Agarwal, Bikash Dash, Mukul Rustagi, N...   
2                Akshay Khanna, Vijay Shekhar Sharma   
3                                      Nirmit Parikh   
4                     Harshil Mathur, Shashank Kumar   

                                          Industries Funding Amount  \
0  Apps, Home Services, Marketplace, Service Indu...    445,920,356   
1  B2B, E-Learning, EdTech, Education, Mobile App...     89,506,451   
2  E-Commerce, Finance, Financial Services, Inter...     32,448,851   
3  Employment, Human Resources, Recruiting, Staff...     93,450,000   
4

In [2]:
# Persist the scraped table to disk; the file is written relative to the
# notebook's working directory and includes the DataFrame index column.
csv_path = "startupdata.csv"
startup_df.to_csv(csv_path)