Website = https://www.fortuneindia.com/rankings/fortune-500/2024

In [1]:
import numpy as np
import pandas as pd

import requests
import bs4
from bs4 import BeautifulSoup

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Step 1: Send a request to the website
url = "https://www.fortuneindia.com/rankings/fortune-500/2024"

In [3]:
response = requests.get(url)
print(response.raise_for_status())
print(response.status_code)

None
200


In [4]:
# Step 2: Parse the HTML
html_data = response.text
soup = BeautifulSoup(html_data, "html.parser")

In [5]:
soup

<!DOCTYPE html>

<html class="" lang="en">
<head>
<link crossorigin="" href="https://assets.fortuneindia.com" rel="preconnect"/>
<link crossorigin="" href="https://media.fortuneindia.com" rel="preconnect"/>
<meta content="@FortuneIndia" name="twitter:site"/>
<meta content="https://www.fortuneindia.com" name="twitter:domain"/>
<meta content="1113250065862572" property="fb:app_id"/>
<meta content="Fortune India" property="og:site_name"/>
<meta content="Fortune India 500 2024: This Year's Top Performing Companies - Fortune India" property="og:title"/>
<meta content="https://media.fortuneindia.com/fortune-india/2025-07-03/rx21zgga/Fortune500Desktop.png?auto=format,compress&amp;format=webp&amp;w=1200&amp;h=675&amp;dpr=1.0&amp;q=90&amp;fit=cover" property="og:image"/>
<meta content="1200" property="og:height"/>
<meta content="675" property="og:width"/>
<meta content="https://media.fortuneindia.com/fortune-india/2025-07-03/rx21zgga/Fortune500Desktop.png?auto=format,compress&amp;format=webp&am

In [6]:
table = soup.find("table")

----

In [10]:
# ---- Get table headers ----
headers_row = soup.find("thead").find_all("th")
headers = [h.get_text(strip=True) for h in headers_row]

# First header is Rank, second is Company (we will split into Company + Owner)
headers = ["Rank", "Company", "Owner"] + headers[2:]


In [11]:
headers

['Rank',
 'Company',
 'Owner',
 'TOTAL INCOME \n(INR Cr)',
 'NET OPERATING INCOME \n(INR Cr)',
 'PROFIT \n(INR Cr)',
 'Profit as % \nof Revenue',
 'INTEREST COST / AS % OF \nEBITDA',
 'cash & bank balance \n(INR Cr)',
 'total debt \n(INR Cr)',
 'debt-equity \nratio',
 'TOTAL ASSETS \n(INR Cr)',
 'NET WORTH \n(INR Cr)',
 'RONW%',
 'ROCE%',
 'EMPLOYEES \n(IN NOS.)',
 'T12M AVG \nM-CAP \n(INR Cr)',
 'TSR%']

---

In [7]:
# Extract table rows
rows = soup.find("tbody").find_all("tr")

data = []

for row in rows:
    cols = row.find_all("td")
    row_data = []

    # Rank
    rank = cols[0].get_text(strip=True).split("\n")[0]  
    row_data.append(rank)

    # Company and Ownership (span)
    company = cols[1].find("a").get_text(strip=True) if cols[1].find("a") else None
    ownership = cols[1].find("span").get_text(strip=True) if cols[1].find("span") else None
    row_data.extend([company, ownership])

    # Remaining columns (ignore span values)
    for c in cols[2:]:
        text = c.get_text(" ", strip=True)
        # Remove span content (numbers with %)
        if c.find("span"):
            span_text = c.find("span").get_text(strip=True)
            text = text.replace(span_text, "").strip()
        row_data.append(text)

    data.append(row_data)



In [16]:
df = pd.DataFrame(data, columns=headers)

In [17]:
df

Unnamed: 0,Rank,Company,Owner,TOTAL INCOME \n(INR Cr),NET OPERATING INCOME \n(INR Cr),PROFIT \n(INR Cr),Profit as % \nof Revenue,INTEREST COST / AS % OF \nEBITDA,cash & bank balance \n(INR Cr),total debt \n(INR Cr),debt-equity \nratio,TOTAL ASSETS \n(INR Cr),NET WORTH \n(INR Cr),RONW%,ROCE%,EMPLOYEES \n(IN NOS.),T12M AVG \nM-CAP \n(INR Cr),TSR%
0,10,Reliance Industries,Mukesh Ambani,922391,901064,69621,7.6%,23118,97225,458991,0.58,1396970,793481,10.5%,10.5%,347362,1921959,7.7%
1,2+1,LIC,Govt of India,860795,845966,40916,4.8%,128,41824,0,NAP,5252252,82937,63.3%,1%,98661,610044,48.8%
2,3-1,Indian Oil Corporation,Govt of India,780509,776352,41730,5.4%,7881,3159,132628,0.72,326519,183416,26.7%,21.2%,30321,225021,35.9%
3,40,ONGC,Govt of India,610642,591447,49221,8.1%,10194,36690,153181,0.45,570448,337070,18.4%,17.3%,15804,339534,33.5%
4,5+1,State Bank of India,Govt of India,594575,439189,67085,11.3%,259736,326572,5606147,NAP,6746909,387384,19%,1.1%,232296,681659,46.8%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,496R-E ‘22,Cochin Shipyard,Govt of India,4140,3830,783,18.9%,46,3864,502,0.1,5551,5003,16.6%,21.1%,805,37837,140.1%
496,497-30,Strides Pharma Science,Arun Kumar,4130,4051,-71,NAP,314,191,2517,1.18,4694,2126,NAP,0%,2364,8910,179.6%
497,498-6,Route Mobile,S.K. & R.K. Gupta,4129,4023,375,9.1%,30,757,378,0.18,2611,2150,19.6%,20.7%,492,9921,-9.6%
498,499NEW,Gandhar Oil Refinery India,Ramesh Babulal Parekh & family,4126,4113,141,3.4%,59,277,271,0.23,1499,1172,17.3%,22.3%,381,2240,-30.9%


In [18]:
# Save to CSV
df.to_csv("fortune500_india.csv", index=False)

----

In [23]:
df = pd.read_csv(r"fortune500_india.csv")

In [24]:
df

Unnamed: 0,Rank,Company,Owner,TOTAL INCOME \n(INR Cr),NET OPERATING INCOME \n(INR Cr),PROFIT \n(INR Cr),Profit as % \nof Revenue,INTEREST COST / AS % OF \nEBITDA,cash & bank balance \n(INR Cr),total debt \n(INR Cr),debt-equity \nratio,TOTAL ASSETS \n(INR Cr),NET WORTH \n(INR Cr),RONW%,ROCE%,EMPLOYEES \n(IN NOS.),T12M AVG \nM-CAP \n(INR Cr),TSR%
0,10,Reliance Industries,Mukesh Ambani,922391,901064,69621,7.6%,23118,97225,458991,0.58,1396970,793481,10.5%,10.5%,347362,1921959,7.7%
1,2+1,LIC,Govt of India,860795,845966,40916,4.8%,128,41824,0,NAP,5252252,82937,63.3%,1%,98661,610044,48.8%
2,3-1,Indian Oil Corporation,Govt of India,780509,776352,41730,5.4%,7881,3159,132628,0.72,326519,183416,26.7%,21.2%,30321,225021,35.9%
3,40,ONGC,Govt of India,610642,591447,49221,8.1%,10194,36690,153181,0.45,570448,337070,18.4%,17.3%,15804,339534,33.5%
4,5+1,State Bank of India,Govt of India,594575,439189,67085,11.3%,259736,326572,5606147,NAP,6746909,387384,19%,1.1%,232296,681659,46.8%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,496R-E ‘22,Cochin Shipyard,Govt of India,4140,3830,783,18.9%,46,3864,502,0.1,5551,5003,16.6%,21.1%,805,37837,140.1%
496,497-30,Strides Pharma Science,Arun Kumar,4130,4051,-71,NAP,314,191,2517,1.18,4694,2126,NAP,0%,2364,8910,179.6%
497,498-6,Route Mobile,S.K. & R.K. Gupta,4129,4023,375,9.1%,30,757,378,0.18,2611,2150,19.6%,20.7%,492,9921,-9.6%
498,499NEW,Gandhar Oil Refinery India,Ramesh Babulal Parekh & family,4126,4113,141,3.4%,59,277,271,0.23,1499,1172,17.3%,22.3%,381,2240,-30.9%
