In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)'
response = requests.get(url)

if response.status_code == 200:
    print("Successfully fetched the web page")
else:
    print("Failed to fetch the web page")


Successfully fetched the web page


In [3]:
soup = BeautifulSoup(response.content, 'html.parser')

In [5]:
table = soup.find('table', {'class': 'wikitable'})

In [6]:
# Extract table headers considering colspan
headers = []
for header in table.find_all('tr')[0].find_all('th'):
    text = header.get_text(strip=True)
    colspan = int(header.get('colspan', '1'))
    if colspan > 1:
        for i in range(colspan):
            headers.append(f"{text} {i+1}")
    else:
        headers.append(text)

In [7]:
# Extract table rows
rows = []
for row in table.find_all('tr')[1:]:  # Skip the header row
    cells = row.find_all(['td', 'th'])
    cells = [cell.get_text(strip=True) for cell in cells]
    rows.append(cells)

In [8]:
# Adjust rows if their length doesn't match header length
for i in range(len(rows)):
    if len(rows[i]) < len(headers):
        rows[i] += [''] * (len(headers) - len(rows[i]))
    elif len(rows[i]) > len(headers):
        rows[i] = rows[i][:len(headers)]

df = pd.DataFrame(rows, columns=headers)

In [9]:
print(df.head(10))  # Display first 10 rows for inspection

  Country/Territory UN region IMF[1][13] 1 IMF[1][13] 2 World Bank[14] 1  \
0          Forecast      Year     Estimate         Year         Estimate   
1             World         —  109,529,216         2024      100,562,011   
2     United States  Americas   28,781,083         2024       25,462,700   
3             China      Asia   18,532,633    [n 1]2024       17,963,171   
4           Germany    Europe    4,591,100         2024        4,072,192   
5             Japan      Asia    4,110,452         2024        4,231,141   
6             India      Asia    3,937,011         2024        3,385,090   
7    United Kingdom    Europe    3,495,261         2024        3,070,668   
8            France    Europe    3,130,014         2024        2,782,905   
9            Brazil  Americas    2,331,391         2024        1,920,096   

  World Bank[14] 2 United Nations[15] 1 United Nations[15] 2  
0             Year                                            
1             2022           96,698,0

In [10]:
df.to_csv('countries_gdp.csv', index=False)