In [94]:
import numpy as np
import pandas as pd
import requests
from io import StringIO

In [95]:
# Extract the required GDP data from the given URL using web scraping
URL="https://web.archive.org/web/20230902185326/https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"

In [96]:
# Extract table from webpage using pandas

# 1 Send a GET request to the URL
response = requests.get(URL, verify=False)

# 2 Find all tables on the page
html_string = response.text
table = pd.read_html(StringIO(html_string), attrs={'class': 'wikitable sortable static-row-numbers plainrowheaders srn-white-background'})

# 3 Convert list to dataframe
df = table[0].copy()

# 4 Replace the column headers with column numbers
df.columns = range(df.shape[1])
#print(df)

# 5 Retain the rows with index 1 to 10, indicating the top 10 economies of the world
top_10_economies = df.iloc[0:11]

# 6 Retain the columns with index 0 and 2 (name of country and value of GDP quoted by IMF)
top_10_economies_final = top_10_economies.iloc[:, [0, 2]]

# 7 Assign column names as Country and GDP (Million USD)
top_10_economies_final.columns = ['Country', 'GDP (Million USD)']
print(top_10_economies_final)




           Country GDP (Million USD)
0            World         105568776
1    United States          26854599
2            China          19373586
3            Japan           4409738
4          Germany           4308854
5            India           3736882
6   United Kingdom           3158938
7           France           2923489
8            Italy           2169745
9           Canada           2089672
10          Brazil           2081235


In [97]:
# 1 Modify the GDP column of the DataFrame, converting the value available in Million USD to Billion USD 
top_10_economies_final.iloc[:,1] = pd.to_numeric(top_10_economies_final['GDP (Million USD)'], errors='coerce')
top_10_economies_final.iloc[:,1] = top_10_economies_final.iloc[:,1].fillna(0)
top_10_economies_final.iloc[:,1] = top_10_economies_final.iloc[:,1].astype(int)

# 2 Convert the GDP value in Million USD to Billion USD
def convert_mill_to_bill(gdp):
    return gdp / 1000

top_10_economies_final.iloc[:,1] = top_10_economies_final.iloc[:,1].apply(convert_mill_to_bill)

# 3 Rename the column header to GDP (Billion USD)
top_10_economies_final = top_10_economies_final.copy()
top_10_economies_final.rename(columns={'GDP (Million USD)': 'GDP (Billion USD)'}, inplace=True)

print('type of top 10 economies', type(top_10_economies_final))


type of top 10 economies <class 'pandas.core.frame.DataFrame'>


  top_10_economies_final.iloc[:,1] = top_10_economies_final.iloc[:,1].fillna(0)


In [99]:
# 1 Save the df to the csv file named largest-economies.csv
top_10_economies_final.to_csv('largest-economies.csv')