# Create a script that extracts the list of the top 10 largest economies of the world, in descending order of their GDPs in billion USD

In [123]:
import pandas as pd
import numpy as np

In [124]:
# Archived (web.archive.org) snapshot of Wikipedia's "List of countries by GDP (nominal)" page.
# Adjacent string literals are concatenated by Python, so the value is a single URL.
URL = (
    "https://web.archive.org/web/20230902185326/"
    "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
)

### Extract the tables from the webpage using pandas. Retain the table at index 3 as the required DataFrame.

In [125]:
# read_html parses the page and returns a list of DataFrames, one per HTML table found.
table = pd.read_html(io=URL)
# Position 3 in that list is the country-by-GDP table previewed below.
df = table[3]
df.head(5)

Unnamed: 0_level_0,Country/Territory,UN region,IMF[1][13],IMF[1][13],World Bank[14],World Bank[14],United Nations[15],United Nations[15]
Unnamed: 0_level_1,Country/Territory,UN region,Estimate,Year,Estimate,Year,Estimate,Year
0,World,—,105568776,2023,100562011,2022,96698005,2021
1,United States,Americas,26854599,2023,25462700,2022,23315081,2021
2,China,Asia,19373586,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Japan,Asia,4409738,2023,4231141,2022,4940878,2021
4,Germany,Europe,4308854,2023,4072192,2022,4259935,2021


### Replace the column headers with column numbers

In [126]:
# Swap the two-level header for plain positional labels 0..n-1 so that
# columns can be picked by number in the following cells.
df.columns = range(len(df.columns))
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7
0,World,—,105568776,2023,100562011,2022,96698005,2021
1,United States,Americas,26854599,2023,25462700,2022,23315081,2021
2,China,Asia,19373586,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Japan,Asia,4409738,2023,4231141,2022,4940878,2021
4,Germany,Europe,4308854,2023,4072192,2022,4259935,2021


### Retain columns with index 0 and 2 (name of country and value of GDP quoted by IMF)

In [127]:
# Keep only label 0 (country name) and label 2 (IMF GDP estimate).
df = df.loc[:, [0, 2]]
df.head(5)

Unnamed: 0,0,2
0,World,105568776
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854


### Retain the rows with index 1 to 10 (row 0 is the "World" aggregate), which are the top 10 economies of the world.


In [128]:
# Positions 1 through 10 inclusive: position 0 is the "World" total and is skipped.
df = df.iloc[1:11]
df

Unnamed: 0,0,2
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


### Assign column names as "Country" and "GDP (Million USD)"

In [129]:
# Give the two remaining columns descriptive names in place of the numeric labels.
df.columns = pd.Index(['Country', 'GDP (Million USD)'])
df

Unnamed: 0,Country,GDP (Million USD)
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


### Rename the column header from 'GDP (Million USD)' to 'GDP (Billion USD)'. The values themselves are still in million USD at this point; they are converted in a later step.


In [130]:
# Only the header text changes here; the numbers in the column are untouched.
df = df.rename({'GDP (Million USD)': 'GDP (Billion USD)'}, axis='columns')
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


### Change the data type of the 'GDP (Billion USD)' column to integer using the astype() method.

In [131]:
# Cast just the GDP column to int; the mapping form of astype leaves 'Country' as it is.
df = df.astype({'GDP (Billion USD)': int})
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854599
2,China,19373586
3,Japan,4409738
4,Germany,4308854
5,India,3736882
6,United Kingdom,3158938
7,France,2923489
8,Italy,2169745
9,Canada,2089672
10,Brazil,2081235


### Convert the GDP values from million USD to billion USD (divide by 1000)

In [132]:
# Scale the column down by a factor of 1000 so the values match the header's unit.
# NOTE: this reads and overwrites the same column, so running the cell a second
# time divides the values by 1000 again.
df['GDP (Billion USD)'] = df['GDP (Billion USD)'].div(1000)
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854.599
2,China,19373.586
3,Japan,4409.738
4,Germany,4308.854
5,India,3736.882
6,United Kingdom,3158.938
7,France,2923.489
8,Italy,2169.745
9,Canada,2089.672
10,Brazil,2081.235


### Use the numpy.round() method to round the values to 2 decimal places.

In [133]:
# Round the GDP figures to two decimal places with numpy's round.
df['GDP (Billion USD)'] = np.round(df['GDP (Billion USD)'], decimals=2)
df

Unnamed: 0,Country,GDP (Billion USD)
1,United States,26854.6
2,China,19373.59
3,Japan,4409.74
4,Germany,4308.85
5,India,3736.88
6,United Kingdom,3158.94
7,France,2923.49
8,Italy,2169.74
9,Canada,2089.67
10,Brazil,2081.24


In [137]:
# Write the final table to a CSV file in the current working directory.
# The DataFrame index is written as the first (unnamed) column, since to_csv's default is kept.
df.to_csv(path_or_buf='./Largest_economies.csv')