In [2]:
import warnings
# Install a global "ignore" filter with no category/message/module restriction,
# so every warning raised from here on is suppressed in the cell outputs.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below — consider narrowing the filter once the offending warning is known.
warnings.filterwarnings("ignore")

In [3]:
from pandas_datareader import wb
import pandas as pd

# World Bank indicator codes mapped to the readable column labels used in this notebook:
# GDP, GDP per capita, access to electricity, population, CO2 emissions.
indicator_labels = {
    "NY.GDP.MKTP.CD": "GDP",
    "NY.GDP.PCAP.CD": "GDP per capita",
    "EG.ELC.ACCS.ZS": "Access to electricity",
    "SP.POP.TOTL": "Population",
    "EN.ATM.CO2E.KT": "kt CO2",
}
indicators = list(indicator_labels)

# ISO codes of the countries to download:
# Australia, Bhutan, Germany, France, Indonesia, India, Japan, Korea, Nepal, Netherlands, Russia, South Africa
countries = ["AUS", "BTN", "DEU", "FRA", "IDN", "IND", "JPN", "KOR", "NPL", "NLD", "RUS", "ZAF"]

# Fetch a single year (start == end == 2018) for every country/indicator pair.
df = wb.download(country=countries, indicator=indicators, start=2018, end=2018)

# Replace the indicator codes in the column axis with their readable labels.
df = df.rename(columns=indicator_labels)

# Keep only level 0 of the row index (the country name, per the rendered output)
# so rows can be looked up by country label alone.
df.index = df.index.get_level_values(0)

df

Unnamed: 0_level_0,GDP,GDP per capita,Access to electricity,Population,kt CO2
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Australia,1428289000000.0,57207.871509,100.0,24966643,387070.0
Bhutan,2446866000.0,3210.706269,99.974365,762096,1050.0
Germany,3974443000000.0,47939.278288,100.0,82905782,707700.0
France,2790957000000.0,41557.854859,100.0,67158348,307050.0
Indonesia,1042272000000.0,3902.661668,98.510002,267066843,576990.0
India,2702930000000.0,1974.377788,95.699997,1369003306,2451930.0
Japan,5037835000000.0,39727.1166,100.0,126811000,1116150.0
"Korea, Rep.",1724846000000.0,33436.923065,100.0,51585058,630640.0
Netherlands,914043400000.0,53044.532435,100.0,17231624,151360.0
Nepal,33111530000.0,1161.534374,93.919998,28506712,15190.0


In [4]:
# Countries for which we want to pull a subset of indicators out of df.
target_countries = ["Bhutan", "Germany", "Japan", "Nepal", "Netherlands"]
df_target = pd.DataFrame(target_countries, columns=["countries"])
df_target

Unnamed: 0,countries
0,Bhutan
1,Germany
2,Japan
3,Nepal
4,Netherlands


### Step 1: Create empty columns for the desired indicators

First, we create a dataframe called df_target to hold the desired subset of the data. Initially, it contains only the names of the five countries of interest. We then add empty columns for the indicators we want to pull from df: CO2 emissions (tonnes) and Population.

In [5]:
# Add one empty-string placeholder column per indicator; the real values are
# filled in by the mapping step further down.
for placeholder_label in ("CO2 emissions (tonnes)", "Population"):
    df_target[placeholder_label] = ""
df_target

Unnamed: 0,countries,CO2 emissions (tonnes),Population
0,Bhutan,,
1,Germany,,
2,Japan,,
3,Nepal,,
4,Netherlands,,


### Step 2: Set the column common with ```df``` in ```df_target``` as index

The "countries" column in df_target holds the same labels as the "country" index of df. Therefore, as the second step, we set the "countries" column of df_target as its index.


In [6]:
# Promote the "countries" column to the row index so that its labels can be
# matched against the country index of df.
df_target = df_target.set_index("countries")
df_target

Unnamed: 0_level_0,CO2 emissions (tonnes),Population
countries,Unnamed: 1_level_1,Unnamed: 2_level_1
Bhutan,,
Germany,,
Japan,,
Nepal,,
Netherlands,,


### Step 3: Mapping

Third — and this is the main step — we map the index of df_target against df to fetch the data for the required columns. For example, the values in the kt CO2 column of df, multiplied by 1000, are returned for the CO2 emissions (tonnes) column of df_target. The map() function substitutes each value in a Series (or, as here, each label in an Index) with another value, which can be derived from a function, a dictionary, or a Series.

In [7]:
# Look up every country label in df_target's index in the corresponding column
# of df (which is indexed by country name). Labels that do not occur in df's
# index yield NaN rather than raising.
# "kt CO2" is in kilotonnes, hence the factor 1000 to obtain tonnes.
df_target["CO2 emissions (tonnes)"] = df_target.index.map(df["kt CO2"]) * 1000
df_target["Population"]             = df_target.index.map(df["Population"])
# Per-capita emissions. The column label deliberately has no trailing space so
# that it can be selected later as df_target["t CO2/capita"].
df_target["t CO2/capita"]           = df_target["CO2 emissions (tonnes)"] / df_target["Population"]
df_target

Unnamed: 0_level_0,CO2 emissions (tonnes),Population,t CO2/capita
countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bhutan,1050000.0,762096,1.377779
Germany,707700000.0,82905782,8.536196
Japan,1116150000.0,126811000,8.801681
Nepal,15190000.0,28506712,0.532857
Netherlands,151360000.0,17231624,8.78385


In [8]:
# Show the column labels of df (the renamed indicator names) for reference.
df.columns

Index(['GDP', 'GDP per capita', 'Access to electricity', 'Population',
       'kt CO2'],
      dtype='object')

In [9]:
# Alternative to mapping: select rows and columns directly by label with .loc.
rows_wanted = ["Australia", "Bhutan", "Germany"]
cols_wanted = ["kt CO2", "Population"]
df.loc[rows_wanted, cols_wanted]

Unnamed: 0_level_0,kt CO2,Population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,387070.007324,24966643
Bhutan,1049.999952,762096
Germany,707700.012207,82905782
