# Import Libraries

In [1]:
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup
from pandas_datareader import wb
from requests import get

# Obtain Data

Get html for webpage with list of country codes for all nations and territories

In [2]:
# Download the page that lists the country codes.
# The timeout keeps the cell from waiting indefinitely on a stalled connection,
# and raise_for_status() stops the notebook here on an HTTP error instead of
# handing an error page to the parsing cell.
response = get('https://www.iban.com/country-codes', timeout=30)
response.raise_for_status()
response

<Response [200]>

Parse html to extract country names/codes for all nations and territories

In [3]:
# Parse the downloaded page into a BeautifulSoup tree.
soup = BeautifulSoup(response.content, 'html.parser')

# Column names: the text of every <th> in the first <tr> of the document.
headers = [th.text for th in soup.find('tr').find_all('th')]

# Records: one list of <td> texts for each <tr> inside the first <tbody>.
data = [
    [td.text for td in tr.find_all('td')]
    for tr in soup.find('tbody').find_all('tr')
]

country_codes_df = pd.DataFrame(data, columns=headers)
country_codes_df

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric
0,Afghanistan,AF,AFG,004
1,Åland Islands,AX,ALA,248
2,Albania,AL,ALB,008
3,Algeria,DZ,DZA,012
4,American Samoa,AS,ASM,016
...,...,...,...,...
244,Wallis and Futuna,WF,WLF,876
245,Western Sahara,EH,ESH,732
246,Yemen,YE,YEM,887
247,Zambia,ZM,ZMB,894


Create the list of country codes for which to fetch World Bank World Development Indicators

In [4]:
# Alpha-3 codes that are left out of the World Bank queries.
# NOTE(review): presumably these territories are not available from the
# World Bank API — confirm before changing the list.
EXCLUDED_CODES = {
    'ALA', 'AIA', 'ATA', 'BES', 'BVT', 'IOT', 'CXR', 'CCK', 'COK',
    'FLK', 'GUF', 'ATF', 'GLP', 'GGY', 'HMD', 'VAT', 'JEY', 'MTQ',
    'MYT', 'MSR', 'NIU', 'NFK', 'PCN', 'REU', 'BLM', 'SHN', 'SPM',
    'SGS', 'SJM', 'TWN', 'TKL', 'UMI', 'WLF', 'ESH',
}

# Filter in one pass, keeping the order of the scraped table. Unlike repeated
# list.remove() calls, this does not raise ValueError when the scraped table
# no longer contains one of the excluded codes.
countries = [
    code
    for code in country_codes_df['Alpha-3 code']
    if code not in EXCLUDED_CODES
]
countries[0:5]

['AFG', 'ALB', 'DZA', 'ASM', 'AND']

Search for the World Bank ID of the GDP World Development Indicator

In [5]:
# Find the World Development Indicators entry for GDP in current US dollars.
# The row is selected by source and indicator name rather than by a hard-coded
# row label: the label depends on the catalog that wb.search() returns and is
# presumably not stable over time.
gdp_search = wb.search('GDP')
gdp_search[
    (gdp_search['source'] == 'World Development Indicators')
    & (gdp_search['name'] == 'GDP (current US$)')
].iloc[0]

id                                                       NY.GDP.MKTP.CD
name                                                  GDP (current US$)
unit                                                                   
source                                     World Development Indicators
sourceNote            GDP at purchaser's prices is the sum of gross ...
sourceOrganization    b'World Bank national accounts data, and OECD ...
topics                                                 Economy & Growth
Name: 10501, dtype: object

Fetch gdp data by country from world development indicators database

In [6]:
# Request the 2019 values of the GDP (current US$) indicator for every code in
# `countries`.
gdp_df = wb.download(
    country=countries,
    indicator='NY.GDP.MKTP.CD',
    start=2019,
    end=2019,
)
gdp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,NY.GDP.MKTP.CD
country,year,Unnamed: 2_level_1
Aruba,2019,3.310056e+09
Afghanistan,2019,1.879945e+10
Angola,2019,6.930910e+10
Albania,2019,1.540183e+10
Andorra,2019,3.155149e+09
...,...,...
Samoa,2019,8.520071e+08
"Yemen, Rep.",2019,2.188761e+10
South Africa,2019,3.879346e+11
Zambia,2019,2.330867e+10


Search for the World Bank ID of the CO2 emissions World Development Indicator

In [7]:
# Find the World Development Indicators entry for CO2 emissions in kilotons.
# The row is selected by source and indicator name rather than by a hard-coded
# row label: the label depends on the catalog that wb.search() returns and is
# presumably not stable over time.
pollution_search = wb.search('co2')
pollution_search[
    (pollution_search['source'] == 'World Development Indicators')
    & (pollution_search['name'] == 'CO2 emissions (kt)')
].iloc[0]

id                                                       EN.ATM.CO2E.KT
name                                                 CO2 emissions (kt)
unit                                                                   
source                                     World Development Indicators
sourceNote            Carbon dioxide emissions are those stemming fr...
sourceOrganization    b'Climate Watch. 2020. GHG Emissions. Washingt...
topics                                    Climate Change ; Environment 
Name: 5999, dtype: object

Fetch CO2 emissions data by country from the World Development Indicators database

In [8]:
# Request the 2019 values of the CO2 emissions (kt) indicator for every code in
# `countries`.
pollution_df = wb.download(
    country=countries,
    indicator='EN.ATM.CO2E.KT',
    start=2019,
    end=2019,
)
pollution_df

Unnamed: 0_level_0,Unnamed: 1_level_0,EN.ATM.CO2E.KT
country,year,Unnamed: 2_level_1
Aruba,2019,
Afghanistan,2019,6079.999924
Angola,2019,25209.999084
Albania,2019,4829.999924
Andorra,2019,500.000000
...,...,...
Samoa,2019,300.000012
"Yemen, Rep.",2019,11100.000381
South Africa,2019,439640.014648
Zambia,2019,6800.000191


Merge dfs with co2 data and gdp data

In [9]:
# Join the GDP and CO2 frames on their shared (country, year) keys, then turn
# the resulting index into ordinary columns.
df = gdp_df.merge(pollution_df, on=['country', 'year']).reset_index()
df

Unnamed: 0,country,year,NY.GDP.MKTP.CD,EN.ATM.CO2E.KT
0,Aruba,2019,3.310056e+09,
1,Afghanistan,2019,1.879945e+10,6079.999924
2,Angola,2019,6.930910e+10,25209.999084
3,Albania,2019,1.540183e+10,4829.999924
4,Andorra,2019,3.155149e+09,500.000000
...,...,...,...,...
210,Samoa,2019,8.520071e+08,300.000012
211,"Yemen, Rep.",2019,2.188761e+10,11100.000381
212,South Africa,2019,3.879346e+11,439640.014648
213,Zambia,2019,2.330867e+10,6800.000191


Save df to csv file

In [10]:
# Write the merged table to data/climate_data.csv. The data/ directory is
# created first so the write does not fail when the folder is missing.
# The index is still written as the first column, as before.
Path('data').mkdir(parents=True, exist_ok=True)
df.to_csv('data/climate_data.csv')