In [4]:
import wbgapi as wb
import wbdata
import pandas as pd
from countrycode import countrycode
import numpy as np
from currency_converter import CurrencyConverter

In [5]:
# Load the list of countries from the export data that need GDP figures.
# The first CSV column becomes the index; the remaining column is named 'Country'.
countries = pd.read_csv(
    'data/export_countries.csv',
    names=['Country'],
    header=1,
    index_col=0,
)

In [6]:
# Peek at rows 10-29 (by position) to sanity-check the load.
countries.iloc[10:30]

Unnamed: 0,Country
11,Belarus
12,Belgium
13,Belize
14,Bermuda
15,Bolivia
16,"Bonaire, Sint Eustatius and Saba"
17,Brazil
18,Bulgaria
19,Burkina Faso
20,Cabo Verde


## Explore the World Bank API

In [7]:
# Look up metadata for the 'NY.GDP.PCAP.CD' series and for a few sample economies.
# Uncomment to browse everything the API offers:
# wb.source.info()
# wb.series.info()
# Only the last expression of a cell is rendered automatically, so both lookups
# are handed to display() explicitly; otherwise the series info is silently discarded.
display(
    wb.series.info('NY.GDP.PCAP.CD'),
    wb.economy.info(['CAN', 'USA', 'MEX']),
)

id,value,region,incomeLevel
CAN,Canada,NAC,HIC
MEX,Mexico,LCN,UMC
USA,United States,NAC,HIC
,3 elements,,


## GDP DATA

In [8]:
# World Bank indicator id passed to wb.data.DataFrame when the GDP table is fetched.
gdp_indicator = 'NY.GDP.MKTP.CD'
# NOTE(review): no visible cell reads this list; presumably these are economies
# to check by hand (North Korea, Taiwan) — confirm or remove.
country_code = ['PRK', 'TWN']

In [9]:
# `countries` is a DataFrame whose first column, 'Country', holds the names to look up.
country_names = countries['Country'].tolist()
country_names[:20]

['Albania',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Bahamas',
 'Bahrain',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Bermuda',
 'Bolivia',
 'Bonaire, Sint Eustatius and Saba',
 'Brazil',
 'Bulgaria',
 'Burkina Faso',
 'Cabo Verde']

In [10]:
# Match the country names against the World Bank API. wb.economy.coder returns a
# dict-like mapping from each input name to an economy code; names it cannot
# match map to None (e.g. 'Christmas Island' in the output below).
data = wb.economy.coder(country_names)
data.items()

dict_items([('Albania', 'ALB'), ('Antigua and Barbuda', 'ATG'), ('Argentina', 'ARG'), ('Armenia', 'ARM'), ('Aruba', 'ABW'), ('Australia', 'AUS'), ('Austria', 'AUT'), ('Bahamas', 'BHS'), ('Bahrain', 'BHR'), ('Barbados', 'BRB'), ('Belarus', 'BLR'), ('Belgium', 'BEL'), ('Belize', 'BLZ'), ('Bermuda', 'BMU'), ('Bolivia', 'BOL'), ('Bonaire, Sint Eustatius and Saba', None), ('Brazil', 'BRA'), ('Bulgaria', 'BGR'), ('Burkina Faso', 'BFA'), ('Cabo Verde', 'CPV'), ('Cambodia', 'KHM'), ('Cayman Islands', 'CYM'), ('Central African Republic', 'CAF'), ('Chile', 'CHL'), ('China', 'CHN'), ('Christmas Island', None), ('Cocos', None), ('Colombia', 'COL'), ('Congo, Democratic Republic of the', 'COD'), ('Costa Rica', 'CRI'), ('Cuba', 'CUB'), ('Cyprus', 'CYP'), ('Czechia', 'CZE'), ('Czechoslovakia', None), ("Côte d'Ivoire", 'CIV'), ('Denmark', 'DNK'), ('Djibouti', 'DJI'), ('Dominica', 'DMA'), ('Dominican Republic', 'DOM'), ('East Germany', 'DEU'), ('Ecuador', 'ECU'), ('Egypt', 'EGY'), ('Estonia', 'EST'), ('

In [39]:
# All economies known to the World Bank API: individual countries as well as
# aggregate regions (e.g. 'Africa Eastern and Southern').
wb_countries = wb.economy.info()
wb_countries

id,value,region,incomeLevel
ABW,Aruba,LCN,HIC
AFE,Africa Eastern and Southern,,
AFG,Afghanistan,SAS,LIC
AFW,Africa Western and Central,,
AGO,Angola,SSF,LMC
ALB,Albania,ECS,UMC
AND,Andorra,ECS,HIC
ARB,Arab World,,
ARE,United Arab Emirates,MEA,HIC
ARG,Argentina,LCN,UMC


In [40]:
# Introspect the coder result: alongside the usual dict methods it exposes
# `_coder_report` and `_repr_html_`.
print(dir(data))

['__class__', '__class_getitem__', '__contains__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__ior__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__ror__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_coder_report', '_repr_html_', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']


In [41]:
# Keys are the country names that were passed to the coder.
data.keys()

dict_keys(['Albania', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Bahamas', 'Bahrain', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Bermuda', 'Bolivia', 'Bonaire, Sint Eustatius and Saba', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Cabo Verde', 'Cambodia', 'Cayman Islands', 'Central African Republic', 'Chile', 'China', 'Christmas Island', 'Cocos', 'Colombia', 'Congo, Democratic Republic of the', 'Costa Rica', 'Cuba', 'Cyprus', 'Czechia', 'Czechoslovakia', "Côte d'Ivoire", 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'East Germany', 'Ecuador', 'Egypt', 'Estonia', 'Eswatini', 'Fiji', 'Finland', 'Former Union of Soviet Socialist Republics', 'France', 'French Polynesia', 'French Southern Antarctic Territories', 'French Southern Territories', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Greenland', 'Guadeloupe', 'Guam', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti', 'Heard Island and McDonald Islands', 'Honduras', 'Hong Kong', '

In [42]:
# Values are the matched economy codes, in the same order as the names;
# None marks a name the coder could not match.
data.values()

dict_values(['ALB', 'ATG', 'ARG', 'ARM', 'ABW', 'AUS', 'AUT', 'BHS', 'BHR', 'BRB', 'BLR', 'BEL', 'BLZ', 'BMU', 'BOL', None, 'BRA', 'BGR', 'BFA', 'CPV', 'KHM', 'CYM', 'CAF', 'CHL', 'CHN', None, None, 'COL', 'COD', 'CRI', 'CUB', 'CYP', 'CZE', None, 'CIV', 'DNK', 'DJI', 'DMA', 'DOM', 'DEU', 'ECU', 'EGY', 'EST', 'SWZ', 'FJI', 'FIN', None, 'FRA', 'PYF', None, None, 'GAB', 'GMB', 'GEO', 'DEU', 'GHA', 'GRC', 'GRL', None, 'GUM', 'GTM', 'GIN', 'GNB', 'HTI', None, 'HND', 'HKG', 'HUN', 'ISL', 'IND', 'IDN', 'IRN', 'IRL', 'ISR', 'ITA', 'JAM', 'JPN', 'JOR', 'KAZ', 'KEN', 'KIR', 'PRK', 'KOR', 'KWT', 'KGZ', 'LAO', 'LBN', 'LSO', 'LUX', 'MAC', 'MWI', 'MYS', 'MDV', 'MLT', None, 'MUS', 'MEX', 'MDA', 'MNG', 'MAR', 'MMR', 'NAM', 'NRU', 'NPL', 'NLD', 'NLD', 'NZL', 'NER', None, 'NOR', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'POL', 'PRT', 'QAT', 'ROU', 'RUS', None, None, 'SAU', 'SEN', 'SRB', 'SYC', 'SLE', 'SGP', 'SVK', 'SVN', 'ZAF', 'SSD', 'ESP', 'LKA', 'SWE', 'CHE', 'SYR', 'TWN', 'TJK', 'TZA', 'THA', 'TLS', None, 

In [43]:
# Materialise the matched codes as a list. Order and None placeholders are kept,
# because later cells assign this list positionally onto `countries`.
country_codes = [code for code in data.values()]

In [44]:
# Using the matched country codes, fetch annual GDP for 1988-2022 (one YR<year>
# column per year).
# `country_codes` keeps a None placeholder for every unmatched name and repeats
# some codes (e.g. 'DEU', 'NLD'), so only distinct, real codes are sent to the API.
gdp_query_codes = list(dict.fromkeys(code for code in country_codes if code is not None))
gdp_data = wb.data.DataFrame(gdp_indicator, gdp_query_codes, time=range(1988, 2023))

In [45]:
# Preview the GDP table: one row per economy code, one YR<year> column per year.
gdp_data

Unnamed: 0_level_0,YR1988,YR1989,YR1990,YR1991,YR1992,YR1993,YR1994,YR1995,YR1996,YR1997,...,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019,YR2020,YR2021,YR2022
economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABW,5.966480e+08,6.955307e+08,7.648045e+08,8.720670e+08,9.586592e+08,1.083240e+09,1.245810e+09,1.320670e+09,1.379888e+09,1.531844e+09,...,2.727933e+09,2.791061e+09,2.963128e+09,2.983799e+09,3.092179e+09,3.276188e+09,3.395794e+09,2.610039e+09,3.126019e+09,
ALB,2.051236e+09,2.253090e+09,2.028554e+09,1.099559e+09,6.521750e+08,1.185315e+09,1.880951e+09,2.392765e+09,3.199641e+09,2.258514e+09,...,1.277622e+10,1.322815e+10,1.138685e+10,1.186120e+10,1.301973e+10,1.515642e+10,1.540183e+10,1.516273e+10,1.793057e+10,1.888210e+10
ARE,3.627567e+10,4.146500e+10,5.070144e+10,5.155217e+10,5.423917e+10,5.562517e+10,5.930509e+10,6.574367e+10,7.357123e+10,7.883901e+10,...,4.002185e+11,4.141054e+11,3.702755e+11,3.692553e+11,3.905168e+11,4.270494e+11,4.179897e+11,3.494730e+11,4.150216e+11,5.075349e+11
ARG,1.268902e+11,7.662973e+10,1.413526e+11,1.897200e+11,2.287790e+11,2.367417e+11,2.574400e+11,2.580318e+11,2.721498e+11,2.928590e+11,...,5.520251e+11,5.263197e+11,5.947493e+11,5.575323e+11,6.436284e+11,5.248199e+11,4.477547e+11,3.855404e+11,4.872271e+11,6.327703e+11
ARM,,,2.256839e+09,2.069870e+09,1.272835e+09,1.201313e+09,1.315159e+09,1.468317e+09,1.596969e+09,1.639492e+09,...,1.112146e+10,1.160951e+10,1.055334e+10,1.054614e+10,1.152746e+10,1.245794e+10,1.361929e+10,1.264170e+10,1.386141e+10,1.950278e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
USA,5.236438e+12,5.641580e+12,5.963144e+12,6.158129e+12,6.520327e+12,6.858559e+12,7.287236e+12,7.639749e+12,8.073122e+12,8.577554e+12,...,1.684319e+13,1.755068e+13,1.820602e+13,1.869511e+13,1.947734e+13,2.053306e+13,2.138098e+13,2.106047e+13,2.331508e+13,2.546270e+13
UZB,,,1.336061e+10,1.367762e+10,1.294130e+10,1.309901e+10,1.289916e+10,1.335047e+10,1.394889e+10,1.474460e+10,...,7.318004e+10,8.084539e+10,8.619626e+10,8.613829e+10,6.208132e+10,5.287011e+10,6.028350e+10,6.022470e+10,6.960061e+10,8.039185e+10
VEN,6.022641e+10,4.353671e+10,4.860695e+10,5.345344e+10,6.041652e+10,6.003746e+10,5.841867e+10,7.738949e+10,7.054321e+10,8.583768e+10,...,3.710054e+11,4.823593e+11,,,,,,,,
VNM,2.542381e+10,6.293305e+09,6.471741e+09,9.613370e+09,9.866990e+09,1.318095e+10,1.628643e+10,2.073616e+10,2.465747e+10,2.684370e+10,...,2.137088e+11,2.334515e+11,2.392583e+11,2.570960e+11,2.813536e+11,3.101065e+11,3.343653e+11,3.466158e+11,3.661376e+11,4.088024e+11


In [46]:
# Confirm that the API result is a pandas DataFrame.
type(gdp_data)

pandas.core.frame.DataFrame

In [47]:
# Export GDP data to CSV for easier use later.
# The economy codes live in the DataFrame index (named 'economy'), so the index
# has to be written out; with index=False the rows could not be traced back to
# a country.
gdp_data.to_csv('data/gdp_data.csv', index=True)

In [48]:
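# TODO: decide whether GDP should be converted to CAD (CurrencyConverter is imported above but not used in the cells so far).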


### World Bank API regions
(World Bank regions are not the same as continents.)

In [49]:
# Full economy metadata table from the World Bank API, indexed by economy id.
economy_data = wb.economy.DataFrame()
economy_data

Unnamed: 0_level_0,name,aggregate,longitude,latitude,region,adminregion,lendingType,incomeLevel,capitalCity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ABW,Aruba,False,-70.0167,12.51670,LCN,,LNX,HIC,Oranjestad
AFE,Africa Eastern and Southern,True,,,,,,,
AFG,Afghanistan,False,69.1761,34.52280,SAS,SAS,IDX,LIC,Kabul
AFW,Africa Western and Central,True,,,,,,,
AGO,Angola,False,13.2420,-8.81155,SSF,SSA,IBD,LMC,Luanda
...,...,...,...,...,...,...,...,...,...
XKX,Kosovo,False,20.9260,42.56500,ECS,ECA,IDX,UMC,Pristina
YEM,"Yemen, Rep.",False,44.2075,15.35200,MEA,MNA,IDX,LIC,Sana'a
ZAF,South Africa,False,28.1871,-25.74600,SSF,SSA,IBD,UMC,Pretoria
ZMB,Zambia,False,28.2937,-15.39820,SSF,SSA,IDX,LMC,Lusaka


In [50]:
# Move the economy id out of the index into a regular column, then keep only
# the id, name and World Bank region.
economy_data = economy_data.reset_index()[['id', 'name', 'region']]
economy_data

Unnamed: 0,id,name,region
0,ABW,Aruba,LCN
1,AFE,Africa Eastern and Southern,
2,AFG,Afghanistan,SAS
3,AFW,Africa Western and Central,
4,AGO,Angola,SSF
...,...,...,...
261,XKX,Kosovo,ECS
262,YEM,"Yemen, Rep.",MEA
263,ZAF,South Africa,SSF
264,ZMB,Zambia,SSF


## Get annual GDP for countries in the export data

### Mapping Countries - code, continent

In [51]:
# Get the continent of each matched economy (used for visuals).
# A comprehension keeps the iteration variable local, so it does not leak into
# the notebook namespace and clobber the `country_code` list defined in the
# GDP DATA section.
# Unmatched countries (code is None) get NaN so the list stays aligned with
# `country_codes`.
continent_list = [
    np.nan if code is None
    else countrycode(code, origin="iso3c", destination="continent")
    for code in country_codes
]

In [52]:
# Spot-check the first ten continent lookups.
continent_list[:10]

['Europe',
 'Americas',
 'Americas',
 'Asia',
 'Americas',
 'Oceania',
 'Europe',
 'Americas',
 'Asia',
 'Americas']

In [53]:
# Add continent to the countries df.
# NOTE(review): the assignment is positional — it assumes continent_list has one
# entry per row of `countries`, in row order (holds as long as the country names
# are unique, since the codes come from a dict keyed by name).
countries['continent'] = continent_list
countries

Unnamed: 0,Country,continent,country_code
1,Albania,Europe,ALB
2,Antigua and Barbuda,Americas,ATG
3,Argentina,Americas,ARG
4,Armenia,Asia,ARM
5,Aruba,Americas,ABW
...,...,...,...
155,Uzbekistan,Asia,UZB
156,Venezuela,Americas,VEN
157,Viet Nam,Asia,VNM
158,West Germany,Europe,DEU


In [54]:
# Attach the economy code to each row of the countries df for easier mapping.
# The list is assigned by position, so it must be in the same order as the rows.
countries['country_code'] = country_codes
countries.head(20)

Unnamed: 0,Country,continent,country_code
1,Albania,Europe,ALB
2,Antigua and Barbuda,Americas,ATG
3,Argentina,Americas,ARG
4,Armenia,Asia,ARM
5,Aruba,Americas,ABW
6,Australia,Oceania,AUS
7,Austria,Europe,AUT
8,Bahamas,Americas,BHS
9,Bahrain,Asia,BHR
10,Barbados,Americas,BRB


In [58]:
# Cross-reference: collect the countries that have no economy code, so they
# can be dropped if they are not significant.
missing_code_mask = countries['country_code'].isna()
no_gdp_countries = countries.loc[missing_code_mask]

In [55]:
# Save the country -> continent / code mapping. index=False leaves out the
# numeric index that was read from the first column of the source CSV.
countries.to_csv('data/country_code_mapping.csv', index = False)

In [56]:
# Check how Taiwan, Hong Kong and Macao were coded.
# TODO: North Korea is not covered by this pattern yet.
special_case_mask = countries['Country'].str.contains('Taiwan|Hong|Macao')
countries.loc[special_case_mask]

Unnamed: 0,Country,continent,country_code
67,Hong Kong,Asia,HKG
90,Macao,Asia,MAC
138,Taiwan,Asia,TWN
