In [267]:
import pandas as pd
import fredapi as fa # to import data from Federal Reserve API
import wbgapi as wb # to import data from World Bank API
import plotly.express as px # to chart the data

In [268]:
# Downloads data from Federal Reserve API

fred = fa.Fred(api_key="afaf79e2dc5aa8fa23e422406368e64a")

# comparable datasets
real_gdp_capita_US_USD_NSA_A = fred.get_series('NYGDPPCAPKDUSA') # Constant GDP per capita for the United States (2010 U.S. Dollars, Not Seasonally Adjusted)
real_gdp_capita_UK_USD_NSA_A = fred.get_series('NYGDPPCAPKDGBR') # Constant GDP per capita for the United Kingdom (2010 U.S. Dollars, Not Seasonally Adjusted)
nominal_gdp_US_USD_A = fred.get_series('MKTGDPUSA646NWDB') # Gross Domestic Product for United States
nominal_gdp_UK_USD_A = fred.get_series('MKTGDPGBA646NWDB') # Gross Domestic Product for United Kingdom
nominal_gdp_US_NSA_Q = fred.get_series('NA000334Q') # Gross Domestic Product for United States
nominal_gdp_UK_NSA_Q = fred.get_series('CPMNACNSAB1GQUK') # Gross Domestic Product for United Kingdom
CPI_US_YoY_NSA_M = fred.get_series('CPALTT01USM659N') # Consumer Price Index: Total All Items for the United States 
CPI_UK_YoY_NSA_M = fred.get_series('CPALTT01GBM659N') # Consumer Price Index: Total All Items for the United Kingdom
unemp_US_SA_M = fred.get_series('UNRATE') # unemployment rate in the United States
unemp_UK_SA_M = fred.get_series('AURUKM') # Unemployment Rate in the United Kingdom
population_US_NSA_A = fred.get_series('POPTHM') # Population in the United States
population_UK_NSA_A = fred.get_series('POPUKA') # Population in the United Kingdom
gov_debt_to_gdp_US_NSA_A = fred.get_series('GGGDTAUSA188N') # General government gross debt for United States
gov_debt_to_gdp_UK_NSA_A = fred.get_series('GGGDTAGBA188N') # General government gross debt for United Kingdom
house_debt_to_gdp_US_NSA_Q = fred.get_series('HDTGPDUSQ163N') # Household Debt to GDP for United States
house_debt_to_gdp_UK_NSA_Q = fred.get_series('HDTGPDGBQ163N') # Household Debt to GDP for United Kingdom
# non-comparable datasets
real_gdp_US_SA_Q = fred.get_series('GDPC1') # Real Gross Domestic Product for United States (Billions of Chained 2012 Dollars, Seasonally Adjusted Annual Rate)
real_gdp_UK_SA_Q = fred.get_series('NGDPRSAXDCGBQ') # Real Gross Domestic Product for Great Britain (Domestic Currency, Seasonally Adjusted)
ind_prod_US_SA_M = fred.get_series('INDPRO') # Industrial Production: Total Index in the United States (Index 2017=100, Seasonally Adjusted)
ind_prod_UK_SA_M = fred.get_series('GBRPROINDMISMEI') # Production of Total Industry in the United Kingdom (Index 2015=100, Seasonally Adjusted)

# current account balance
# FDI

real_gdp_US_SA_Q.tail()

2021-07-01    19672.594
2021-10-01    20006.181
2022-01-01    19924.088
2022-04-01    19895.271
2022-07-01    20039.406
dtype: float64

In [269]:
ind_prod_US_SA_M.name = 'ind_prod_US_SA_M'
ind_prod_UK_SA_M.name = 'ind_prod_UK_SA_M'

df = pd.merge(ind_prod_US_SA_M, ind_prod_UK_SA_M, how='inner', left_index=True, right_index=True) # merge data for 2 countries into a single DataFrame only for rows for which both countries have data
df.tail()

Unnamed: 0,ind_prod_US_SA_M,ind_prod_UK_SA_M
2022-04-01,104.2709,113.229572
2022-05-01,104.1646,113.65405
2022-06-01,104.0772,113.335692
2022-07-01,104.7577,112.062257
2022-08-01,104.6544,110.045985


In [270]:
# Downloads data from World Bank API. 
# Documentation is here: https://github.com/tgherzog/wbgapi

# find the right data to import

# wb.source.info() # shows names of World Bank databases.
# wb.series.info() # shows all world bank indicators (e.g. 'EG.ELC.ACCS.ZS'). By defaults, it shows all indicators in World Development Indicators (WDI) database.
# wb.series.info(q="GDP") # to search for a specific indicator in WB API using a keyword
# wb.economy.info() # shows names and codes of countries and economies and codes for their region & income levels in World Bank databases
# wb.economy.info(q="Azerbaijan") # to search for a specific country/economy in WB API using a keyword
# wb.region.info() # shows names of geographic regions in World Bank databases
# wb.income.info() # shows income groups in World Bank databases
# wb.search('NY.GDP.MKTP.CD') # deeper search on all meta data 

In [8]:
# create functions as described in the logic for the code here: https://miro.com/app/board/uXjVP6Rlhjg=/

# function for users to select data type by choosing either Snapshot (if user types '1') or Time series (if user types '2'). The function will return 1 or 2.
def data_type_func():
    data_type_dict = {1: 'Snapshot', 2: 'Time series'}
    while True:
        try: # use try+except to force the users to re-enter values if they entered them incorrectly
            data_type_user_choice = int(input('What data type do you want to capture? Enter 1 for Snapshot or 2 for Time series'))
            if data_type_user_choice in (1, 2):
                print('You selected '+ data_type_dict[data_type_user_choice])
                return data_type_user_choice
                break
            else:
                print('You typed the wrong value. Try again.')
        except:
            print('You typed the wrong value. Try again.')

# function for users to enter countries/regions that they want the data. The options will differ depending on whether they selected Country (1) or Region (2). The function will return country/countries or region(s)
def geo_list_func(country_or_region):
    geo_list = []
    x = 0
    if country_or_region == 1:
        geo_msg = 'Enter ISO-3 country code (e.g. USA). Press "q" to finish.'
    elif country_or_region == 2:
        geo_msg = 'Enter geographical code (e.g. EEU). Press "q" to finish.'
    while x == 0:
        country = input(geo_msg)
        if country == 'q':
            x = x + 1
        else:
            geo_list.append(country.upper())
    print('You selected ', geo_list)
    return geo_list

# function for users to select between Country (if user types '1') or Region (if user types '2'). The function will return 1 or 2.
def geo_func():
    geo_dict = {1: 'Country', 2: 'Region'}
    while True:
        try: # use try+except to force the users to re-enter values if they entered them incorrectly
            geo_user_choice = int(input('What geography do you want to capture? Enter 1 for Country or 2 for Region'))
            if geo_user_choice == 1:
                print('You selected '+ geo_dict[geo_user_choice])
                countries = geo_list_func(geo_user_choice) # calls geo_list_func to get user to enter countries, then assigns them to 'countries' list
                return geo_user_choice, countries
                break
            elif geo_user_choice == 2:
                print('You selected '+ geo_dict[geo_user_choice])
                regions = geo_list_func(geo_user_choice) # calls geo_list_func to get user to enter regions, then assigns them to 'regions' list
                return geo_user_choice, regions
                break
            else:
                print('You typed the wrong value. Try again.')
        except:
            print('You typed the wrong value. Try again.')

# function for users to type year(s) depending on their earlier selection between Snapshot and Time series data type. The function will return year(s).
def year_func(data_type_user_choice):
    while True:
        try: # use try+except to force the users to re-enter values if they entered them incorrectly
            if data_type_user_choice == 1: # runs this code if option 1 (i.e. Snapshot) has been previously selected as data type. Outputs year using year_user_choice variable.
                year_user_choice = input('What year do you want to see the data for? For example, 2019.')
                if len(year_user_choice) == 4 and int(year_user_choice) > 0: # checks to ensure that year value is 4 characters long
                    print('You selected ' + year_user_choice)
                    return int(year_user_choice)
                    break
            elif data_type_user_choice == 2: # runs this code if option 2 (i.e. Time series) has been previously selected as data type. Outputs start and end year using start_year_user_choice and end_year_user_choice variables.
                start_year_user_choice = input('What start year do you want to see the data for? For example, 2019.')
                end_year_user_choice = input('What end year do you want to see the data for? For example, 2019.')
                if len(start_year_user_choice) == 4 and len(end_year_user_choice) == 4 and int(start_year_user_choice) < int(end_year_user_choice): # checks to ensure that years are 4-characters long and that start year value is lower than end year value.
                    print('You selected ' + start_year_user_choice)
                    print('You selected ' + end_year_user_choice)
                    year_user_choice = (int(start_year_user_choice), int(end_year_user_choice))
                    return year_user_choice
                    break
            else:
                print('You typed the wrong value. Try again.')
        except:
            print('You typed the wrong value. Try again.')

# function for users to type the data field(s) that they wanna download. Whether is a field or several fields depends on users' earlier selection between Snapshot and Time series data type. . The function will return data field(s) in a list format.
def data_fields_func(field_or_fields):
    field_list = []
    x = 0
    if field_or_fields == 1:
        while x == 0:
            geo_msg = 'Enter data fields to download data for (e.g. EG.ELC.ACCS.ZS). Press "q" to finish.'
            data_field = input(geo_msg)
            if data_field == 'q':
                x = x + 1
            else:
                field_list.append(data_field.upper())
    elif field_or_fields == 2:
        geo_msg = 'Enter data field to download data for (e.g. EG.ELC.ACCS.ZS).'
        data_field = input(geo_msg)
        field_list.append(data_field.upper())
    print('You selected ', field_list)
    return field_list

# function to select the appropriate chart depending on whether the user previously selected Snapshot (in which case this function will automatically return 'Bar chart: benchmarked') or Time series data type (in which case the user will be given an option to either select 'Line chart: benchmarked' or 'Line chart: percent change'). The function will return 0, 1 or 2.
def chart_type_func(data_type_user_choice):
    chart_type_dict = {0: 'Bar chart: benchmarked', 1: 'Line chart: benchmarked', 2: 'Line chart: percent change'}
    if data_type_user_choice == 1:
        print('You selected '+ chart_type_dict[0])
        return 0 # returns '0' value which represents 'Bar chart: benchmarked'
    elif data_type_user_choice == 2:
        while True:
            try: # use try+except to force the users to re-enter values if they entered them incorrectly
                chart_type_user_choice = int(input("What chart do you want to create? Enter 1 for 'Line chart: benchmarked' or 2 for 'Line chart: percent change'"))
                if chart_type_user_choice == 1:
                    print('You selected '+ chart_type_dict[chart_type_user_choice])
                    return chart_type_user_choice
                    break
                elif chart_type_user_choice == 2:
                    print('You selected '+ chart_type_dict[chart_type_user_choice])
                    return chart_type_user_choice
                    break
                else:
                    print('You typed the wrong value. Try again.')
            except:
                print('You typed the wrong value. Try again.')


# call the functions

data_type_user_choice = data_type_func() # runs a function to let users select either Snapshot or Time series data type and outputs variable data_type_user_choice with user selection either as 1 (Snapshot) or 2 (Time series).
year_user_choice = year_func(data_type_user_choice) # runs a function to let users select year (if they previously selected Snapshot data type) or start & end year (if they previuosly selected Time series data type)
geo_user_choice = geo_func() # runs a function geo_func and returns a list of two lists, where the first entry shows whether the user picked country (option 1) or region (option 2) and the second entry is a list of countries/regions that the user has inputted
data_fields_user_choice = data_fields_func(data_type_user_choice) # runs a function to let users select several data fields (if they previously selected Snapshot data type) or one data field (if they previuosly selected Time series data type). This function outputs field(s) inputted by user as a list.
chart_type_func_choice = chart_type_func(data_type_user_choice) # runs a function to let users select a chart type (if they previuosly selected Time series data type) by choosing between 'Line chart: benchmarked' (option 1) or 'Line chart: percent change' (option 2). If the users previously selected Snapshot data type, then this function will automatically select 'Bar chart: benchmarked' (option 0).

You selected Time series
You selected 2000
You selected 2019
You selected Country
You selected  ['USA', 'GER']
You selected  ['DATA1']
You selected Line chart: benchmarked


In [276]:
# download data from World Bank for multiple countries & data fields/indicators but for a single year. Then show it as a bar chart.

countries = ['USA', 'GBP', 'CHN', 'BRA', 'IND', 'ZWE', 'AZE', 'WLD'] # select countries
region = 'EUU' # select region

data_fields_dict = {'EG.ELC.ACCS.ZS' : 'Access to electricity (% of population)', 
                    'NY.GDP.MKTP.CD' : 'GDP (current US$)',
                    'NY.GDP.PCAP.CD' : 'GDP per capita (current US$)',
                    'SP.POP.TOTL.FE.ZS' : 'Population, female (% of total population'
                    } # create a dictionary of indicator codes and descriptions

df = wb.data.DataFrame(list(data_fields_dict.keys()), wb.region.members(region), time=2020, labels=True).dropna() # downloads data for all EU countries for specific data fields/indicators for year 2020. Also, drops all blank values
df.columns = ['Country'] +  list(data_fields_dict.values()) # change columns names from using indicator codes to indicator names
df.set_index(['Country'], inplace=True) # replace index that has country codes with index using values from 'Country' column
#df = wb.data.DataFrame(data_fields_list, countries, mrv=30, numericTimeKeys=True)
#df = wb.data.DataFrame(data_fields_list, coutnries, time=range(2010,2020), index='time', numericTimeKeys=True, labels=True)
df

Unnamed: 0_level_0,Access to electricity (% of population),GDP (current US$),GDP per capita (current US$),"Population, female (% of total population"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ireland,100.0,425889000000.0,85422.542868,50.355401
Greece,100.0,188835200000.0,17647.232688,50.918176
Lithuania,100.0,56546960000.0,20232.302036,53.715933
Finland,100.0,271837000000.0,49160.837152,50.684983
Romania,100.0,249511300000.0,12956.566248,51.377136
Sweden,100.0,541487200000.0,52300.206199,49.907795
France,100.0,2630318000000.0,39037.122631,51.604879
Portugal,100.0,228539200000.0,22194.566115,52.690275
Italy,100.0,1892574000000.0,31834.972618,51.311884
Bulgaria,99.699997,69889350000.0,10079.203381,51.440632


In [None]:
# chart the data

fig = px.bar(df, x=df.index, y=df.columns)
fig.show()

In [None]:
# to have Country as row and all column names under a new 'data_field' column and their values under 'value' column
df_new = df.reset_index() # move 'Country' column from index to a separate column
df_new = df_new.melt(id_vars=['Country'], value_vars=list(df.columns)[1:], var_name='data_field', value_name='value')
df_new.head()

Unnamed: 0,Country,data_field,value
0,Ireland,GDP (current US$),425889000000.0
1,Greece,GDP (current US$),188835200000.0
2,Lithuania,GDP (current US$),56546960000.0
3,Finland,GDP (current US$),271837000000.0
4,Romania,GDP (current US$),249511300000.0


In [None]:
# TODO fix chart so that it shows data as grouped bars

fig = px.bar(df_new, x='Country', y='value', color='data_field', barmode = 'group')
fig.show()

In [None]:
# TODO select datapoints to download from https://data.worldbank.org/ and add them to data_fields_dict
# TODO https://www.youtube.com/watch?v=fXqAajIgtWc
# TODO create geographic maps using this tutorial: https://www.youtube.com/watch?v=Oht6cf-Acl0