In [1]:
# For reading in API Keys
import os
from dotenv import load_dotenv

# For basic analysis
import pandas as pd
from pathlib import Path
import numpy as np
import time

# For parsing through JSON dumps from API calls
import requests
import json

In [2]:
# Base URL for the World Bank API (v2) country endpoint.
# HTTPS is used so the indicator data cannot be altered in transit.
wb_api_base = "https://api.worldbank.org/v2/country/"

# Maps the short indicator codes accepted by this notebook's functions
# to their corresponding World Bank API indicator IDs
ind_dict = {
    "GDP": "NY.GDP.MKTP.CD",     # GDP
    "GDG": "NY.GDP.MKTP.KD.ZG",  # GDP Growth YoY
    "GDC": "NY.GDP.PCAP.CD",     # GDP per Capita
    "CPI": "FP.CPI.TOTL.ZG",     # CPI
    # Disabled: "CAB": "BN.CAB.XOKA.GD.ZS",  # Current Account Balance as % of GDP
    "UEM": "SL.UEM.TOTL.NE.ZS",  # Unemployment rate
}

In [3]:
# Full basket of 3-letter codes requested from the World Bank API.
# Note: Taiwan is not included in the World Bank's data set, so it is absent here.
full_country_basket = [
    "USA",  # United States
    "AUS",  # Australia
    "BRA",  # Brazil
    "GBR",  # Great Britain
    "CAN",  # Canada
    "IND",  # India
    "JPN",  # Japan
    "MYS",  # Malaysia
    "MEX",  # Mexico
    "NZL",  # New Zealand
    "NOR",  # Norway
    "SGP",  # Singapore
    "ZAF",  # South Africa
    "KOR",  # South Korea
    "LKA",  # Sri Lanka
    "SWE",  # Sweden
    "CHE",  # Switzerland
    "THA",  # Thailand
    "CHN",  # China
    "EUU",  # European Union
]

In [4]:
def _fetchWorldBankJson(url, indicator, max_attempts=11, retry_delay=10, timeout=30):
    """
    Sends a GET request to the given World Bank API URL and returns the decoded JSON body.

    Any response whose status code is not 200 is retried after `retry_delay` seconds until
    `max_attempts` requests have been made in total. A RuntimeError is raised if the final
    attempt still fails. `indicator` is only used in the progress and error messages.
    """
    # A timeout keeps a stalled connection from hanging the notebook indefinitely
    api_response = requests.get(url, timeout=timeout)
    attempts = 1

    # Error handling for API calls
    while api_response.status_code != 200 and attempts < max_attempts:
        print("Retrying API call for: " + indicator + f" in {retry_delay} seconds.")
        time.sleep(retry_delay)
        api_response = requests.get(url, timeout=timeout)
        attempts += 1

    if api_response.status_code != 200:
        # status_code is an int, so it has to be formatted rather than concatenated
        raise RuntimeError(
            f"API call failed for {indicator} with reason code {api_response.status_code}"
        )

    return api_response.json()


def getEconIndicator(indicator, num, countries, training=False):
    """
    Returns the indicator for the given list of countries as reported by the World Bank in the form of a pandas DataFrame

    Countries must be given as their 3-letter ISO Code (https://countrycode.org/) and in the form of a list.
    A single code given as a plain string is treated as a one-element list.

    Indicator must be one of the keys of ind_dict.

    Num is sent to the API as the `mrnev` query parameter (number of values requested per country).

    When training is False the DataFrame is indexed by country code and has one column named after
    the indicator; if the API returns several rows for a country, the last row returned wins.
    When training is True the DataFrame is indexed by country code + year and has two columns:
    "<indicator> Year" and "<indicator>".

    Raises ValueError for an unsupported indicator and RuntimeError when the API call keeps
    failing or the response carries no data section.
    """
    # Validating Indicators
    if indicator not in ind_dict:
        raise ValueError("This indicator is not supported.")

    # A bare string would otherwise be joined character by character ("U;S;A")
    if isinstance(countries, str):
        countries = [countries]

    # Building request URL.
    wb_api_url = (
        wb_api_base
        + ";".join(countries)
        + "/indicator/"
        + ind_dict[indicator]
        + f"?mrnev={num}&per_page=500&format=json"
    )

    # Collecting the rows of every result page; a single page holds at most per_page rows,
    # so larger requests would otherwise be silently truncated
    entries = []
    page = 1
    total_pages = 1
    while page <= total_pages:
        # The first request is sent without a page parameter (the API's default page)
        page_url = wb_api_url if page == 1 else f"{wb_api_url}&page={page}"
        ind_json = _fetchWorldBankJson(page_url, indicator)

        # A body that is not a [metadata, rows] list carries no data (e.g. an API error
        # message), so fail with a readable error instead of an IndexError
        if not isinstance(ind_json, list) or len(ind_json) < 2:
            raise RuntimeError(f"World Bank API returned no data for {indicator}: {ind_json}")

        page_info, page_entries = ind_json[0], ind_json[1]
        if isinstance(page_info, dict):
            total_pages = int(page_info.get("pages") or 1)

        # The rows element may be null when nothing matched the query
        entries.extend(page_entries or [])
        page += 1

    if training:
        # One row per country and year
        ind_data_dict = {
            entry["countryiso3code"] + entry["date"]: [entry["date"], entry["value"]]
            for entry in entries
        }
        columns = [indicator + " Year", indicator]
    else:
        # One row per country
        ind_data_dict = {entry["countryiso3code"]: [entry["value"]] for entry in entries}
        columns = [indicator]

    # Converting dictionary to pandas Dataframe
    return pd.DataFrame.from_dict(ind_data_dict, orient="index", columns=columns)

In [8]:
def getAllIndicators(countries):
    """
    Returns all of the defined indicators (in ind_dict) for the given list of countries as reported by the World Bank
    Countries must be given as their 3-letter ISO Code (https://countrycode.org/) and in the form of a list

    Each indicator is requested through getEconIndicator with num = 1 and the resulting
    single-column frames are joined side by side on their index (outer join, so a country
    missing from one indicator keeps a NaN in that column).

    Data returned in the form of a pandas DataFrame; an empty DataFrame is returned when
    ind_dict has no entries.
    """
    # One frame per configured indicator, in ind_dict order
    indicator_frames = [getEconIndicator(ind, 1, countries) for ind in ind_dict]

    # pd.concat refuses an empty list, so handle "no indicators configured" explicitly
    if not indicator_frames:
        return pd.DataFrame()

    # Joining all frames in one call instead of growing the result pair by pair
    return pd.concat(indicator_frames, axis="columns", join="outer")

In [6]:
def getAllIndicatorsTraining(num, countries):
    """
    This function is for preparing data for training.

    Num is the number of values for each metric that should be pulled.

    Returns all of the defined indicators (in ind_dict) for the given list of countries as reported by the World Bank
    Countries must be given as their 3-letter ISO Code (https://countrycode.org/) and in the form of a list

    Each indicator is requested through getEconIndicator in training mode and the frames are
    inner-joined on their index, so only rows present for every indicator are kept.
    The per-indicator "<indicator> Year" columns are collapsed into a single "Year" column.

    Data returned in the form of a pandas DataFrame; an empty DataFrame is returned when
    ind_dict has no entries.
    """
    # One frame per configured indicator, in ind_dict order
    indicator_frames = [getEconIndicator(ind, num, countries, True) for ind in ind_dict]

    # pd.concat refuses an empty list, so handle "no indicators configured" explicitly
    if not indicator_frames:
        return pd.DataFrame()

    all_data_df = pd.concat(indicator_frames, axis="columns", join="inner")

    # Keeping the first indicator's year column as "Year" and dropping the remaining ones
    year_cols = [ind + " Year" for ind in ind_dict]
    return (
        all_data_df
        .drop(columns=year_cols[1:])
        .rename(columns={year_cols[0]: "Year"})
    )

In [7]:
def relativeStrength(allIndicators, training=True):
    """
    Takes in the indicators of the full country basket and assigns relative strengths based on the maximum value of each.

    "GDP" and "GDC" are replaced by "Rel GDP" and "Rel GDC", each being the row's value divided
    by a reference maximum:
      - training = True (the default): the maximum within the row's "Year"; the result is
        sorted by "Year". Years are matched by equality, so the column may hold strings or numbers.
      - training = False: the maximum over the whole column.

    Returns a new DataFrame; the input frame is not modified.
    """
    if training:
        relativeIndicators = allIndicators.sort_values("Year")
        # Per-row maximum of the row's year; this avoids scanning the frame once per year
        # and does not depend on the year values being strings
        reference = relativeIndicators.groupby("Year")[["GDP", "GDC"]].transform("max")
    else:
        relativeIndicators = allIndicators.copy()
        # Column-wide maxima
        reference = relativeIndicators[["GDP", "GDC"]].max()

    # Normalizing GDP and GDP per capita by their reference maximum
    relativeIndicators["Rel GDP"] = relativeIndicators["GDP"] / reference["GDP"]
    relativeIndicators["Rel GDC"] = relativeIndicators["GDC"] / reference["GDC"]

    return relativeIndicators.drop(columns=["GDP", "GDC"])