In [1]:
%%capture captured_output
!pip install pandas requests wget datetime numpy requests

In [2]:
import json
import pandas as pd
import requests
import wget
from datetime import datetime
import numpy as np
import requests
import json
import os

import ipywidgets as widgets
from IPython.display import display, clear_output
from jupyter_ui_poll import ui_events

import time

In [3]:
# Inclusion criteria parameters.
# Edit these values to experiment with different thresholds and see how the
# resulting country_inclusion output changes.

# A country passes the population test when its IMF population for the current
# Atlas year is strictly greater than this value.
POPULATION_THRESHOLD = 1_000_000
# A country passes the export test when its mean Atlas exportValue is strictly
# greater than this value.
EXPORTS_THRESHOLD = 1_000_000_000
# Years handed to recent_df(); an IMF indicator counts as reported only when it
# is non-null in every row found for these years.
REPORTED_YEARS = [2019, 2020, 2021]

# Data sources behind each criterion:
# 1. REPORTED_YEARS is checked against the IMF data for "current_account", "gdp", "gdp_ppp" and "population".
# 2. EXPORTS_THRESHOLD is applied to export values extracted from the Atlas API;
#    POPULATION_THRESHOLD is applied to the IMF population for the current Atlas year.

In [4]:
# The Atlas data year trails the calendar year: by two years once the month is
# past July, and by three years up to and including July.
current_atlas_year = datetime.now().year - (2 if datetime.now().month > 7 else 3)

# Directory where downloaded inputs and generated CSV files are stored.
output_dir = "/n/hausmann_lab/lab/*data_tools_for_GL/country_inclusion_output"

In [5]:
class InclusionCriteria:
    """Container for the three settings that drive the country inclusion run.

    Attributes:
    - population_threshold: value a country's population is compared against
    - exports_threshold: value a country's exports are compared against
    - reported_years: years for which indicator data is expected
    """

    def __init__(self, population_threshold, exports_threshold, reported_years):
        self.population_threshold = population_threshold
        self.exports_threshold = exports_threshold
        self.reported_years = reported_years

    def __repr__(self):
        # Single-line, balanced representation. The previous multi-line template
        # embedded the source indentation in the output and ended with a closing
        # parenthesis that had no opening counterpart.
        return (
            "InclusionCriteria("
            f"population_threshold={self.population_threshold!r}, "
            f"exports_threshold={self.exports_threshold!r}, "
            f"reported_years={self.reported_years!r})"
        )

In [6]:
def _int_input(label):
    """Return an integer text box labelled `label`, with description_width set to "initial"."""
    return widgets.IntText(description=label, style={"description_width": "initial"})


# Threshold inputs.
pop_widget = _int_input("Set population threshold:")
export_widget = _int_input("Set exports threshold:")

# First and last year of the reporting window.
start_reporting_year_widget = _int_input("Started reporting in:")
end_reporting_year_widget = _int_input("Reported through:")

# Button meant to submit the values entered in the inputs above.
submit_button = widgets.Button(description="Update Inclusion Criteria")

In [7]:
# Stack the four inputs vertically into a single form container.
form = widgets.VBox(
    children=[
        pop_widget,
        export_widget,
        start_reporting_year_widget,
        end_reporting_year_widget,
    ]
)

# Rendering is currently switched off; uncomment to show the form.
# display(form)

In [8]:
def get_input_data():
    """
    Gets input data to run country inclusion analysis.

    The location and IMF files are downloaded into `output_dir` only when they
    are not already present there; the Atlas extract is rebuilt on every call.

    Returns (in this order):
    - location_country_df: pandas df read from location_country.csv
    - imf_df: pandas df read from imf_data.csv
    - atlas_df: pandas df read from the atlas.csv written by
      atlas_pop_and_export_values
    """
    os.makedirs(output_dir, exist_ok=True)

    # Each file is read from the same location that is checked and downloaded
    # to. Previously the existence check used output_dir while the read used a
    # different hard-coded directory, so a fresh download was never the file
    # that was loaded.
    location_country_path = os.path.join(output_dir, "location_country.csv")
    if not os.path.exists(location_country_path):
        location_country_url = "https://docs.google.com/uc?export=download&confirm=t&id=1JfbMKZ_utY18p5lW60syjX5QETAYpw2x"
        wget.download(location_country_url, out=output_dir)
    location_country_df = pd.read_csv(location_country_path)

    imf_data_path = os.path.join(output_dir, "imf_data.csv")
    if not os.path.exists(imf_data_path):
        imf_data_url = "https://docs.google.com/uc?export=download&confirm=t&id=1dLt0VKt-aCXc-P7LnYT_TqM99xFHMJyc"
        wget.download(imf_data_url, out=output_dir)
    imf_df = pd.read_csv(imf_data_path)

    # No cache check here: atlas.csv is regenerated from the API each time and
    # then read back from disk.
    atlas_pop_and_export_values(imf_df)
    atlas_df = pd.read_csv(os.path.join(output_dir, "atlas.csv"))

    return location_country_df, imf_df, atlas_df

In [9]:
def _post_atlas_query(url, query):
    """POST a GraphQL query to `url` and return the `data` member of the JSON reply."""
    response = requests.post(url=url, json={"query": query}, timeout=120)
    # Fail on HTTP errors here instead of with an unrelated KeyError later on.
    response.raise_for_status()
    payload = response.json()
    if payload.get("data") is None:
        raise RuntimeError(
            "Atlas API returned no data: {}".format(payload.get("errors"))
        )
    return payload["data"]


def atlas_pop_and_export_values(imf_data):
    """
    Query the Atlas API for per-country export and population values, attach the
    threshold flags and write the result to `atlas.csv` in `output_dir`.

    Parameters:
    - imf_data: pandas df with at least the columns "code", "year" and
      "population"; it is not modified.

    Written columns include, per country:
    - the Atlas fields of the most recent year returned for that country
    - avg_export: mean exportValue over all years returned
    - imf_population: IMF population for `current_atlas_year`
    - min_population / min_avg_export: 1 when the value is strictly greater
      than POPULATION_THRESHOLD / EXPORTS_THRESHOLD, otherwise 0

    Returns:
    - None; the data is persisted as a CSV file.

    Raises:
    - requests.HTTPError for a non-success HTTP status
    - RuntimeError when the API reply carries no `data`
    """
    atlas_api = "https://dev.rewrite.cid-labs.com/api/graphql"

    # Request the current Atlas year plus the three years before it.
    min_year = str(current_atlas_year - 3)
    total_exports_request_template = """{{countryYear (yearMin: {year})
                        {{
                          countryId
                          year
                          exportValue
                          population}}
                      }}"""
    total_exports_request = total_exports_request_template.format(year=min_year)
    country_codes_request = """
                  {
                    locationCountry
                    {
                      countryId
                      iso3Code
                      inRankings
                      inCp
                    }
                  }
                  """

    export_data = _post_atlas_query(atlas_api, total_exports_request)["countryYear"]
    country_code_data = _post_atlas_query(atlas_api, country_codes_request)[
        "locationCountry"
    ]

    country_years = pd.json_normalize(export_data).merge(
        pd.json_normalize(country_code_data), on="countryId", how="left"
    )

    avg_export = country_years.groupby("iso3Code")["exportValue"].mean()

    # Keep the row of the latest year for each country. The previous
    # `groupby(...).max("year")` passed "year" as `numeric_only`, which takes the
    # maximum of every numeric column independently, so exportValue and
    # population could come from different years.
    atlas = (
        country_years.dropna(subset=["iso3Code"])
        .sort_values("year", na_position="first", kind="stable")
        .drop_duplicates(subset="iso3Code", keep="last")
        .set_index("iso3Code")
        .sort_index()
    )
    atlas["avg_export"] = avg_export  # aligned on the iso3Code index

    imf_current = imf_data.loc[imf_data["year"] == current_atlas_year].rename(
        columns={"population": "imf_population"}
    )
    # Left join keeps every Atlas country. The `code` column contributed by the
    # IMF frame is what generate_country_inclusion_criteria later renames to
    # `iso3_code`, so a country without an IMF row for the current Atlas year has
    # a missing code there.
    # NOTE(review): the iso3Code index level appears not to survive this mixed
    # index/column merge - confirm before relying on it in atlas.csv.
    atlas = atlas.merge(
        imf_current[["code", "imf_population"]],
        how="left",
        left_on="iso3Code",
        right_on="code",
    )

    # The population test uses the IMF figure, not the Atlas `population` column.
    # A missing value compares as False and therefore yields 0.
    atlas["min_population"] = np.where(
        atlas["imf_population"] > POPULATION_THRESHOLD, 1, 0
    )
    atlas["min_avg_export"] = np.where(atlas["avg_export"] > EXPORTS_THRESHOLD, 1, 0)

    # Make the function usable on its own, not only via get_input_data.
    os.makedirs(output_dir, exist_ok=True)
    atlas.to_csv(os.path.join(output_dir, "atlas.csv"))

In [10]:
# Lookup from IMF indicator code to the name Growth Lab Atlas uses for it.
IMF_INDICATORS = dict(
    NGDPD="gdp",
    PPPGDP="gdp_ppp",
    LP="population",
    BCA="current_account",
)

In [11]:
def generate_country_inclusion_criteria():
    """
    Build the country inclusion table and write it to `country_inclusion.csv`
    in `output_dir`.

    Inputs come from get_input_data(). A country gets include = 1 only when
    imf_all, min_avg_export and min_population are all 1. All flag columns are
    written as integers (0 or 1).

    Returns:
    - None; the table is persisted as a CSV file.
    """
    location_country_df, imf_df, atlas_df = get_input_data()

    atlas_flag_cols = ["inCp", "inRankings", "min_avg_export", "min_population"]
    imf_flag_cols = [
        "imf_current_account",
        "imf_gdp",
        "imf_gdp_ppp",
        "imf_population",
    ]

    # Atlas side: country name plus the Atlas-derived flags, keyed by `code`.
    # Rows with any missing value are dropped, which removes countries whose
    # code is missing in atlas.csv.
    current_state = (
        location_country_df[["name_en", "iso3_code"]]
        .merge(atlas_df.rename(columns={"code": "iso3_code"}), on="iso3_code")
        .loc[:, ["iso3_code", "name_en", *atlas_flag_cols]]
        .rename(columns={"iso3_code": "code"})
        .dropna()
    )

    # IMF side: one row per country with 0/1 availability flags.
    imf_recent = recent_df(imf_df, REPORTED_YEARS).rename(
        columns={
            "current_account": "imf_current_account",
            "gdp": "imf_gdp",
            "gdp_ppp": "imf_gdp_ppp",
            "population": "imf_population",
            "all": "imf_all",
        }
    )

    merged = imf_recent.merge(current_state, on="code", how="outer")

    # Keep only countries known on the Atlas side, then treat every flag that is
    # still missing as 0. Building a new frame here (instead of filtering and
    # then mutating in place) avoids writing into a slice of `merged`.
    country_inclusion = merged.loc[merged["name_en"].notna()].fillna(0)

    # Explicit integer cast replaces `replace({False: 0, True: 1})`, which
    # depended on pandas silently downcasting the result.
    flag_cols = ["imf_all", *atlas_flag_cols, *imf_flag_cols]
    country_inclusion = country_inclusion.astype({col: int for col in flag_cols})

    country_inclusion["include"] = (
        country_inclusion[["imf_all", "min_avg_export", "min_population"]]
        .eq(1)
        .all(axis=1)
        .astype(int)
    )

    country_inclusion = country_inclusion.loc[
        :,
        [
            "code",
            "name_en",
            "include",
            "inCp",
            "inRankings",
            "imf_all",
            "min_avg_export",
            "min_population",
            *imf_flag_cols,
        ],
    ]
    country_inclusion.to_csv(os.path.join(output_dir, "country_inclusion.csv"))

In [12]:
def recent_df(df, years):
    """
    Summarise, per country, which IMF indicators are reported for `years`.

    Parameters:
    - df: pandas df with the columns "code", "year", "current_account", "gdp",
      "gdp_ppp" and "population"; it is not modified.
    - years: collection of years to check (required, there is no default).

    Returns:
    - pandas df indexed by "code" with one integer column per indicator
      (1 when the indicator is non-null in every row found for `years`,
      otherwise 0) and an "all" column that is 1 only when all four are 1.
      Countries with no row in `years` are absent from the result.
    """
    indicator_cols = ["current_account", "gdp", "gdp_ppp", "population"]

    in_window = df.loc[df["year"].isin(years)]

    # NOTE(review): only rows that exist are checked, so a country with no row
    # at all for one of `years` is not penalised - confirm the input carries one
    # row per country and year.
    reported = in_window[indicator_cols].notna().groupby(in_window["code"]).all()

    # Explicit cast instead of `replace({False: 0, True: 1})`, which depended on
    # pandas silently downcasting the result.
    availability = reported.astype(int)
    availability["all"] = reported.all(axis=1).astype(int)
    return availability

In [13]:
# ...
# while True:
#     clicked = False
#     def on_button_clicked(b):
#         global clicked
#         clicked = True
#         # with output:
#         #     logging.debug("button clicked")

#     submit_button.on_click(on_button_clicked)

#     with ui_events() as poll:
#         while not clicked:
#             # logging.debug("waiting...")
#             poll(1000) # poll queued UI events including button
#             time.sleep(1) # wait for 1 second before checking again

#     # initiate code run
#     logging.info("------------------------")
#     logging.info('Data request initiated..')
#     inclusion_obj = InclusionCriteria(
#         population_threshold = pop_widget.value,
#         exports_threshold = export_widget.value,
#         reported_years = list(range(start_reporting_year.value, end_reporting_year.value))
#     )
#     inclusion_obj.run()

In [14]:
# Entry point: run the whole pipeline when this notebook/script is executed
# directly. The call returns nothing; its results are the CSV files written
# under output_dir.
if __name__ == "__main__":
    generate_country_inclusion_criteria()