# Installs

In [None]:
pip install pymongo==3.11.0

In [None]:
pip install requests

# Imports

In [None]:
from os import environ
from requests import get
from numbers import Number
from datetime import datetime
from pprint import pprint as pp
from pymongo import MongoClient

# Database Link

In [None]:
# Connect to the MongoDB server whose address is given by the
# MONGO_PORT_27017_TCP_ADDR environment variable, on port 27017.
client = MongoClient(host=environ["MONGO_PORT_27017_TCP_ADDR"], port=27017)

# Handles to the "polyplot" database and the two collections used below.
database = client["polyplot"]
indicators = database.indicators
countries = database.countries

# Basic Functions

In [None]:
def find_indicator(query={}, filter={"_id": 0}):
    """Return one document from the ``indicators`` collection as a plain dict.

    ``query`` selects the document and ``filter`` is passed as the projection
    (by default it hides ``_id``). When nothing matches, ``find_one`` yields
    ``None`` and the ``dict(...)`` conversion raises ``TypeError``.
    """

    document = indicators.find_one(query, filter)

    return dict(document)

def find_indicators(query={}, filter={"_id": 0}, sort=[("name", 1)], limit=0):
    """Return the matching ``indicators`` documents as a sorted list.

    An index on the ``sort`` keys is created before querying. ``filter`` is
    passed as the projection and ``limit`` is handed to the cursor unchanged.
    """

    indicators.create_index(sort)

    cursor = indicators.find(query, filter)
    cursor = cursor.sort(sort)
    cursor = cursor.limit(limit)

    return list(cursor)

def update_indicator(indicator):
    """Write ``indicator`` onto the stored document that has the same code.

    Every key of ``indicator`` is applied with ``$set``; the result object of
    ``update_one`` is returned.
    """

    selector = {"code": indicator["code"]}
    changes = {"$set": indicator}

    return indicators.update_one(selector, changes)

def find_country(query={}, filter={"_id": 0}):
    """Return one document from the ``countries`` collection as a plain dict.

    ``query`` selects the document and ``filter`` is passed as the projection
    (by default it hides ``_id``). When nothing matches, ``find_one`` yields
    ``None`` and the ``dict(...)`` conversion raises ``TypeError``.
    """

    document = countries.find_one(query, filter)

    return dict(document)

def find_countries(query={}, filter={"_id": 0}, sort=[("name", 1)], limit=0):
    """Return the matching ``countries`` documents as a sorted list.

    An index on the ``sort`` keys is created before querying. ``filter`` is
    passed as the projection and ``limit`` is handed to the cursor unchanged.
    """

    countries.create_index(sort)

    cursor = countries.find(query, filter)
    cursor = cursor.sort(sort)
    cursor = cursor.limit(limit)

    return list(cursor)

def update_country(country):
    """Write ``country`` onto the stored document that has the same code.

    Every key of ``country`` is applied with ``$set``; the result object of
    ``update_one`` is returned.
    """

    selector = {"code": country["code"]}
    changes = {"$set": country}

    return countries.update_one(selector, changes)

# Classes

## Indicator Class

In [None]:
class Indicator():
    """In-memory copy of one indicator document, with download helpers.

    The instance ``__dict__`` is what callers in this notebook write back to
    the database, so only document fields are stored as instance attributes.
    """

    def __init__(self, indicator):
        """Copy the fields of the ``indicator`` dict onto the instance.

        The descriptive fields are required (a missing key raises
        ``KeyError``); the downloaded/derived fields fall back to defaults.
        """

        self.code = indicator["code"]
        self.name = indicator["name"]
        self.featured = indicator["featured"]
        self.categories = indicator["categories"]

        self.description = indicator["description"]
        self.limitations = indicator["limitations"]
        self.methodology = indicator["methodology"]
        self.relevance = indicator["relevance"]

        self.countries = indicator.get("countries", [])

        self.min_year = indicator.get("min_year")
        self.max_year = indicator.get("max_year")

        self.min_value = indicator.get("min_value")
        self.max_value = indicator.get("max_value")

        self.last_updated = indicator.get("last_updated")

        self.completeness = indicator.get("completeness", 0)
        self.size = indicator.get("size", 0)

    def calculate_size(self):
        """Store the UTF-8 byte length of each country's stringified history.

        Sets ``country["size"]`` on every country and ``self.size`` to their
        sum. Returns ``self`` so calls can be chained.
        """

        size = 0

        for country in self.countries:

            country["size"] = len(str(country["history"]).encode("utf-8"))
            size += country["size"]

        self.size = size

        return self

    def calculate_completeness(self):
        """Store the percentage of history entries that hold a numeric value.

        Sets ``country["completeness"]`` on every country and
        ``self.completeness`` for the indicator as a whole. An empty history,
        or an indicator without countries, counts as 0 instead of dividing by
        zero. Returns ``self`` so calls can be chained.
        """

        total_data = 0
        total_count = 0

        for country in self.countries:

            history = country["history"]

            count = len(history)
            data = sum(1 for entry in history if isinstance(entry["value"], Number))

            country["completeness"] = (data / count) * 100 if count else 0

            total_data += data
            total_count += count

        self.completeness = (total_data / total_count) * 100 if total_count else 0

        return self

    def _needs_refresh(self, remote_stamp):
        """Return True when there is no local copy or the remote one is newer."""

        if not self.last_updated:

            return True

        return datetime.strptime(remote_stamp, "%Y-%m-%d") > datetime.strptime(self.last_updated, "%Y-%m-%d")

    def update(self):
        """Download this indicator's data from the World Bank API.

        Nothing is downloaded when ``last_updated`` is set and the remote
        ``lastupdated`` stamp is not newer. Otherwise every observation whose
        country code appears in the country-code list is added to
        ``self.countries``, the year/value extremes are widened, and size,
        completeness and ``last_updated`` are recomputed.

        The update is best effort: any failure is reported with ``print`` and
        ``self`` is returned as it stands at that point, which may include
        observations already added before the failure.

        NOTE(review): observations are appended to existing histories, so
        refreshing an indicator that already has data looks like it would
        duplicate years; confirm whether histories should be replaced.
        """

        try:

            api = "https://api.worldbank.org/v2/country/all/indicator/"
            meta = get("{}{}?format=json&per_page=1".format(api, self.code)).json()[0]

            if self._needs_refresh(meta["lastupdated"]):

                data = get("{}{}?format=json&per_page={}".format(api, self.code, meta["total"])).json()[1]
                country_info = get("https://gist.githubusercontent.com/jgphilpott/a1366c890935e615f87a6843b72f541a/raw/878e2f31aebde8cf20832f1a0e61a9bc433101ec/countryCodes.js").json()

                # Index the countries once instead of scanning the list for
                # every observation; setdefault keeps the first entry when a
                # code is duplicated, like the original first-match lookup.
                known = {}

                for country in self.countries:

                    known.setdefault(country["code"], country)

                for item in data:

                    iso = item["countryiso3code"]

                    if iso not in country_info:

                        continue

                    try:

                        year_number = int(item["date"])

                    except (TypeError, ValueError):

                        # The date is not a plain integer year; skip this
                        # observation instead of aborting the whole download.
                        continue

                    value = item["value"]

                    # As before, a year of 0 does not take part in the range.
                    if year_number:

                        if self.min_year is None or year_number < self.min_year: self.min_year = year_number
                        if self.max_year is None or year_number > self.max_year: self.max_year = year_number

                    if isinstance(value, Number):

                        # Compare against None explicitly: 0 is a valid extreme
                        # and must not be mistaken for "not set yet".
                        if self.min_value is None or value < self.min_value: self.min_value = value
                        if self.max_value is None or value > self.max_value: self.max_value = value

                    observation = {"year": year_number, "value": value}
                    country = known.get(iso)

                    if country is not None:

                        country["history"].append(observation)

                    else:

                        details = country_info[iso]

                        country = {"code": iso, "name": details["name"], "formal_name": details["formal_name"], "region": details["region"], "factbook": details["factbook"], "wiki": details["wiki"], "history": [observation]}

                        self.countries.append(country)
                        known[iso] = country

                self.calculate_size()
                self.calculate_completeness()
                self.last_updated = datetime.utcnow().strftime("%Y-%m-%d")

        except Exception as error:

            # Best effort: report the failure instead of hiding it, but do not
            # raise, so a loop over many indicators keeps going.
            print("Indicator {}: update failed ({!r})".format(self.code, error))

        return self

## Country Class

In [None]:
class Country():
    """In-memory copy of one country document, with an indicator refresh.

    The instance ``__dict__`` is what callers in this notebook write back to
    the database, so only document fields are stored as instance attributes.
    """

    def __init__(self, country):
        """Copy the fields of the ``country`` dict onto the instance.

        The descriptive fields are required (a missing key raises
        ``KeyError``); the derived fields fall back to defaults.
        """

        self.centroid = country["centroid"]
        self.code = country["code"]
        self.description = country["description"]
        self.factbook = country["factbook"]
        self.formal_name = country["formal_name"]
        self.name = country["name"]
        self.region = country["region"]
        self.wiki = country["wiki"]

        self.min_year = country.get("min_year")
        self.max_year = country.get("max_year")

        self.indicators = country.get("indicators", {})
        self.last_updated = country.get("last_updated")

    def update(self):
        """Rebuild this country's per-indicator entries from the indicators.

        For every indicator that has data, this country's entry is copied
        into ``self.indicators`` under the indicator code (dots replaced by
        dashes), together with the country's own year/value extremes and the
        indicator-wide ``*_total`` figures. ``min_year`` and ``max_year``
        become the extremes of the indicator-wide year ranges seen.

        Indicators that hold no entry for this country are skipped. The
        update is best effort: any failure is reported with ``print`` and
        ``self`` is returned as it stands at that point.
        """

        try:

            time_range = []

            query = {"countries": {"$exists": True, "$ne": []}, "completeness": {"$gt": 0}}
            projection = {"_id": 0, "code": 1, "name": 1, "categories": 1, "min_year": 1, "max_year": 1, "min_value": 1, "max_value": 1, "size": 1, "completeness": 1, "countries": {"$elemMatch": {"code": self.code}}}

            for indicator in find_indicators(query, projection):

                matches = indicator.get("countries")

                # No entry for this country in this indicator: skip it rather
                # than let a KeyError abort the remaining indicators.
                if not matches:

                    continue

                entry = matches[0]

                # Drop the country's own descriptive fields; the entry is
                # re-labelled with the indicator's code and name below.
                for key in ("code", "factbook", "formal_name", "name", "region", "wiki"):

                    entry.pop(key, None)

                entry["code"] = indicator["code"]
                entry["name"] = indicator["name"]
                entry["categories"] = indicator["categories"]

                years = [item["year"] for item in entry["history"] if isinstance(item["year"], Number)]
                values = [item["value"] for item in entry["history"] if isinstance(item["value"], Number)]

                entry["min_year"] = min(years) if years else None
                entry["max_year"] = max(years) if years else None

                entry["min_value"] = min(values) if values else None
                entry["max_value"] = max(values) if values else None

                min_year_total = indicator.get("min_year")
                max_year_total = indicator.get("max_year")

                entry["min_year_total"] = min_year_total
                entry["max_year_total"] = max_year_total

                if isinstance(min_year_total, int): time_range.append(min_year_total)
                if isinstance(max_year_total, int): time_range.append(max_year_total)

                entry["min_value_total"] = indicator.get("min_value")
                entry["max_value_total"] = indicator.get("max_value")

                entry["size_total"] = indicator.get("size")
                entry["completeness_total"] = indicator.get("completeness")

                self.indicators[indicator["code"].replace(".", "-")] = entry

            # min()/max() raise on an empty sequence; keep the previous range
            # when no indicator contributed a year.
            if time_range:

                self.min_year = min(time_range)
                self.max_year = max(time_range)

            self.last_updated = datetime.utcnow().strftime("%Y-%m-%d")

        except Exception as error:

            # Best effort: report the failure instead of hiding it, but do not
            # raise, so a loop over many countries keeps going.
            print("Country {}: update failed ({!r})".format(self.code, error))

        return self

# Setup

First, view a list of all indicators that don't yet have a countries list downloaded.

In [None]:
# Indicators whose "countries" field is missing or is an empty list.
without_countries = {"$or": [{"countries": {"$exists": False}}, {"countries": []}]}
indicators_with_on_data = find_indicators(without_countries, {"_id": 0, "code": 1, "name": 1})

# Print each one (code and name only) so a code can be copied from the output.
for listed in indicators_with_on_data:

    pp(listed)

Next, choose the indicator you would like to download and copy its code into the cell below. To download all of the indicators listed above, type 'ALL' instead.

In [None]:
# Replace the placeholder with the code of the indicator to download, or with "ALL".
code = "PASTE_CODE_HERE"

# Download

**Note: Depending on your connection speed this step might take some time.**

Now, run the cell below to download the selected indicator(s) and update the database.

In [None]:
# Decide which indicator codes to process: every indicator listed earlier
# for "ALL", otherwise only the code that was pasted in.
if code == "ALL":

    targets = (entry["code"] for entry in indicators_with_on_data)

else:

    targets = [code]

# Load each stored indicator, download its data and write the result back.
for target in targets:

    stored = find_indicator({"code": target})
    refreshed = Indicator(stored).update()

    update_indicator(refreshed.__dict__)

Also, run the cell below to update all of the countries in the database with the new indicator(s) data.

In [None]:
# Rebuild every stored country from the current indicator data and save it.
for country in find_countries({}, {"_id": 0, "code": 1}):

    stored = find_country({"code": country["code"]})
    refreshed = Country(stored).update()

    update_country(refreshed.__dict__)

# Verify (one at a time)

Check that the new indicator is now updated in the database.

In [None]:
# Evaluates to True when `code` belongs to an indicator whose "countries" list exists and is not empty.
code in [indicator["code"] for indicator in find_indicators({"countries": {"$exists": True, "$ne": []}}, {"code": 1})]

If you want you can also view/inspect the freshly downloaded indicator.

In [None]:
# Display the stored document of the indicator whose code was selected above.
find_indicator({"code": code})

You should also check that the countries were updated properly. Run the cell below to verify that all countries now have the new indicator data. If the cell does not print "Error!", everything is okay!

In [None]:
# Distinct indicator counts seen across the countries; more than one distinct
# count means the countries do not all hold the same number of indicators.
keys = []

# Indicator codes are stored on countries with dots replaced by dashes.
wanted = code.replace(".", "-")

for country in find_countries():

    present = country["indicators"]

    if wanted not in present:

        pp("Error!")

    total = len(present)

    if total not in keys:

        keys.append(total)

if len(keys) > 1:

    pp("Error!")

If you want you can also view/inspect one of the countries to see that the update was successful.

In [None]:
# Display one country (the first match of an empty query), projected to its code, name and the
# entry of the selected indicator; the key uses dashes in place of the dots in the indicator code.
find_country({}, {"_id": 0, "code": 1, "name": 1, "indicators." + code.replace(".", "-"): 1})

**Congratulations, you have successfully downloaded a new indicator and updated the database! You should now repeat this process for all indicators that you would like to download.**