In [17]:
import pandas as pd
import numpy as np
import pycountry
from db_countries import db_countries

# Read the data from pickle files
data_names = ["CRS", "BII", "DIS", "EF", "MTI", "PAKA", "RLI"]
dfs = {}

for data_name in data_names:
    df = pd.read_pickle("pickle/" + data_name + ".pkl")
    dfs[data_name] = df

# Use a common name for index
dfs["CRS"].rename(columns={"index": "ISO3"}, inplace=True)
dfs["BII"].rename(columns={"region": "ISO3"}, inplace=True)
dfs["DIS"].rename(columns={"ISO": "ISO3"}, inplace=True)
dfs["EF"].rename(columns={"ISO alpha-3 code": "ISO3"}, inplace=True)
dfs["MTI"].rename(columns={"ISO": "ISO3"}, inplace=True)
dfs["PAKA"].rename(columns={"ISO": "ISO3"}, inplace=True)
dfs["RLI"].rename(columns={"ISO": "ISO3"}, inplace=True)

'''Convert alpha-3 ISO code to numeric 3166-1 ISO code'''
def convert_alpha3_to_numeric(alpha_3):
    try:
        country = pycountry.countries.get(alpha_3=alpha_3)
    except LookupError as e:
        return np.NAN
    if country is None:
        return np.NAN
    return country.numeric

for data_name, df in dfs.items():
    df["id"] = df["ISO3"].apply(convert_alpha3_to_numeric)
    df.dropna(subset=["id"], inplace=True)
    df["id"] = pd.to_numeric(df["id"], errors="coerce")

# Export as CSV
for data_name, df in dfs.items():
    df.columns = map(str.lower, df.columns)
    if data_name == "EF":
        df["value"] = df["total"]
    if data_name == "MTI":
        df["value"] = df["mti"]
    if data_name != "CRS":
        df = df[["id", "year", "value"]]
    
    df = df[df["id"].isin(db_countries)]

    df.to_csv("csv/" + data_name + ".csv", index=False)

# Diff the countries in the biodiversity data with the countries in the database

In [18]:
all_ids = set()

for name, df in dfs.items():
    all_ids = all_ids.union(set(df["id"]))

missing_countries = {pycountry.countries.get(numeric=f"{id:03}") for id in all_ids - db_countries}
for c in missing_countries:
    print(c.flag, c.name)

🇳🇺 Niue
🇬🇬 Guernsey
🇨🇨 Cocos (Keeling) Islands
🇷🇪 Réunion
🇮🇲 Isle of Man
🇩🇲 Dominica
🇬🇮 Gibraltar
🇧🇶 Bonaire, Sint Eustatius and Saba
🇲🇹 Malta
🇦🇽 Åland Islands
🇦🇸 American Samoa
🇮🇴 British Indian Ocean Territory
🇳🇷 Nauru
🇦🇮 Anguilla
🇬🇵 Guadeloupe
🇺🇲 United States Minor Outlying Islands
🇸🇽 Sint Maarten (Dutch part)
🇱🇨 Saint Lucia
🇦🇼 Aruba
🇸🇨 Seychelles
🇱🇮 Liechtenstein
🇧🇭 Bahrain
🇲🇵 Northern Mariana Islands
🇦🇩 Andorra
🇹🇨 Turks and Caicos Islands
🇸🇬 Singapore
🇻🇦 Holy See (Vatican City State)
🇬🇩 Grenada
🇨🇰 Cook Islands
🇧🇱 Saint Barthélemy
🇻🇨 Saint Vincent and the Grenadines
🇲🇭 Marshall Islands
🇵🇳 Pitcairn
🇬🇸 South Georgia and the South Sandwich Islands
🇲🇸 Montserrat
🇸🇭 Saint Helena, Ascension and Tristan da Cunha
🇲🇶 Martinique
🇰🇲 Comoros
🇯🇪 Jersey
🇻🇬 Virgin Islands, British
🇲🇺 Mauritius
🇸🇯 Svalbard and Jan Mayen
🇨🇻 Cabo Verde
🇲🇴 Macao
🇧🇲 Bermuda
🇵🇼 Palau
🇻🇮 Virgin Islands, U.S.
🇬🇫 French Guiana
🇹🇰 Tokelau
🇲🇫 Saint Martin (French part)
🇹🇻 Tuvalu
🇬🇺 Guam
🇾🇹 Mayotte
🇨🇼 Curaçao
🇭🇰 Hong Kong
🇸