In [13]:
import pandas as pd
import numpy as np
import pycountry

# Load every dataset from its pickle file, keyed by dataset name.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
data_names = ["CRS", "BII", "DIS", "EF", "MTI", "PAKA", "RLI"]
dfs = {data_name: pd.read_pickle(f"pickle/{data_name}.pkl") for data_name in data_names}

# Each dataset uses a different label for the column that is renamed to the
# shared name "ISO3"; the table below maps dataset -> original column label.
iso_column_by_dataset = {
    "CRS": "index",
    "BII": "region",
    "DIS": "ISO",
    "EF": "ISO alpha-3 code",
    "MTI": "ISO",
    "PAKA": "ISO",
    "RLI": "ISO",
}

for data_name, iso_column in iso_column_by_dataset.items():
    # Renamed in place so the frames stored in `dfs` are the ones modified.
    dfs[data_name].rename(columns={iso_column: "ISO3"}, inplace=True)

def convert_alpha3_to_numeric(alpha_3):
    """Convert an ISO 3166-1 alpha-3 code to its ISO 3166-1 numeric code.

    Parameters
    ----------
    alpha_3 : str
        Three-letter country code to look up with pycountry.

    Returns
    -------
    str or float
        The ``numeric`` attribute of the matching pycountry country, or
        ``np.nan`` when the input is not a string or no country matches.
    """
    # Missing values (None / float NaN) can never match a country; handle them
    # here so the result does not depend on how the installed pycountry
    # version reacts to non-string lookups.
    if not isinstance(alpha_3, str):
        return np.nan
    try:
        country = pycountry.countries.get(alpha_3=alpha_3)
    except LookupError:
        # Some pycountry versions raise instead of returning None.
        return np.nan
    if country is None:
        return np.nan
    return country.numeric

# Give every table a numeric "id" column derived from its "ISO3" column and
# discard the rows whose code could not be resolved to a country.
for df in dfs.values():
    numeric_codes = df["ISO3"].apply(convert_alpha3_to_numeric)
    df["id"] = numeric_codes
    # Unresolved codes come back as NaN; remove those rows in place.
    df.dropna(subset=["id"], inplace=True)
    # Turn the remaining codes into a numeric column.
    df["id"] = pd.to_numeric(df["id"], errors="coerce")

# Export as CSV
# Datasets whose "value" column is copied from another (lower-cased) column.
value_source = {"EF": "total", "MTI": "mti"}

for data_name, df in dfs.items():
    # Lower-case all column labels in place (the frames in `dfs` keep them).
    df.columns = [str.lower(label) for label in df.columns]
    if data_name in value_source:
        df["value"] = df[value_source[data_name]]
    # CRS is written with all of its columns; every other dataset is reduced
    # to the three columns "id", "year" and "value".
    export_frame = df if data_name == "CRS" else df[["id", "year", "value"]]
    export_frame.to_csv("csv/" + data_name + ".csv", index=False)

# Diff the countries in the biodiversity data with the countries in the database

In [2]:
# Union of the "id" values found in every loaded table.
all_ids = set().union(*(set(frame["id"]) for frame in dfs.values()))

# Numeric country ids that the ids collected from the loaded tables are
# compared against.
# NOTE(review): presumably a hard-coded snapshot of the ISO 3166-1 numeric codes
# of the countries stored in the database - confirm the source and keep in sync.
db_countries = {
    4,
    8,
    10,
    12,
    24,
    31,
    32,
    36,
    40,
    44,
    50,
    51,
    56,
    64,
    68,
    70,
    72,
    76,
    84,
    90,
    96,
    100,
    104,
    108,
    112,
    116,
    120,
    124,
    140,
    144,
    148,
    152,
    156,
    158,
    170,
    178,
    180,
    188,
    191,
    192,
    196,
    203,
    204,
    208,
    214,
    218,
    222,
    226,
    231,
    232,
    233,
    238,
    242,
    246,
    250,
    260,
    262,
    266,
    268,
    270,
    275,
    276,
    288,
    300,
    304,
    320,
    324,
    328,
    332,
    340,
    348,
    352,
    356,
    360,
    364,
    368,
    372,
    376,
    380,
    384,
    388,
    392,
    398,
    400,
    404,
    408,
    410,
    414,
    417,
    418,
    422,
    426,
    428,
    430,
    434,
    440,
    442,
    450,
    454,
    458,
    466,
    478,
    484,
    496,
    498,
    499,
    504,
    508,
    512,
    516,
    524,
    528,
    540,
    548,
    554,
    558,
    562,
    566,
    578,
    586,
    591,
    598,
    600,
    604,
    608,
    616,
    620,
    624,
    626,
    630,
    634,
    642,
    643,
    646,
    682,
    686,
    688,
    694,
    703,
    704,
    705,
    706,
    710,
    716,
    724,
    728,
    729,
    732,
    740,
    748,
    752,
    756,
    760,
    762,
    764,
    768,
    780,
    784,
    788,
    792,
    795,
    800,
    804,
    807,
    818,
    826,
    834,
    840,
    854,
    858,
    860,
    862,
    887,
    894,
}

# Countries that occur in the loaded tables but not in `db_countries`.
# The loop variable is deliberately not called `id` (that would shadow the
# builtin), and ids are cast to int before formatting so that a float-typed
# id such as 4.0 still yields the three-digit code "004" expected by pycountry.
missing_countries = {
    pycountry.countries.get(numeric=f"{int(country_id):03d}")
    for country_id in all_ids - db_countries
}

# Sets have no stable iteration order; sort by name so the printed report is
# identical on every run.
for country in sorted(missing_countries, key=lambda entry: entry.name):
    print(country.flag, country.name)

🇰🇾 Cayman Islands
🇵🇲 Saint Pierre and Miquelon
🇫🇲 Micronesia, Federated States of
🇲🇨 Monaco
🇲🇻 Maldives
🇰🇮 Kiribati
🇳🇫 Norfolk Island
🇨🇽 Christmas Island
🇧🇻 Bouvet Island
🇰🇳 Saint Kitts and Nevis
🇵🇫 French Polynesia
🇹🇻 Tuvalu
🇸🇹 Sao Tome and Principe
🇲🇭 Marshall Islands
🇳🇺 Niue
🇬🇬 Guernsey
🇦🇽 Åland Islands
🇷🇪 Réunion
🇮🇲 Isle of Man
🇨🇨 Cocos (Keeling) Islands
🇩🇲 Dominica
🇼🇫 Wallis and Futuna
🇧🇶 Bonaire, Sint Eustatius and Saba
🇲🇹 Malta
🇬🇮 Gibraltar
🇮🇴 British Indian Ocean Territory
🇦🇸 American Samoa
🇧🇧 Barbados
🇺🇲 United States Minor Outlying Islands
🇳🇷 Nauru
🇬🇵 Guadeloupe
🇦🇮 Anguilla
🇱🇨 Saint Lucia
🇸🇽 Sint Maarten (Dutch part)
🇧🇭 Bahrain
🇲🇵 Northern Mariana Islands
🇸🇨 Seychelles
🇱🇮 Liechtenstein
🇭🇰 Hong Kong
🇼🇸 Samoa
🇸🇬 Singapore
🇻🇦 Holy See (Vatican City State)
🇦🇩 Andorra
🇹🇨 Turks and Caicos Islands
🇲🇸 Montserrat
🇧🇱 Saint Barthélemy
🇻🇨 Saint Vincent and the Grenadines
🇦🇼 Aruba
🇬🇩 Grenada
🇦🇬 Antigua and Barbuda
🇨🇰 Cook Islands
🇸🇭 Saint Helena, Ascension and Tristan da Cunha
🇲🇶 Martiniq