In [None]:
"""Create thesaurus associated to the regions of the world.

The thesaurus includes regions, subregions and intermediate regions. The
thesaurus also includes historical regions, subregions and intermediate
regions, for compatibility with Scopus.

"""
import pandas as pd  # type: ignore
import pycountry
from countryinfo import CountryInfo

#
# Current countries with regions.
#
# The ISO alpha-2 code of Namibia is the literal string "NA", which pandas'
# default NA parsing would read as a missing value. The default NA markers are
# therefore disabled and only empty fields are treated as missing.
data_frame = pd.read_csv(
    "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv",
    dtype={
        "country-code": str,
        "region-code": str,
        "sub-region-code": str,
        "intermediate-region-code": str,
    },
    keep_default_na=False,
    na_values=[""],
)
# Keep only the columns used to build the thesaurus files.
data_frame = data_frame[["name", "alpha-2", "alpha-3", "country-code", "region", "sub-region"]]

#
# Historical countries taken from pycountry, one row per entry, using the same
# column names as the current-countries table. The numeric country code is
# not collected here.
historic_rows = [
    (entry.name, entry.alpha_2, entry.alpha_3)
    for entry in pycountry.historic_countries
]
df_pycountry = pd.DataFrame(historic_rows, columns=["name", "alpha-2", "alpha-3"])


# Fill in missing region data: a missing region falls back to the country
# name, and a missing sub-region falls back to the (already completed) region.
#
# The fill is done on whole columns. Assigning into the row objects yielded by
# ``DataFrame.iterrows()`` is not guaranteed to write back to the frame, so
# the missing values could silently remain in place.
for target, fallback in (
    ("region", "name"),
    ("sub-region", "region"),
):
    data_frame = data_frame.assign(
        **{target: data_frame[target].fillna(data_frame[fallback])}
    )


# Write the countries thesaurus: for every country (ordered by alpha-2 code)
# a lower-cased key line followed by an indented line with the original name.
data_frame = data_frame.sort_values(by="alpha-2")
# Country names contain non-ASCII characters, so the encoding is fixed to
# UTF-8 instead of relying on the platform default.
with open("../thesaurus/countries.txt", "w", encoding="utf-8") as file:
    for name in data_frame["name"]:
        file.write(f"{name.lower()}\n")
        file.write(f"    {name}\n")


# data_frame = data_frame.sort_values(by="alpha-2")
# with open("../thesaurus/alpha2-to-country.txt", "w") as file:
#     for _, row in data_frame.iterrows():
#         file.write(f"{row['alpha-2']}\n")
#         file.write(f"    {row['name'].lower()}\n")

# data_frame = data_frame.sort_values(by="name")
# with open("../thesaurus/country-to-region.txt", "w") as file:
#     for _, row in data_frame.iterrows():
#         file.write(f"{row['name'].lower()}\n")
#         file.write(f"    {row['region'].lower()}\n")

# data_frame = data_frame.sort_values(by="name")
# with open("../thesaurus/country-to-sub-region.txt", "w") as file:
#     for _, row in data_frame.iterrows():
#         file.write(f"{row['name'].lower()}\n")
#         file.write(f"    {row['sub-region'].lower()}\n")

In [None]:
#