In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
# Directory layout used by this notebook (relative paths)
data = Path("..", "data")
map_path = data.joinpath("graph3_map")    # inputs/outputs of the map graph
plot_path = data.joinpath("plots")
stat_path = map_path.joinpath("stats")
geojson_path = map_path.joinpath("geojson")  # GeoJSON files read and written below

import json

# Load geojson data

- `custom.geo.json` contains fewer features/countries than `custom50.json`, but it stores the Japanese name of each feature/country (the `name_ja` property), so we need to use it
- However, there are a few inconsistencies in the names (`admin` property) between the two files that we need to fix first

In [2]:
# Load custom.geo.json
# The file stores Japanese names (`name_ja`), so decode it explicitly as UTF-8
# instead of relying on the platform's default text encoding.
with open(geojson_path / "custom.geo.json", encoding="utf-8") as f:
    custom = json.load(f)

# One row per GeoJSON feature, keeping only the properties used below:
# `admin` (as "country"), `sovereignt` (as "country_aff") and `name_ja`.
df_custom = pd.DataFrame(
    [
        {
            "country": feature["properties"]["admin"],
            "country_aff": feature["properties"]["sovereignt"],
            "name_ja": feature["properties"]["name_ja"]
        }
        for feature in custom["features"]
    ]
)

In [3]:
# Load custom50.json
# Country names in this file include non-ASCII characters (e.g. "Curaçao"), so
# decode it explicitly as UTF-8 instead of relying on the platform default.
with open(geojson_path / "custom50.json", encoding="utf-8") as f:
    custom50 = json.load(f)

# One row per GeoJSON feature, keeping only the properties used below:
# `admin` (as "country") and `sovereignt` (as "country_aff").
df_custom50 = pd.DataFrame(
    [
        {
            "country": feature["properties"]["admin"],
            "country_aff": feature["properties"]["sovereignt"],
        }
        for feature in custom50["features"]
    ]
)

As mentioned above, `custom50.json` contains more data than `custom.geo.json`:

In [4]:
# Countries present in custom50 that have no entry in custom
set(df_custom50["country"]).difference(df_custom["country"])

{'Aland',
 'American Samoa',
 'Andorra',
 'Anguilla',
 'Antarctica',
 'Antigua and Barbuda',
 'Aruba',
 'Ashmore and Cartier Islands',
 'Bahrain',
 'Barbados',
 'Bermuda',
 'British Indian Ocean Territory',
 'British Virgin Islands',
 'Cape Verde',
 'Cayman Islands',
 'Comoros',
 'Cook Islands',
 'Curaçao',
 'Czech Republic',
 'Dominica',
 'Faroe Islands',
 'Federated States of Micronesia',
 'French Polynesia',
 'French Southern and Antarctic Lands',
 'Grenada',
 'Guam',
 'Guernsey',
 'Guinea Bissau',
 'Heard Island and McDonald Islands',
 'Hong Kong S.A.R.',
 'Indian Ocean Territories',
 'Isle of Man',
 'Jersey',
 'Kiribati',
 'Liechtenstein',
 'Macao S.A.R',
 'Macedonia',
 'Maldives',
 'Malta',
 'Marshall Islands',
 'Mauritius',
 'Monaco',
 'Montserrat',
 'Nauru',
 'Niue',
 'Norfolk Island',
 'Northern Mariana Islands',
 'Palau',
 'Pitcairn Islands',
 'Republic of Congo',
 'Saint Barthelemy',
 'Saint Helena',
 'Saint Kitts and Nevis',
 'Saint Lucia',
 'Saint Martin',
 'Saint Pierre a

In [5]:
# Countries present in custom that have no entry in custom50
set(df_custom["country"]).difference(df_custom50["country"])

{'Czechia',
 'Guinea-Bissau',
 'North Macedonia',
 'Republic of the Congo',
 'eSwatini'}

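These correspond to naming inconsistencies: the same countries are named differently in the two files. We fix them by applying the following mapping (`custom.geo.json` name -> `custom50.json` name):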
- Czechia -> Czech Republic
- Guinea-Bissau -> Guinea Bissau
- North Macedonia -> Macedonia
- Republic of the Congo -> Republic of Congo
- eSwatini -> Swaziland

In [6]:
# Names as spelled in custom.geo.json -> names as spelled in custom50.json
custom_to_custom50 = {
    "Czechia": "Czech Republic",
    "Guinea-Bissau": "Guinea Bissau",
    "North Macedonia": "Macedonia",
    "Republic of the Congo": "Republic of Congo",
    "eSwatini": "Swaziland"
}

# Rename the values of the "country" (admin) and "country_aff" (sovereignt) columns
# of df_custom according to the mapping; names that are not in the mapping are kept
# unchanged. Only the DataFrame is modified, not the loaded GeoJSON.
for column in ("country", "country_aff"):
    df_custom[column] = df_custom[column].map(
        lambda name: custom_to_custom50.get(name, name)
    )

# After renaming, every country of custom must also exist in custom50
assert set(df_custom["country"]) <= set(df_custom50["country"])

Now we can "join" `custom` to `custom50`: for each feature of `custom50`, we add a `name_ja` field taken from the `custom` entry that has the same `admin` name (features without a match get an empty `name_ja`)


In [7]:
# Build the country -> Japanese name lookup once, instead of filtering the whole
# DataFrame for every feature. Keeping the first row per country matches what a
# per-feature `df_custom[...]["name_ja"].values[0]` lookup would return.
name_ja_by_country = (
    df_custom.drop_duplicates(subset="country", keep="first")
    .set_index("country")["name_ja"]
    .to_dict()
)

for feature in custom50["features"]:
    properties = feature["properties"]
    # Features that have no counterpart in custom get an empty Japanese name
    properties["name_ja"] = name_ja_by_country.get(properties["admin"], "")

# custom50 now contains the name_ja field for each feature/country
with open(geojson_path / "custom50_ja.json", "w", encoding="utf-8") as f:
    json.dump(custom50, f)