In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
# Directory layout used throughout the notebook; every path hangs off `data`.
data = Path("../data")
plot_path = data.joinpath("plots")
map_path = data.joinpath("graph3_map")
# Sub-directories of the map folder.
stat_path = map_path.joinpath("stats")
geojson_path = map_path.joinpath("geojson")

import json

# Load GeoJSON data


In [21]:
# Load custom.geo.json
# The encoding is pinned to UTF-8 (the encoding GeoJSON is specified in) so the
# read does not depend on the platform's default locale; the data includes
# non-ASCII names such as 'Réunion'.
with open(geojson_path / "custom.geo.json", encoding="utf-8") as f:
    custom = json.load(f)

# One row per feature, built from its "admin", "sovereignt" and "name_ja"
# properties. A feature missing any of these keys raises KeyError.
df_custom = pd.DataFrame(
    [
        {
            "country": feature["properties"]["admin"],
            "country_aff": feature["properties"]["sovereignt"],
            "name_ja": feature["properties"]["name_ja"]
        }
        for feature in custom["features"]
    ]
)
# Write the table to CSV, ordered by the sovereign-state column.
df_custom.sort_values(by="country_aff").to_csv(map_path / "custom_countries.csv", index=False)

In [22]:
# Load custom50.json
# The encoding is pinned to UTF-8 (the encoding GeoJSON is specified in) so the
# read does not depend on the platform's default locale; the data includes
# non-ASCII names such as 'Curaçao'.
with open(geojson_path / "custom50.json", encoding="utf-8") as f:
    custom50 = json.load(f)

# One row per feature, built from its "admin" and "sovereignt" properties.
# A feature missing either key raises KeyError.
df_custom50 = pd.DataFrame(
    [
        {
            "country": feature["properties"]["admin"],
            "country_aff": feature["properties"]["sovereignt"],
        }
        for feature in custom50["features"]
    ]
)
# Write the table to CSV, ordered by the sovereign-state column.
df_custom50.sort_values(by="country_aff").to_csv(map_path / "custom50_countries.csv", index=False)

In [4]:
# Country names that appear in df_custom but have no identically spelled entry in df_custom50
set(df_custom["country"]).difference(df_custom50["country"])

{'Czechia',
 'Guadeloupe',
 'Guinea-Bissau',
 'Martinique',
 'Mayotte',
 'North Macedonia',
 'Republic of the Congo',
 'Réunion',
 'eSwatini'}

Fixed (name in custom -> name in custom50):
- Czechia -> Czech Republic
- Guinea-Bissau -> Guinea Bissau
- North Macedonia -> Macedonia
- Republic of the Congo -> Republic of Congo
- eSwatini -> Swaziland

In [26]:
# Spelling used in custom (key) -> spelling used in custom50 (value).
custom_to_custom50 = {
    "Czechia": "Czech Republic",
    "Guinea-Bissau": "Guinea Bissau",
    "North Macedonia": "Macedonia",
    "Republic of the Congo": "Republic of Congo",
    "eSwatini": "Swaziland",
}

# Rewrite the "admin" and "sovereignt" properties of the custom features in
# place wherever the current value has an entry in the mapping; every other
# value is left untouched.
for feature in custom["features"]:
    props = feature["properties"]
    for key in ("admin", "sovereignt"):
        if props[key] in custom_to_custom50:
            props[key] = custom_to_custom50[props[key]]

# Rebuild the country table from the renamed features.
rows = []
for feature in custom["features"]:
    props = feature["properties"]
    rows.append(
        {
            "country": props["admin"],
            "country_aff": props["sovereignt"],
            "name_ja": props["name_ja"],
        }
    )
df_custom = pd.DataFrame(rows)
df_custom.sort_values("country_aff").to_csv(
    map_path / "custom_countries.csv",
    index=False,
)

# Country names still present in custom but absent from custom50 after renaming
set(df_custom["country"]).difference(df_custom50["country"])

{'Guadeloupe', 'Martinique', 'Mayotte', 'Réunion'}

In [28]:
# "Join" custom to custom50 by adding the "name_ja" field to custom50 that correspond to the "name_ja" field of custom (based on "admin")
# Build the country -> name_ja lookup once instead of scanning and filtering
# the DataFrame for every feature. drop_duplicates keeps the first row per
# country, so the first occurrence wins if a name appears more than once.
name_ja_by_country = (
    df_custom.drop_duplicates(subset="country")
    .set_index("country")["name_ja"]
    .to_dict()
)

# Copy "name_ja" onto each custom50 feature whose "admin" name is known to
# df_custom; features without a match are left unchanged (no "name_ja" added).
for feature in custom50["features"]:
    admin = feature["properties"]["admin"]
    if admin in name_ja_by_country:
        feature["properties"]["name_ja"] = name_ja_by_country[admin]

# Persist the enriched features. The encoding is set explicitly so the output
# file does not depend on the platform's default locale.
with open(geojson_path / "custom50_processed.json", "w", encoding="utf-8") as f:
    json.dump(custom50, f)

In [5]:
# Country names that appear in df_custom50 but have no identically spelled entry in df_custom
set(df_custom50["country"]).difference(df_custom["country"])

{'Aland',
 'American Samoa',
 'Andorra',
 'Anguilla',
 'Antarctica',
 'Antigua and Barbuda',
 'Aruba',
 'Ashmore and Cartier Islands',
 'Bahrain',
 'Barbados',
 'Bermuda',
 'British Indian Ocean Territory',
 'British Virgin Islands',
 'Cape Verde',
 'Cayman Islands',
 'Comoros',
 'Cook Islands',
 'Curaçao',
 'Czech Republic',
 'Dominica',
 'Faroe Islands',
 'Federated States of Micronesia',
 'French Polynesia',
 'French Southern and Antarctic Lands',
 'Grenada',
 'Guam',
 'Guernsey',
 'Guinea Bissau',
 'Heard Island and McDonald Islands',
 'Hong Kong S.A.R.',
 'Indian Ocean Territories',
 'Isle of Man',
 'Jersey',
 'Kiribati',
 'Liechtenstein',
 'Macao S.A.R',
 'Macedonia',
 'Maldives',
 'Malta',
 'Marshall Islands',
 'Mauritius',
 'Monaco',
 'Montserrat',
 'Nauru',
 'Niue',
 'Norfolk Island',
 'Northern Mariana Islands',
 'Palau',
 'Pitcairn Islands',
 'Republic of Congo',
 'Saint Barthelemy',
 'Saint Helena',
 'Saint Kitts and Nevis',
 'Saint Lucia',
 'Saint Martin',
 'Saint Pierre a

# Custom / Countries

In [9]:
# Read the country statistics table and write its "country" column, sorted
# ascending, to countries.csv (without the index).
df_countries = pd.read_csv(map_path / "stats/country_num_users.csv")
df_countries.loc[:, "country"].sort_values(ascending=True).to_csv(
    map_path / "countries.csv",
    index=False,
)

In [10]:
# Names in df_countries with no identically spelled entry in df_custom50:
# either there is no geojson feature for them, or the two sources spell the name differently
set(df_countries["country"]).difference(df_custom50["country"])

{'Bahamas',
 'Bouvet Island',
 'Brunei Darussalam',
 'Cabo Verde',
 'Canarias',
 'Christmas Island',
 'Congo',
 'Congo DRC',
 'Curacao',
 "Côte d'Ivoire",
 'Eswatini',
 'French Guiana',
 'North Macedonia',
 'Null Island',
 'Palestinian Territory',
 'Pitcairn',
 'Russian Federation',
 'Réunion',
 'Serbia',
 'Svalbard',
 'Tanzania',
 'US Virgin Islands',
 'United States',
 'Vatican City'}

Not fixed:
- Bouvet Island
- Canarias
- Christmas Island
- French Guiana
- Null Island
- Réunion
- Svalbard

In [19]:
# Names in df_custom50 with no identically spelled entry in df_countries:
# either they have no users, or the two sources spell the name differently (61 noted by the author)
set(df_custom50["country"]).difference(df_countries["country"])

{'Aland',
 'American Samoa',
 'Ashmore and Cartier Islands',
 'British Indian Ocean Territory',
 'Brunei',
 'Burundi',
 'Cape Verde',
 'Central African Republic',
 'Cook Islands',
 'Curaçao',
 'Democratic Republic of the Congo',
 'East Timor',
 'Eritrea',
 'Ethiopia',
 'Falkland Islands',
 'Federated States of Micronesia',
 'French Southern and Antarctic Lands',
 'Gambia',
 'Guinea Bissau',
 'Heard Island and McDonald Islands',
 'Hong Kong S.A.R.',
 'Indian Ocean Territories',
 'Ivory Coast',
 'Japan',
 'Lesotho',
 'Macao S.A.R',
 'Macedonia',
 'Malawi',
 'Montserrat',
 'Nauru',
 'Niger',
 'Niue',
 'Norfolk Island',
 'Northern Cyprus',
 'Palau',
 'Palestine',
 'Pitcairn Islands',
 'Republic of Congo',
 'Republic of Serbia',
 'Russia',
 'Rwanda',
 'Saint Barthelemy',
 'Saint Helena',
 'Saint Martin',
 'Sao Tome and Principe',
 'Siachen Glacier',
 'Sint Maarten',
 'Somaliland',
 'South Sudan',
 'Swaziland',
 'Tajikistan',
 'The Bahamas',
 'Tonga',
 'Turkmenistan',
 'United Republic of Ta