In [4]:
import html
from pathlib import Path

import pandas as pd
import pycountry

from update_countries import generate_countries, read_countries
from utils import (
    insert_section,
    read_html_to_lines,
    write_lines_to_html,
)

In [6]:
# Input/output locations, resolved from the notebook's current working directory:
# the HTML page lives in a sibling "public_html" folder, the countries file in "./data".
TA_HTML_PATH = Path.cwd().parent.joinpath("public_html", "travel_achievements.html")
COUNTRIES_PATH = Path.cwd().joinpath("data", "countries.txt")

In [7]:
# Regenerate the "countries" section of the page stored at TA_HTML_PATH.
# Load the current page content (presumably as a list of lines — confirm in utils).
lines_html = read_html_to_lines(TA_HTML_PATH)
# Read the countries data file and turn it into markup
# (presumably HTML lines, judging by the variable name — confirm in update_countries).
countries = read_countries(COUNTRIES_PATH)
html_countries = generate_countries(countries)
# Put the generated content into the section keyed "countries" and write the
# result back to the same file it was read from.
lines_html = insert_section(lines_html, html_countries, "countries")
write_lines_to_html(lines_html, TA_HTML_PATH)

In [8]:
# Ad-hoc lookup: show every pycountry entry whose name contains "Cy".
[c for c in pycountry.countries if "Cy" in c.name]

[Country(alpha_2='CY', alpha_3='CYP', flag='🇨🇾', name='Cyprus', numeric='196', official_name='Republic of Cyprus')]

In [5]:
# Raw World Heritage sites spreadsheet.
df_sites = pd.read_excel("data/whc-sites-2021.xls")

# Keep every column whose name does not contain "_fr"
# (presumably the French-language variants of the text fields).
non_fr_columns = [col for col in df_sites.columns if "_fr" not in col]
df_sites_en = df_sites[non_fr_columns]

# Columns needed downstream to build the page.
site_columns = [
    "name_en",
    "short_description_en",
    "iso_code",
    "category",
    "states_name_en",
    "criteria_txt",
    "rev_bis",
]

# Independent frame with missing ISO codes replaced by "" so they can be
# handled as strings later on.
df_sites_cats = df_sites_en[site_columns].assign(
    iso_code=lambda frame: frame["iso_code"].fillna("")
)

In [None]:
def get_flag(iso_code: str) -> str:
    """Return the flag emoji for the first country code in ``iso_code``.

    Args:
        iso_code: One alpha-2 code, or several separated by commas; only the
            part before the first comma is used, upper-cased before lookup.

    Returns:
        The ``flag`` attribute of the matching pycountry entry, or an empty
        string when the lookup result is falsy (e.g. for an empty code).
    """
    primary_code, *_ = iso_code.split(",")
    match = pycountry.countries.get(alpha_2=primary_code.upper())
    if not match:
        return ""
    return match.flag


# Prefixes that introduce a city name in a site title (e.g. "Old City of ...").
# get_city_name compares them against the lower-cased title, so entries must be
# lower case; they are tried in list order and the first match wins.
place_descriptors = [
    "city of",
    "old city of",
    "old town of",
    "ancient city of",
    "historic centre of",
    "historic city of",
    "historic town of",
    "sacred city of",
]

# City names recognised directly when a site title starts with them
# (case-insensitive prefix match in get_city_name); used only when no entry of
# place_descriptors matches. Checked in list order, first match wins.
known_cities = [
    "Venice",
    "Budapest",
    "Brasilia",
    "Rio de Janeiro",
    "Tiwanaku",
    "Asmara",
    "Aksum",
    "Nice",
    "Fatehpur Sikri",
    "Jaipur",
    "Dholavira",
    "Hatra",
    "Babylon",
    "Ashur",
    "Anjar",
    "Baalbek",
    "Namhansanseong",
    "San Marino",
    "Kairouan",
    "Ephesus",
    "L'viv",
]

# Cities rendered with a check mark on the page; membership is tested with an
# exact, case-sensitive comparison against the extracted city name.
visited_cities = [
    "Brugge",
    "Prague",
    "Florence",
    "Venice",
    "Siena",
    "Naples",
    "Jerusalem",
    "Valletta",
    "Warsaw",
    "Saint Petersburg",
    "Bath",
    "Rome",
    "L'viv",
    "San Marino",
    "Dubrovnik",
    "Vienna",
]


def extract_name_descriptor(place_name: str, descriptor: str) -> str:
    """Pull the city name that follows ``descriptor`` at the start of a site title.

    Only the headline of the title is considered: everything after the first
    ",", " – " or ": " is discarded. The descriptor's words are then dropped
    from the front, leading filler words ("the", "city", "town", "of") are
    skipped, and words are collected for as long as they start with a character
    that equals its own upper-case form or are one of the connectors "de"/"los".

    Args:
        place_name: Full site title, e.g. "Old City of Jerusalem and its Walls".
        descriptor: The descriptor the title starts with; only its word count
            is used.

    Returns:
        The collected words joined by single spaces; "" if none qualify.
    """
    leading_fillers = {"the", "city", "town", "of"}
    lowercase_connectors = {"de", "los"}

    # Trim the title down to its headline, one separator at a time.
    headline = place_name
    for separator in (",", " – ", ": "):
        headline = headline.split(separator)[0]

    # Drop as many leading words as the descriptor has.
    tokens = headline.split()[len(descriptor.split()):]

    collected = []
    for token in tokens:
        # Filler words are ignored only before the name has started.
        if not collected and token.lower() in leading_fillers:
            continue
        starts_like_name = token[0] == token[0].upper()
        if not (starts_like_name or token in lowercase_connectors):
            break
        collected.append(token)

    return " ".join(collected)


def get_city_name(place_name: str) -> str:
    """Derive a city name from a site title.

    The title is compared case-insensitively, first against the prefixes in
    ``place_descriptors`` and then against the names in ``known_cities``; in
    both cases the first entry (in list order) that the title starts with wins.

    Args:
        place_name: Full site title.

    Returns:
        For a descriptor match, whatever ``extract_name_descriptor`` returns
        (possibly ""); for a known-city match, the name as spelled in
        ``known_cities``; otherwise the literal string "No city" (note that
        this sentinel is truthy).
    """
    lowered = place_name.lower()

    matched_descriptor = next(
        (prefix for prefix in place_descriptors if lowered.startswith(prefix)),
        None,
    )
    if matched_descriptor is not None:
        return extract_name_descriptor(place_name, matched_descriptor)

    return next(
        (name for name in known_cities if lowered.startswith(name.lower())),
        "No city",
    )

In [None]:
# Rebuild the "whs" section of the page: one <p> entry per site for which a
# city name could be derived, with a visited/not-visited mark and the country flag.
lines_html = read_html_to_lines(TA_HTML_PATH)

html_whs = []
for _, r in df_sites_cats.iterrows():
    city = get_city_name(r["name_en"])
    # get_city_name reports "no match" with the truthy sentinel "No city", and
    # its descriptor path can produce an empty string; neither is a real city,
    # so both are skipped instead of being written to the page.
    if not city or city == "No city":
        continue

    country_flag = get_flag(r["iso_code"])
    # Drop any parenthesised suffix from the state name.
    country_name = r["states_name_en"].split(" (")[0]
    visited = "&#x2714" if city in visited_cities else "&#x25A2"

    # Spreadsheet text goes into HTML attributes/content, so escape it: a raw
    # quote, "<" or "&" in a title would otherwise break the generated markup.
    description_attr = html.escape(r["name_en"], quote=True)
    country_attr = html.escape(country_name, quote=True)
    city_text = html.escape(city, quote=False)

    html_whs.append("<p>\n")
    html_whs.append(f"    {visited};\n")
    html_whs.append(f'    <span title="{description_attr}">{city_text}</span>\n')
    html_whs.append(f'    <span title="{country_attr}">{country_flag}</span>\n')
    html_whs.append("</p>\n\n")

lines_html = insert_section(lines_html, html_whs, "whs")
write_lines_to_html(lines_html, TA_HTML_PATH)