# In [6]:
import pandas as pd
import json
import numpy as np

# Territory index: load the CSV, keep only the rows whose uzemi_kod is exactly
# six characters long when rendered as text, drop the "nb_digits" column and
# key the table by uzemi_kod.
df_uzemi = (
    pd.read_csv("docs/uzemi_index.csv")
    .loc[lambda frame: frame["uzemi_kod"].map(str).str.len() == 6]
    .drop(columns=["nb_digits"])
    .set_index("uzemi_kod")
)

# City name -> uzemi_kod for the territories that are further subdivided.
# Only the codes are used by the rest of the script; the names document which
# city each code stands for.
_subdivided_code_by_city = {
    "Praha": 554782,
    "Brno": 582786,
    "Ostrava": 554821,
    "Plzeň": 554791,
    "Liberec": 563889,
    "Ústí nad Labem": 554804,
    "Opava": 505927,
    "Pardubice": 555134,
}

# Plain list of the codes, in the order given above.
uzemi_codes_which_are_futher_subdivided = list(_subdivided_code_by_city.values())

# Drop every row whose index (uzemi_kod) is one of the further-subdivided
# codes listed in uzemi_codes_which_are_futher_subdivided.
df_uzemi = df_uzemi.loc[
    lambda frame: ~frame.index.isin(uzemi_codes_which_are_futher_subdivided)
]

# Population table: read the CSV and keep only the code column and the column
# named "0", which is renamed to "population" below.
df_population = pd.read_csv("docs/vira_by_uzemi.csv")
df_population = df_population[["uzemi_kod", "0"]]
# only keep rows where uzemi_kod is six characters long when rendered as text
df_population = df_population[
    df_population["uzemi_kod"].map(str).str.len() == 6
]
df_population = df_population.set_index("uzemi_kod")
df_population.columns = ["population"]

# Remove the rows of territories that are further subdivided. The explicit
# copy makes the result an independent frame, so adding a column below is an
# unambiguous write and does not trigger SettingWithCopyWarning.
df_population = df_population[
    ~df_population.index.isin(uzemi_codes_which_are_futher_subdivided)
].copy()

# Attach the territory name; values are aligned on the uzemi_kod index, so
# codes that are absent from df_uzemi get a missing name.
df_population["uzemi_txt"] = df_uzemi["uzemi_txt"]
df_population = df_population.sort_values(by="population", ascending=False)
# Bare expression kept from the notebook; it has no effect when run as a script.
df_population

# Load obce.json. The encoding is given explicitly because the file holds
# Czech names and the default encoding of open() depends on the platform
# locale.
with open("obce.json", "r", encoding="utf-8") as f:
    obce = json.load(f)

# Bare expression kept from the notebook; it has no effect when run as a script.
obce["municipalities"]


def _municipality_record(obec):
    """Reduce one raw municipality entry to the fields used below.

    Keeps ``hezkyNazev`` as ``name``, the two elements of ``souradnice`` as
    ``lat`` and ``lon`` (both ``None`` when ``souradnice`` is ``None``), and
    ``adresaUradu.obecKod`` as ``uzemi_kod``. The code is ``None`` when
    ``adresaUradu`` is ``None`` or has no ``obecKod``.
    """
    coords = obec["souradnice"]
    office_address = obec["adresaUradu"] or {}
    return {
        "name": obec["hezkyNazev"],
        "lat": coords[0] if coords is not None else None,
        "lon": coords[1] if coords is not None else None,
        "uzemi_kod": office_address.get("obecKod"),
    }


municipalities = [_municipality_record(obec) for obec in obce["municipalities"]]

df_municipalities = pd.DataFrame(municipalities)

# A code that is carried by more than one municipality is ambiguous: blank it
# on every row that has it, so those rows are treated as having no code.
code_counts = df_municipalities["uzemi_kod"].value_counts()
repeated_codes = code_counts[code_counts > 1].index
has_repeated_code = df_municipalities["uzemi_kod"].isin(repeated_codes)
df_municipalities.loc[has_repeated_code, "uzemi_kod"] = None

# Municipalities that have no code at this point (missing in the input or
# blanked just above).
df_municipalities_without_uzemi_kod = df_municipalities.loc[
    df_municipalities["uzemi_kod"].isna()
]

# Keep only the territories whose name (uzemi_txt) occurs exactly once, so a
# name identifies a single code.
df_uzemi_unique = df_uzemi[~df_uzemi["uzemi_txt"].duplicated(keep=False)]
# Bare expression kept from the notebook; it has no effect when run as a script.
df_uzemi_unique

# Name -> code lookup built once. Names are unique in df_uzemi_unique, so each
# name maps to exactly one code; a single dict lookup per municipality
# replaces a scan of the whole table for every row.
code_by_name = {
    name: code for code, name in df_uzemi_unique["uzemi_txt"].dropna().items()
}

# Give every municipality without a code the code of the uniquely named
# territory with the same name, when there is one.
for idx, name in df_municipalities_without_uzemi_kod["name"].items():
    code = code_by_name.get(name)
    if code is not None:
        df_municipalities.loc[idx, "uzemi_kod"] = code


# Hand-maintained name -> code assignments for municipalities that still have
# no uzemi_kod after the automatic name match.
#
# Every name appears exactly once. The previous literal listed 14 names twice
# with different codes; a dict literal keeps only the last value of a repeated
# key, so the earlier code never took effect. The effective (later) code is
# kept here at the position of the first occurrence, and the overridden code
# is recorded in a trailing comment.
# NOTE(review): confirm for each of these names that the later code is the
# intended one.
manual_matches = {
    "Brno Řečkovice a Mokrá Hora": "551244",
    "Brno – Královo Pole": "551007",
    "Brno – Maloměřice a Obřany": "551252",
    "Brno – Nový Lískovec": "551112",
    "Brno – Starý Lískovec": "551091",
    "Brno-Jih": "551074",
    "Brno-Sever": "551031",
    "Brno-Střed": "550973",
    "Praha – Dolní Chabry": "547301",
    "Praha – Dolní Měcholupy": "547379",
    "Praha – Dolní Počernice": "538175",
    "Praha – Přední Kopanina": "539589",
    "Praha – Velká Chuchle": "547115",
    "Liberec – Vratislavice nad Nisou": "556891",
    "Pardubice Ii": "555126",
    "Pardubice Iii": "557064",
    "Pardubice Iv": "555096",
    "Pardubice V.": "557072",
    "Pardubice Vi": "555100",
    "Pardubice Vii": "555118",
    "Pardubice Viii": "575020",
    "Plzeň 10 – Lhota": "557978",
    "Plzeň 2 – Slovany": "545988",
    "Plzeň 5 – Křimice": "554731",
    "Plzeň 6 – Litice": "554758",
    "Plzeň 7 – Radčice": "554766",
    "Plzeň 8 – Černice": "554774",
    "Plzeň 9 – Malesice": "559199",
    "Ústí nad Labem – Neštěmice": "502081",
    "Ústí nad Labem – Severní Terasa": "501298",
    "Ústí nad Labem – Střekov": "502316",
    "Ústí nad Labem – Město": "567892",
    "Albrechtice N.o.": "576077",
    "Žďár N.o.": "576956",
    "Libavá": "500160",
    "Opočno": "576590",
    "Přibyslav": "569321",
    "Bořetice": "584347",
    "Bystré": "576166",  # overridden earlier entry: "577928"
    "Chlístov": "548791",  # overridden earlier entry: "590703"
    "Cvrčovice": "550272",  # overridden earlier entry: "532185"
    "Dobřany": "576280",
    "Hrabová": "554669",
    "Ivaň": "584517",  # overridden earlier entry: "589578"
    "Janov": "576328",  # overridden earlier entry: "578134"
    "Jilem": "548413",
    "Jílovice": "576352",
    "Kojetín": "568881",
    "Komárov": "555355",  # overridden earlier entry: "531324"
    "Kounov": "576395",  # overridden earlier entry: "541907"
    "Kozlov": "568899",  # overridden earlier entry: "500135"
    "Kožlí": "568902",
    "Králova Lhota": "576409",  # overridden earlier entry: "549525"
    "Křepice": "584592",
    "Lhotka": "554324",  # overridden earlier entry: "531898"
    "Lípa": "569038",
    "Michalovice": "548316",
    "Mostek": "579556",  # overridden earlier entry: "580660"
    "Nová Ves": "554367",  # overridden earlier entry: "535648"
    "Olešná": "569216",
    "Pavlov": "569241",
    "Plesná": "554723",  # overridden earlier entry: "554740"
    "Přestavlky": "530131",
    "Radostín": "569364",
    "Sedliště": "540692",
    "Skorkov": "548324",
    "Skuhrov": "569470",
    "Slavětín": "548553",
    "Třebovice": "554715",  # overridden earlier entry: "581071"
    "Val": "576875",
    "Vilémovice": "569721",
    "Vítkovice": "554227",
    "Voděrady": "576891",
    "Věžnice": "569704",
    "Čestice": "576221",
    "Liberec": "556904",
    "Opava": "555321",
    # NOTE(review): the names below have no entry; presumably they are still
    # unmatched - confirm.
    # 'Brno'
    # 'Ostrava'
    # 'Pardubice',
    # 'Plzeň'
    # 'Ústí nad Labem'
    # 'Vojenský Újezd Boletice',
    # 'Vojenský Újezd Březina'
    # 'Vojenský Újezd Hradiště',
    # 'Vojenský Újezd Libavá'
}

# Codes already assigned, normalised to int. The column can hold ints (codes
# copied from the df_uzemi index by the automatic match) next to other
# representations, so comparing it directly with the string codes above would
# not see codes stored with a different type.
codes_in_use = {int(used) for used in df_municipalities["uzemi_kod"].dropna()}

for name, code in manual_matches.items():
    code = int(code)
    # Never hand out a code that some municipality already carries.
    if code in codes_in_use:
        continue
    # Only rows with this name that still have no code are filled in.
    # NOTE(review): if several such rows exist they all receive the same code.
    still_missing = (df_municipalities["name"] == name) & (
        df_municipalities["uzemi_kod"].isna()
    )
    if still_missing.any():
        df_municipalities.loc[still_missing, "uzemi_kod"] = code
        codes_in_use.add(code)

# df_municipalities_without_uzemi_kod = df_municipalities[
#     df_municipalities["uzemi_kod"].isna()
# ]
# df_municipalities["uzemi_kod"].value_counts()
# df_municipalities_without_uzemi_kod

# Drop the municipalities that still have no code. The explicit copy makes the
# result an independent frame, so the column assignment below is an
# unambiguous write and does not trigger SettingWithCopyWarning.
df_municipalities = df_municipalities.dropna(subset=["uzemi_kod"]).copy()
# Bring all codes to one integer dtype before they become the index.
df_municipalities["uzemi_kod"] = df_municipalities["uzemi_kod"].astype(np.int64)
df_municipalities = df_municipalities.set_index("uzemi_kod")
# Bare expressions kept from the notebook (index and its uniqueness); they
# have no effect when run as a script.
df_municipalities.index

df_municipalities.index.is_unique

# Manual lat/lon fixes, keyed by uzemi_kod; each value is (lat, lon).
updates = {
    511986: (49.5297197, 18.6379925),  # Horní Lomná
    532401: (50.3186994, 14.0854961),  # Jarpice
    532886: (49.8198953, 14.5779383),  # Chářovice
    533157: (50.2465686, 14.1544156),  # Žižice
    537811: (50.0965347, 15.1071400),  # Sokoleč
    546500: (49.1891972, 15.0663606),  # Jarošov nad Nežárkou
    547107: (50.0023100, 14.4139308),  # Praha 12
    547824: (49.1440289, 16.9372367),  # Křižanovice
    548081: (49.5535722, 15.3182783),  # Jiřice
    552852: (49.4777986, 14.8121044),  # Pohnání
    554669: (49.8438169, 16.9522664),  # Hrabová
    555401: (49.8757544, 17.9756942),  # Podvihov
    557005: (49.4483792, 13.3555800),  # Předslav
    557226: (49.5740331, 18.7871447),  # Písečná
    561258: (49.7658206, 12.9100972),  # Svojšín
    562823: (50.9502286, 14.4966581),  # Staré Křečany
    563617: (50.6736633, 15.2915439),  # Jílové u Držkova
    565237: (49.5656328, 14.9903181),  # Lukavec
    571300: (49.7612769, 15.9863908),  # Dědová
    573558: (49.5444964, 15.4499875),  # Boňkov
    575020: (50.0034006, 15.8650483),  # Pardubice Viii
    580635: (49.8730808, 16.5979622),  # Luková
    583791: (49.2548625, 16.5109633),  # Rozdrojovice
    584673: (48.8342756, 16.9314750),  # Moravský Žižkov
    587770: (49.3686878, 16.2010442),  # Milešín
    588768: (49.2473075, 17.2069419),  # Morkovice-Slížany
    591122: (49.1804881, 15.8276308),  # Mastník
    591637: (49.2896056, 15.9496867),  # Rudíkov
    592919: (49.0532531, 16.8374183),  # Bošovice
    595705: (49.5761806, 16.3208161),  # Chlum-Korouhvice
    548308: (49.6298828, 15.6227342),  # Kyjov (Havlíčkův Brod district)
}

# Apply the fixes only to codes that exist in the table. Assigning through
# .loc with a label that is not in the index would append a new row that has
# coordinates but no name, so unknown codes are skipped and reported instead.
unknown_codes = []
for code, (lat, lon) in updates.items():
    if code not in df_municipalities.index:
        unknown_codes.append(code)
        continue
    df_municipalities.loc[code, ["lat", "lon"]] = [lat, lon]

if unknown_codes:
    print(f"Coordinate fixes skipped for unknown uzemi_kod values: {unknown_codes}")

# Rows that still lack a coordinate. Bare expression kept from the notebook;
# it has no effect when run as a script.
df_municipalities[df_municipalities["lat"].isna() | df_municipalities["lon"].isna()]

# Write the final table; the uzemi_kod index becomes the first CSV column.
df_municipalities.to_csv("docs/municipalities.csv")