In [None]:
import duckdb
from pipelines.tasks.config.common import DUCKDB_FILE
from pipelines.tasks.client.https_client import HTTPSClient
from pipelines.tasks.config.common import CACHE_FOLDER
import json
import os
import pandas as pd
from tqdm import tqdm

In [None]:
# Base URL of the Opendatasoft "explore" dataset catalog.
# Presumably the dataset paths given to the client later are resolved
# relative to this URL — confirm against HTTPSClient.
opendatasoft_datasets_url = (
    "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/"
)
https_client = HTTPSClient(opendatasoft_datasets_url)

In [None]:
# Fetch the GeoJSON export of the `georef-france-commune` dataset and store it
# in the cache folder.
# Local destination of the downloaded file.
filepath = os.path.join(CACHE_FOLDER, "georef-france-commune.geojson")
# Remote export path, including its query string, handed to the HTTPS client.
path = "georef-france-commune/exports/geojson?lang=fr&timezone=Africa%2FLagos"
https_client.download_file_from_https(path, filepath)

In [None]:
# Open the DuckDB database file in read-only mode; `con` is reused by later
# cells to run queries. The connection is never explicitly closed in this
# notebook.
con = duckdb.connect(database=DUCKDB_FILE, read_only=True)

In [None]:
# Load every row of the `ana__resultats_communes` table into a pandas
# DataFrame and preview its first two rows.
# NOTE: despite the `_2024` suffix in the names below, the query itself has
# no year filter, so the DataFrame holds whatever years the table contains.
query_2024 = """
select * from ana__resultats_communes
"""

# DuckDB result object for the query, then its pandas materialisation.
prelevements_2024 = con.sql(query_2024)
prelevements_2024_df = prelevements_2024.df()
# Last expression of the cell: displayed as the cell output.
prelevements_2024_df.head(2)

In [None]:
# Parse the commune GeoJSON stored in the cache folder into a Python dict.
geojson_path = os.path.join(CACHE_FOLDER, "georef-france-commune.geojson")
with open(geojson_path, "r", encoding="utf-8") as geojson_file:
    data_geo = json.load(geojson_file)

In [None]:
# Enrich GeoJSON features with the matching 2024 row of `prelevements_2024_df`.
#
# For each feature, the first entry of its `bv2022_code` property is looked up
# in the `commune_code_insee` column (rows with `annee == 2024` only). When
# exactly one row matches, every column of that row is copied onto the feature
# as a string.
#
# NOTE(review): the key read from the feature is `bv2022_code`, yet it is
# compared with `commune_code_insee` — confirm this is the intended join key.
# NOTE(review): values are written on the feature object itself, not inside
# `elem["properties"]` — confirm downstream consumers expect that.

# Only the first 1000 features are processed (the same list is exported later).
data_geo_features = data_geo["features"][0:1000]

# Hoisted out of the loop: filter on the year once instead of re-scanning the
# whole DataFrame for every feature.
rows_2024 = prelevements_2024_df[prelevements_2024_df.annee == 2024]
# Keep only codes that occur exactly once, mirroring the "exactly one matching
# row" rule: codes with several 2024 rows are left unenriched.
unique_rows_2024 = rows_2024[~rows_2024.commune_code_insee.duplicated(keep=False)]
# Map each remaining code to the position of its row in `unique_rows_2024`.
row_position_by_code = {
    code: position
    for position, code in enumerate(unique_rows_2024.commune_code_insee)
}

for elem in tqdm(data_geo_features):
    code_insee = elem["properties"]["bv2022_code"]
    if code_insee is None:
        continue
    # Only the first entry of the property is used for the lookup.
    code_insee = code_insee[0]
    position = row_position_by_code.get(code_insee)
    if position is None:
        continue
    for column in unique_rows_2024:
        # Read through the column so each value keeps its column dtype before
        # being stringified.
        elem[column] = str(unique_rows_2024[column].iloc[position])

In [None]:
# Wrap the processed features in a GeoJSON FeatureCollection object.
new_geo_json = {
    "type": "FeatureCollection",
    "features": data_geo_features,
}

In [None]:
# Serialise the feature collection to a GeoJSON file in the cache folder,
# pretty-printed with a 4-space indent.
output_path = os.path.join(CACHE_FOLDER, "georef-france-commune-prelevement.geojson")
with open(output_path, "w", encoding="utf-8") as output_file:
    json.dump(new_geo_json, output_file, indent=4)

# Tests


In [None]:
# Sanity check: distribution of the length of each feature's `bv2022_code`
# value (counted as 0 when the value is None).
# One row per GeoJSON feature, then the `properties` dicts flattened into
# columns and joined back on the row index.
df_geo = pd.DataFrame(data_geo["features"])
df_geo_flatten = df_geo.join(pd.json_normalize(df_geo["properties"]))
df_geo_flatten.bv2022_code.map(lambda x: len(x) if x is not None else 0).value_counts()
