# Imports

In [None]:
import os
import pandas as pd
import geopandas as gpd
from utils import download_file, S3Manager
from pyproj import CRS


# Notebook parameters

In [11]:
# Local working directory and name of the merged output file written at the end.
LOCAL_FOLDER = "../data/Natura_2000/"
OUTPUT_N2000_PARQUET = "Merged_n2000.parquet"

# SIC layer: remote archive and the local path it is saved to / read from.
URL_SIC_N2000 = "https://inpn.mnhn.fr/docs/Shape/sic.zip"
SIC_N2000 = "../data/Natura_2000/sic.zip"

# ZPS layer: remote archive and the local path it is saved to / read from.
URL_ZPS_N2000 = "https://inpn.mnhn.fr/docs/Shape/zps.zip"
ZPS_N2000 = "../data/Natura_2000/zps.zip"

# When True, the merged Parquet file is also uploaded to S3 in the last cell.
UPLOAD_TO_S3 = False

In [12]:
# Instantiate the S3 helper imported from the project's `utils` module. It is only
# used in the final cell, to upload the merged file when UPLOAD_TO_S3 is True.
# NOTE(review): the manager is created even when UPLOAD_TO_S3 is False — confirm
# that S3Manager() does not require credentials or network access at construction.
s3_manager = S3Manager()

# Downloading and processing the data

## SIC

In [13]:
# SIC layer — "Sites designated under the Habitats Directive: boundaries submitted
# to the European Commission (ZSC/pSIC/SIC)".
# Fetch the archive only when no local copy exists, then read it from disk.
if not os.path.exists(SIC_N2000):
    download_file(url=URL_SIC_N2000, save_path=SIC_N2000)
nat2000_sic = gpd.read_file(SIC_N2000)

## ZPS

In [14]:
# ZPS layer — "Special Protection Areas (ZPS)".
# Fetch the archive only when no local copy exists, then read it from disk.
if not os.path.exists(ZPS_N2000):
    download_file(url=URL_ZPS_N2000, save_path=ZPS_N2000)
nat2000_zps = gpd.read_file(ZPS_N2000)

# Merging the two overlapping datasets

In [15]:
# Stack the SIC and ZPS layers and dissolve every geometry into a single one, so
# that areas covered by both layers appear only once in the result.
merged_geometry = pd.concat([nat2000_sic, nat2000_zps]).union_all()
nat2000 = gpd.GeoDataFrame(geometry=[merged_geometry], crs=nat2000_sic.crs)

# Split the dissolved geometry back into its individual single-part polygons.
# Sites that overlap or touch were fused by the union above, so one output polygon
# may cover several original sites.
# ignore_index=True gives each polygon its own 0..n-1 label; without it every row
# would repeat the label of the single source row, leaving a duplicated index.
nat2000 = nat2000.explode(ignore_index=True)

# Assertion tests and upload to S3

In [None]:
# Check that the merged layer is in the expected CRS (EPSG:2154).
assert CRS(nat2000.crs).to_epsg() == 2154, "Le CRS du fichier Natura 2000 n'est pas EPSG:2154"

# Check that the merged layer carries nothing but its geometry column.
assert list(nat2000.columns) == ["geometry"], (
    "Le dataframe doit contenir uniquement une colonne 'geometry'"
)

# Path of the local Parquet output.
local_file = os.path.join(LOCAL_FOLDER, OUTPUT_N2000_PARQUET)

# Write the Parquet file. A failure must stop the notebook: the exception is
# re-raised (chained to the original cause) instead of being built and discarded.
try:
    nat2000.to_parquet(local_file)
except Exception as e:
    raise RuntimeError(f"Échec de l'enregistrement du fichier {local_file}: {e}") from e

# Upload to S3 when enabled and the local file is present.
if UPLOAD_TO_S3:
    if os.path.exists(local_file):
        # S3 keys always use "/" as separator; os.path.join would produce "\" on
        # Windows, so the key is built explicitly.
        s3_manager.upload_to_s3(
            file_path=local_file, s3_key=f"static_layers/{OUTPUT_N2000_PARQUET}"
        )
    else:
        print(f"Le fichier {local_file} n'existe pas, upload annulé.")