**Notebook** : setup_meteo_folder.ipynb

**Description** : Préparation du dossier meteo/ pour diohine/modeleCommun

In [1]:
from pathlib import Path
from typing import List, Union

import geopandas as gpd
import numpy as np
import pandas as pd

## 1. Création des dossiers `meteo/` et `meteo/simulee/` dans le dossier `diohine/modeleCommun`

In [2]:
# Project root: the parent of the current working directory, made absolute
base_dir = Path.cwd().parent.resolve()

# Weather folders used by the model, located under diohine/modeleCommun
meteo_dir = base_dir.joinpath("diohine", "modeleCommun", "meteo")
simulee_dir = meteo_dir.joinpath("simulee")

In [3]:
# Create meteo/ only when it is missing, and report which case applied
if meteo_dir.exists():
    print(f"Dossier déjà existant : {meteo_dir}")
else:
    meteo_dir.mkdir(parents=True, exist_ok=True)
    print(f"Création du dossier : {meteo_dir}")

Dossier déjà existant : C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo


In [4]:
# Create simulee/ only when it is missing, and report which case applied
if simulee_dir.exists():
    print(f"Dossier déjà existant : {simulee_dir}")
else:
    simulee_dir.mkdir(parents=True, exist_ok=True)
    print(f"Création du dossier : {simulee_dir}")

Dossier déjà existant : C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee


In [5]:
# Recap of the folder layout this notebook works with (one entry per line)
print(
    "\nStructure prévue :",
    f"- {meteo_dir}",
    f"- {simulee_dir}",
    sep="\n",
)


Structure prévue :
- C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo
- C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee


## 2. Data Cleaning des données climatiques brutes issues de CNRM-ESM2-1

### 2.1. Exploratory Data Analysis

In [6]:
# Explicit path to the raw climate CSV
fichier_climat = base_dir.joinpath("data", "climat", "niakharData", "CNRM-ESM2-1_ssp126.csv")

# Load the file as-is (comma-separated; adjust `sep` if the delimiter differs)
df = pd.read_csv(fichier_climat, sep=",")

# Preview the first five rows
df.head(5)

Unnamed: 0,time,rsds,tasmin,tasmax,hurs,sfcWind,pr
0,date,W/m2,Celsius,Celsius,%,m/s,mm/day
1,1950-01-01 12:00:00,271.7951354980469,17.899871826171875,34.546234130859375,26.503536224365234,5.849405765533447,0.0
2,1950-01-02 12:00:00,273.6487121582031,16.197845458984375,34.462249755859375,29.381072998046875,4.4467082023620605,0.0
3,1950-01-03 12:00:00,272.430908203125,12.836456298828125,35.140869140625,33.332759857177734,2.7305986881256104,0.0
4,1950-01-04 12:00:00,268.4210205078125,13.605072021484375,34.72869873046875,41.25194549560547,2.702117681503296,0.0


In [7]:
# Show the column labels of the raw frame
df.columns

Index(['time', 'rsds', 'tasmin', 'tasmax', 'hurs', 'sfcWind', 'pr'], dtype='object')

In [8]:
# Show the dtype pandas inferred for each column of the raw frame
# (every column comes out as `object` here: the first data row holds unit labels, not values)
df.dtypes

time       object
rsds       object
tasmin     object
tasmax     object
hurs       object
sfcWind    object
pr         object
dtype: object

In [9]:
# Summary statistics of the raw frame; with object columns this reports
# count / unique / top / freq rather than numeric statistics
df.describe()

Unnamed: 0,time,rsds,tasmin,tasmax,hurs,sfcWind,pr
count,36891,36891.0,36891.0,36891.0,36891.0,36891.0,36891.0
unique,36891,36781.0,35123.0,34352.0,36834.0,36827.0,11415.0
top,2050-12-31 12:00:00,331.67315673828125,24.517486572265625,34.53302001953125,82.84820556640625,3.3301892280578613,0.0
freq,1,2.0,4.0,4.0,2.0,2.0,25475.0


In [10]:
# Look at 10 random rows; the fixed seed makes the displayed sample
# identical on every Restart & Run All
df.sample(10, random_state=42)

Unnamed: 0,time,rsds,tasmin,tasmax,hurs,sfcWind,pr
4213,1961-07-14 12:00:00,251.80455017089844,24.57464599609375,33.681793212890625,79.45977020263672,2.361743688583374,1.5325095097068695
8521,1973-04-30 12:00:00,324.2193908691406,19.8414306640625,37.624114990234375,50.763126373291016,4.608160495758057,0.0
23477,2014-04-11 12:00:00,355.81231689453125,18.241912841796875,38.36154174804688,45.77432632446289,4.340848445892334,0.0
23359,2013-12-14 12:00:00,190.09169006347656,19.66802978515625,32.473175048828125,38.76617813110352,4.398098468780518,0.0
8466,1973-03-06 12:00:00,335.1317138671875,20.0943603515625,38.73016357421875,32.464290618896484,5.341839790344238,0.0
17032,1996-08-18 12:00:00,277.2318115234375,23.17266845703125,33.492034912109375,80.38040161132812,2.776437759399414,0.0
22195,2010-10-07 12:00:00,262.3690185546875,24.01446533203125,33.32586669921875,78.00859069824219,2.102344274520874,0.9769607917405664
21356,2008-06-20 12:00:00,311.9302062988281,22.54989624023437,35.729705810546875,74.05864715576172,4.142176628112793,0.1982951951504219
5968,1966-05-04 12:00:00,336.3537292480469,20.402496337890625,37.89849853515625,50.71974182128906,6.565980434417725,0.0
3369,1959-03-23 12:00:00,271.0419921875,17.019622802734375,33.650421142578125,62.01554107666016,5.913414001464844,0.0


### 2.2. Nettoyage des données

In [11]:
# Raw input folder and processed output folder for the climate data
input_dir = base_dir.joinpath("data", "climat", "niakharData")
output_dir = base_dir.joinpath("data", "climat", "processed")

# Make sure the output folder exists (no error if it is already there)
output_dir.mkdir(parents=True, exist_ok=True)

# Load the raw climate file into its own frame
fichier_climat = input_dir.joinpath("CNRM-ESM2-1_ssp126.csv")
df_raw = pd.read_csv(fichier_climat, sep=",")

In [12]:
# Quick look at the raw frame: column names first, then the first rows
raw_column_names = df_raw.columns.tolist()
print("Colonnes d'origine :", raw_column_names)
print("Premières lignes :")
display(df_raw.head())

Colonnes d'origine : ['time', 'rsds', 'tasmin', 'tasmax', 'hurs', 'sfcWind', 'pr']
Premières lignes :


Unnamed: 0,time,rsds,tasmin,tasmax,hurs,sfcWind,pr
0,date,W/m2,Celsius,Celsius,%,m/s,mm/day
1,1950-01-01 12:00:00,271.7951354980469,17.899871826171875,34.546234130859375,26.503536224365234,5.849405765533447,0.0
2,1950-01-02 12:00:00,273.6487121582031,16.197845458984375,34.462249755859375,29.381072998046875,4.4467082023620605,0.0
3,1950-01-03 12:00:00,272.430908203125,12.836456298828125,35.140869140625,33.332759857177734,2.7305986881256104,0.0
4,1950-01-04 12:00:00,268.4210205078125,13.605072021484375,34.72869873046875,41.25194549560547,2.702117681503296,0.0


In [13]:
# Columns that hold numeric measurements and must be cast to float
cols_to_convert = ["rsds", "tasmin", "tasmax", "hurs", "sfcWind", "pr"]

# Build the cleaned frame from df_raw in one chain:
#   1. drop the first row (it holds the units, not data) and renumber the index
#   2. parse 'time' as datetime
#   3. cast the measurement columns to float
#   4. rename the columns that have a target name
df_clean = (
    df_raw
    .drop(index=0)
    .reset_index(drop=True)
    .assign(time=lambda frame: pd.to_datetime(frame["time"], format="%Y-%m-%d %H:%M:%S"))
    .astype({name: float for name in cols_to_convert})
    .rename(columns={
        "time": "DATE",
        "pr": "RRmm",
        "tasmin": "Tmin",
        "tasmax": "Tmax",
        "rsds": "RGI",
    })
)

In [14]:
# Quick look at the cleaned frame: column names after renaming, then the first rows.
# The label now says "after cleaning": these are no longer the original columns.
print("Colonnes après nettoyage :", df_clean.columns.tolist())
print("Premières lignes :")
display(df_clean.head())

Colonnes d'origine : ['DATE', 'RGI', 'Tmin', 'Tmax', 'hurs', 'sfcWind', 'RRmm']
Premières lignes :


Unnamed: 0,DATE,RGI,Tmin,Tmax,hurs,sfcWind,RRmm
0,1950-01-01 12:00:00,271.795135,17.899872,34.546234,26.503536,5.849406,0.0
1,1950-01-02 12:00:00,273.648712,16.197845,34.46225,29.381073,4.446708,0.0
2,1950-01-03 12:00:00,272.430908,12.836456,35.140869,33.33276,2.730599,0.0
3,1950-01-04 12:00:00,268.421021,13.605072,34.728699,41.251945,2.702118,0.0
4,1950-01-05 12:00:00,265.328827,14.521393,34.071106,45.360634,3.337594,0.0


### 2.4. Approximation de l'ETP (formule simplifiée Hargreaves-Samani)

In [15]:
# Reference evapotranspiration (mm/day) with the Hargreaves-Samani equation
# (FAO-56, eq. 52):
#     ETP = 0.0023 * (Tmean + 17.8) * sqrt(Tmax - Tmin) * Ra
# with Tmean = (Tmax + Tmin) / 2 and Ra the extraterrestrial radiation expressed
# in mm/day of equivalent evaporation (FAO-56, eq. 21, times 0.408).
# The previous version used Tmax + Tmin instead of their mean and left Ra out.

LATITUDE_DEG = 14.5  # approximate latitude of the Niakhar/Diohine area — TODO confirm


def hargreaves_samani_etp(dates, tmin, tmax, latitude_deg):
    """
    Compute daily reference evapotranspiration with Hargreaves-Samani.

    Parameters:
    - dates (pd.Series): datetime values, one per day.
    - tmin (pd.Series): daily minimum temperature in Celsius.
    - tmax (pd.Series): daily maximum temperature in Celsius.
    - latitude_deg (float): site latitude in decimal degrees (north positive).

    Returns:
    - pd.Series: evapotranspiration in mm/day, aligned on the input index.
    """
    day_of_year = dates.dt.dayofyear
    latitude = np.radians(latitude_deg)
    year_angle = 2 * np.pi * day_of_year / 365

    # Extraterrestrial radiation Ra (MJ m-2 day-1)
    inverse_distance = 1 + 0.033 * np.cos(year_angle)
    declination = 0.409 * np.sin(year_angle - 1.39)
    sunset_angle = np.arccos(-np.tan(latitude) * np.tan(declination))
    solar_constant = 0.0820  # MJ m-2 min-1
    ra_mj = (24 * 60 / np.pi) * solar_constant * inverse_distance * (
        sunset_angle * np.sin(latitude) * np.sin(declination)
        + np.cos(latitude) * np.cos(declination) * np.sin(sunset_angle)
    )
    ra_mm = 0.408 * ra_mj  # MJ m-2 day-1 -> mm/day of equivalent evaporation

    mean_temp = (tmax + tmin) / 2
    # Clip at zero so an inverted Tmax < Tmin pair gives 0 instead of NaN
    temp_range = (tmax - tmin).clip(lower=0)

    return 0.0023 * (mean_temp + 17.8) * np.sqrt(temp_range) * ra_mm


# DATE must still be a datetime column at this point (it is parsed in the cleaning cell)
df_clean["ETP"] = hargreaves_samani_etp(
    df_clean["DATE"], df_clean["Tmin"], df_clean["Tmax"], LATITUDE_DEG
)

### 2.5. Conversion RGI

In [16]:
# Convert RGI from W/m² (a daily mean flux) to MJ/m²/day.
# The value is recomputed from the raw 'rsds' column instead of rescaling
# df_clean["RGI"] in place, so re-running this cell never applies the factor twice.
# Row 0 of df_raw is the units row that the cleaning step also drops, so the
# remaining rows line up one-to-one with df_clean.
SECONDS_PER_DAY = 86_400
JOULES_PER_MEGAJOULE = 1_000_000

rsds_w_m2 = df_raw["rsds"].drop(index=0).astype(float).to_numpy()
df_clean["RGI"] = (rsds_w_m2 * SECONDS_PER_DAY) / JOULES_PER_MEGAJOULE

### 2.6. Sauvegarder dans `climat/processed/`

In [17]:
# Keep only the documented columns, in the documented order.
# (No ID_PDG yet; it is added per parcel in section 3.)
df_clean = df_clean[["DATE", "RRmm", "Tmin", "Tmax", "ETP", "RGI"]]

# Format the date as DD/MM/YYYY on an export copy only. df_clean keeps its
# datetime DATE column, so this cell can be re-run without `.dt` failing on
# strings that were already formatted.
df_export = df_clean.assign(DATE=df_clean["DATE"].dt.strftime("%d/%m/%Y"))

output_file = output_dir / "climat_diohine_clean_global.csv"
df_export.to_csv(output_file, sep=";", index=False)

print(f"Fichier nettoyé sauvegardé ici : {output_file}")

Fichier nettoyé sauvegardé ici : C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\data\climat\processed\climat_diohine_clean_global.csv


## 3. Ajout des `ID_PDG`

In [18]:
## Helper Functions

def extract_attributes_from_shapefile(
    shapefile_path: Union[str, Path],
    attributes: List[str],
    include_geometry: bool = False
) -> pd.DataFrame:
    """
    Load a shapefile and keep only the requested attribute columns.

    Parameters:
    - shapefile_path (str or Path): Location of the shapefile to read.
    - attributes (List[str]): Names of the attribute columns to keep.
    - include_geometry (bool): When True, the geometry column is appended
      after the requested attributes.

    Returns:
    - pd.DataFrame: The selected columns, in the order they were requested.

    Raises:
    - ValueError: If any requested attribute is not a column of the shapefile.
    """
    # Path() accepts both str and Path inputs
    layer = gpd.read_file(Path(shapefile_path))

    # Fail early, listing every requested column the shapefile lacks
    missing_attrs = [name for name in attributes if name not in layer.columns]
    if missing_attrs:
        raise ValueError(f"The following attributes are not present in the shapefile: {missing_attrs}")

    # Work on a copy so the caller's list is never modified
    wanted = attributes.copy()
    if include_geometry:
        # Use the layer's own geometry column name (typically 'geometry')
        wanted.append(layer.geometry.name)

    return layer[wanted]

In [19]:
# Path to the parcel shapefile
shapefile_path = base_dir.joinpath("data", "climat", "shapefiles", "Parcellaire_Diohine_2.shp")

# Attribute columns to pull out of the shapefile (extend as needed)
attributes_to_extract = ["ID_PDG"]

# Read the shapefile and keep only those attributes
df_attributes = extract_attributes_from_shapefile(
    shapefile_path=shapefile_path,
    attributes=attributes_to_extract,
)

# Preview the first rows
df_attributes.head()

Unnamed: 0,ID_PDG
0,1
1,2
2,3
3,4
4,5


In [20]:
# Distinct parcel identifiers, kept in order of first appearance
id_pdg_list = df_attributes["ID_PDG"].drop_duplicates().tolist()

# Sanity check: how many identifiers there are, and what the first few look like
id_count = len(id_pdg_list)
print(f"Number of unique ID_PDG: {id_count}")
print("Sample ID_PDGs:", id_pdg_list[:5])

Number of unique ID_PDG: 9331
Sample ID_PDGs: [1, 2, 3, 4, 5]


In [21]:
# Make sure DATE holds datetimes (it may be stored as DD/MM/YYYY text)
df_clean["DATE"] = pd.to_datetime(df_clean["DATE"], format="%d/%m/%Y")

# Target years: 2018 through 2050 inclusive
target_years = [*range(2018, 2051)]

# Output folder for the per-year files
simulee_dir = base_dir.joinpath("diohine", "modeleCommun", "meteo", "simulee")
simulee_dir.mkdir(parents=True, exist_ok=True)

# Years actually present in the data, plus a set for membership tests
available_years = df_clean["DATE"].dt.year.unique()
available_years_set = {*available_years}

In [22]:
# Write one CSV per target year, repeating that year's climate rows for every ID_PDG.

# Column order of the yearly files
ordered_cols = ["ID_PDG", "DATE", "RRmm", "Tmin", "Tmax", "ETP", "RGI"]

# Fail explicitly when there is no parcel to write for
if not id_pdg_list:
    raise ValueError("id_pdg_list is empty: no ID_PDG to attach the climate data to")

# One-column frame of parcel identifiers, built once and reused for every year
df_ids = pd.DataFrame({"ID_PDG": id_pdg_list})

for annee in target_years:
    if annee not in available_years_set:
        print(f"⚠️ {annee} non disponible dans les données, ignorée.")
        continue

    df_year = df_clean[df_clean["DATE"].dt.year == annee].copy()

    # Format the date as DD/MM/YYYY for the output file
    df_year["DATE"] = df_year["DATE"].dt.strftime("%d/%m/%Y")

    # Cross join: every ID_PDG paired with every day of the year, in a single
    # operation instead of one DataFrame copy per parcel. Row order is unchanged:
    # all days for the first ID, then all days for the second, and so on.
    df_final_year = df_ids.merge(df_year, how="cross")[ordered_cols]

    # Checks: expected columns, and exactly one row per (parcel, day) pair
    assert df_final_year.columns.tolist() == ordered_cols, f"Erreur de colonnes pour {annee}"
    assert len(df_final_year) == len(df_ids) * len(df_year), f"Erreur de nombre de lignes pour {annee}"
    print(f"✅ {annee} : {len(df_final_year)} lignes générées")

    # Save
    output_file = simulee_dir / f"{annee}.csv"
    df_final_year.to_csv(output_file, sep=";", index=False)
    print(f"💾 {output_file} sauvegardé")

✅ 2018 : 3405815 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2018.csv sauvegardé
✅ 2019 : 3405815 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2019.csv sauvegardé
✅ 2020 : 3415146 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2020.csv sauvegardé
✅ 2021 : 3405815 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2021.csv sauvegardé
✅ 2022 : 3405815 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2022.csv sauvegardé
✅ 2023 : 3405815 lignes générées
💾 C:\Users\Cheikhou\Desktop\Ferlo_Sine\maelia-instanciation-ferlo-sine\diohine\modeleCommun\meteo\simulee\2023.csv sauvegardé
✅ 2024 : 3415146 lignes générées
💾 C:\Users\Ch