In [7]:
#!/usr/bin/python3

import cdsapi
import zipfile
import netCDF4
from netCDF4 import num2date
import numpy as np
import pandas as pd
import os
import tempfile

def load_era5_vent_dataframe():
    """Download a small ERA5 wind sample from CDS and return it as a flat table.

    The request targets the ``reanalysis-era5-single-levels`` dataset for
    2023-01-01 and 2023-01-02 at 00:00, 06:00, 12:00 and 18:00, and saves the
    resulting archive as ``./vent_era5_2023_01.zip``. The first ``.nc`` member
    of the archive is extracted into a temporary directory, read fully into
    memory, and the temporary directory is removed before returning.

    Returns:
        pandas.DataFrame: one row per (time, latitude, longitude) combination,
        with columns ``time`` (ISO-8601 string), ``latitude``, ``longitude``,
        one column per wind variable found in the file, and a derived
        ``wind_speed_*`` column for every (u, v) pair that is available.

    Raises:
        FileNotFoundError: if the archive contains no ``.nc`` file.
        KeyError: if the NetCDF file has no variable whose name contains
            ``time``.
    """
    # === Step 1: download the ZIP archive from CDS ===
    client = cdsapi.Client()
    zip_path = './vent_era5_2023_01.zip'

    client.retrieve(
        'reanalysis-era5-single-levels',
        {
            "product_type": "reanalysis",
            "variable": [
                "10m_u_component_of_wind",
                "10m_v_component_of_wind",
                "100m_u_component_of_wind",
                "100m_v_component_of_wind",
                "10m_u_component_of_neutral_wind",
                "10m_v_component_of_neutral_wind",
                "10m_wind_gust_since_previous_post_processing",
                "instantaneous_10m_wind_gust"
            ],
            "year": "2023",
            "month": ["01"],
            "day": ["01", "02"],
            "time": ["00:00", "06:00", "12:00", "18:00"],
            "format": "netcdf"
        },
        zip_path
    )

    # DataFrame column -> variable name as stored in the NetCDF file.
    variables = {
        "u10": "u10",
        "v10": "v10",
        "u100": "u100",
        "v100": "v100",
        "u10n": "u10n",
        "v10n": "v10n",
        "gust": "i10fg",  # instant gust (m/s)
    }

    # Derived speed column -> (u component column, v component column).
    speed_columns = {
        "wind_speed_10m": ("u10", "v10"),
        "wind_speed_100m": ("u100", "v100"),
        "wind_speed_10m_neutral": ("u10n", "v10n"),
    }

    # === Step 2: extract into a temporary directory ===
    # The directory is deleted when the `with` block exits, so repeated calls
    # no longer accumulate extracted files on disk.
    with tempfile.TemporaryDirectory(prefix="era5_extract_") as extract_dir:
        print(f"📂 Extraction dans : {extract_dir}")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            nc_files = [name for name in zip_ref.namelist() if name.endswith('.nc')]
            if not nc_files:
                raise FileNotFoundError("Aucun fichier .nc trouvé dans l'archive.")
            # NOTE(review): only the first .nc member is read. If the archive
            # holds several files, variables stored in the others (possibly
            # the "since previous post-processing" gust) are not loaded --
            # confirm whether they should be merged.
            if len(nc_files) > 1:
                print(f"⚠️ Fichiers .nc ignorés : {nc_files[1:]}")
            # extract() returns the real on-disk path of the member.
            nc_path = zip_ref.extract(nc_files[0], extract_dir)

        # === Step 3: read the NetCDF file ===
        # The context manager closes the dataset before the temporary
        # directory is removed (an open handle would block deletion on
        # Windows). Every `[:]` below copies the data into memory.
        with netCDF4.Dataset(nc_path) as f:
            print("📄 Variables NetCDF disponibles :", list(f.variables.keys()))

            # Auto-detect the time variable: first name containing "time".
            time_var_name = next(
                (k for k in f.variables if 'time' in k.lower()), None
            )
            if time_var_name is None:
                raise KeyError("❌ Aucune variable temporelle ('time') trouvée dans ce fichier NetCDF.")
            print(f"✅ Variable temporelle détectée : '{time_var_name}'")

            time_var = f.variables[time_var_name]
            # Honour the variable's own calendar when it declares one.
            times = num2date(
                time_var[:],
                time_var.units,
                calendar=getattr(time_var, 'calendar', 'standard'),
            )
            latitudes = f.variables['latitude'][:]
            longitudes = f.variables['longitude'][:]

            data_arrays = {}
            for short_name, var_name in variables.items():
                if var_name in f.variables:
                    print(f"📦 Lecture : {var_name}")
                    data_arrays[short_name] = f.variables[var_name][:]
                else:
                    print(f"⚠️ Variable absente : {var_name}")

    # === Step 4: build the tabular DataFrame ===
    # Format each distinct timestamp once, then broadcast the strings over the
    # grid, instead of calling isoformat() for every (time, lat, lon) cell.
    iso_times = np.array([t.isoformat() for t in np.ravel(times)], dtype=object)
    times_grid, lat_grid, lon_grid = [
        axis.flatten()
        for axis in np.meshgrid(iso_times, latitudes, longitudes, indexing='ij')
    ]

    df = pd.DataFrame({
        'time': times_grid,
        'latitude': lat_grid,
        'longitude': lon_grid
    })

    # === Step 5: one column per variable that was found in the file ===
    for short_name, data in data_arrays.items():
        df[short_name] = data.flatten()

    # === Step 6: wind speed = sqrt(u^2 + v^2) for each available pair ===
    for column, (u_name, v_name) in speed_columns.items():
        if u_name in data_arrays and v_name in data_arrays:
            u = data_arrays[u_name]
            v = data_arrays[v_name]
            df[column] = np.sqrt(u**2 + v**2).flatten()

    print("\n✅ Données chargées dans le DataFrame Pandas : df")
    return df

# === Example usage ===
# Download and load the ERA5 wind sample, then preview its first five rows.
df = load_era5_vent_dataframe()
print(df.head(n=5))


2025-05-12 06:38:32,413 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-05-12 06:38:32,802 INFO Request ID is 40040847-12b2-45d8-bfce-64b9cbba98c0
2025-05-12 06:38:32,877 INFO status has been updated to accepted
2025-05-12 06:38:54,435 INFO status has been updated to running
2025-05-12 06:39:05,911 INFO status has been updated to successful
                                                                                         

📂 Extraction dans : C:\Users\ASUS\AppData\Local\Temp\era5_extract_b85bs3h1
📄 Variables NetCDF disponibles : ['number', 'valid_time', 'latitude', 'longitude', 'expver', 'u10', 'v10', 'u100', 'v100', 'u10n', 'v10n', 'i10fg']
✅ Variable temporelle détectée : 'valid_time'
📦 Lecture : u10
📦 Lecture : v10
📦 Lecture : u100
📦 Lecture : v100
📦 Lecture : u10n
📦 Lecture : v10n
📦 Lecture : i10fg

✅ Données chargées dans le DataFrame Pandas : df
                  time  latitude  longitude       u10       v10      u100  \
0  2023-01-01T00:00:00      90.0       0.00  2.161926  5.936081  4.055939   
1  2023-01-01T00:00:00      90.0       0.25  2.161926  5.936081  4.055939   
2  2023-01-01T00:00:00      90.0       0.50  2.161926  5.936081  4.055939   
3  2023-01-01T00:00:00      90.0       0.75  2.161926  5.936081  4.055939   
4  2023-01-01T00:00:00      90.0       1.00  2.161926  5.936081  4.055939   

       v100      u10n      v10n      gust  wind_speed_10m  wind_speed_100m  \
0  8.184479  2.087875 