### Notebook Pour une rentrée en sciences 2026

In [1]:
import openmeteo_requests
import requests
import pandas as pd
import requests_cache
import os
from retry_requests import retry

In [3]:


def geocode_prefecture_ban(prefecture_name: str, department: str | None = None):
    """
    Look up the coordinates of a French prefecture (city) through the BAN API.

    Parameters
    ----------
    prefecture_name : str
        Name of the city to search for.
    department : str or None, optional
        When given, it is appended to the query as "<city>, <department>".

    Returns
    -------
    tuple
        (lat, lon, label) of the first municipality returned by the API;
        `label` falls back to the query string when the API gives none.

    Raises
    ------
    ValueError
        If the API returns no feature for the query.
    """
    query = (
        f"{prefecture_name}, {department}"
        if department is not None
        else prefecture_name
    )
    response = requests.get(
        "https://api-adresse.data.gouv.fr/search/",
        params={"q": query, "limit": 1, "type": "municipality"},
        timeout=30,
    )
    response.raise_for_status()

    matches = response.json().get("features", [])
    if not matches:
        raise ValueError(f"No result for prefecture query: {query}")

    best_match = matches[0]
    # BAN returns the coordinates in [lon, lat] order
    longitude, latitude = best_match["geometry"]["coordinates"]
    place_label = best_match["properties"].get("label", query)
    return latitude, longitude, place_label


# --- Choose your prefecture here ---
prefecture = "Aurillac"          # example city name passed to the geocoder
department = "Cantal"         # optional (may be None) but helps disambiguate the query

# Geocode the chosen prefecture and print the label and coordinates that were returned
lat, lon, label = geocode_prefecture_ban(prefecture, department)
print(f"Prefecture geocoded: {label} -> lat={lat}, lon={lon}")


Prefecture geocoded: Aurillac -> lat=44.918771, lon=2.435053


In [2]:
# Load the prefecture table (semicolon-separated file)
pref = pd.read_csv("../data/pref_lat_lon.csv", sep=";")
# "Geo Point" is a comma-separated pair: first field -> lat, second field -> lon
pref["lat"] = pref["Geo Point"].map(lambda point: float(point.split(",")[0]))
pref["lon"] = pref["Geo Point"].map(lambda point: float(point.split(",")[1]))
# Keep only the columns used in the rest of the notebook
pref = pref.loc[:, ['Code INSEE', 'Commune', 'Service', 'lat', 'lon']]
# pref = pref[pref.Service == 'Préfecture']

In [37]:
# Display the rows whose INSEE code starts with '34'
pref[pref['Code INSEE'].str.startswith('34')]

Unnamed: 0,Code INSEE,Commune,Service,lat,lon
185,34142,Lodève,Sous-préfecture,43.733155,3.319806
261,34172,Montpellier,Préfecture de région,43.606948,3.875238
309,34032,Béziers,Sous-préfecture,43.344687,3.230507


In [None]:
def get_open_meteo_url(longitude, latitude, year, hourly_variables):
    """
    Build the Open-Meteo archive API URL for one location and one year.

    Parameters
    ----------
    longitude : float
        Value inserted as the `longitude` query parameter.
    latitude : float
        Value inserted as the `latitude` query parameter.
    year : int
        Requested year. The period starts on January 1st and ends on
        December 31st, or on the current day when `year` is the current year.
    hourly_variables : list of str or str
        Hourly variable names. A list (or tuple) is joined with commas; a
        string is inserted unchanged.

    Returns
    -------
    url : str
        Full request URL, with the timezone fixed to Europe/Berlin.

    """
    if isinstance(hourly_variables, (list, tuple)):
        hourly_variables = ','.join(hourly_variables)

    # pd.Timestamp.today() is used because `datetime.date` is not imported in
    # this file (the previous `date.today()` raised NameError).
    tod = pd.Timestamp.today()

    # If the requested year is not finished yet, stop the period at today
    end_month, end_day = 12, 31
    if year == tod.year:
        end_day = tod.strftime('%d')
        end_month = tod.strftime('%m')

    return (
        'https://archive-api.open-meteo.com/v1/archive'
        f'?latitude={latitude}&longitude={longitude}'
        f'&start_date={year}-01-01&end_date={year}-{end_month}-{end_day}'
        f'&hourly={hourly_variables}&timezone=Europe%2FBerlin'
    )


def open_meteo_historical_data(longitude, latitude, year, hourly_variables=['temperature_2m','direct_radiation_instant'], force=False):
    """
    Load hourly Open-Meteo archive data for one location and one year.

    The data is cached as a CSV file in `data/Open-Meteo` (relative to the
    working directory). The API is only queried when the CSV is missing or
    when `force` is True; the returned frame is always read back from the CSV.

    Parameters
    ----------
    longitude : float
        Longitude of the location (also part of the cache file name).
    latitude : float
        Latitude of the location (also part of the cache file name).
    year : int
        Requested year (also part of the cache file name).
    hourly_variables : str or list of str, optional
        Hourly variable names. The default is
        ['temperature_2m','direct_radiation_instant'].
    force : boolean, optional
        Download again even if a cached CSV exists. The default is False.

    Returns
    -------
    data : pandas DataFrame
        One column per CSV column other than `time`, indexed by the `time`
        column converted with `pd.to_datetime`.

    Raises
    ------
    RuntimeError
        If the HTTP request fails or the API response reports an error.

    """
    # TODO: maybe take a city name as input to get more readable file names
    # NOTE: the list default is only read, never mutated, so sharing it
    # between calls is harmless.
    if isinstance(hourly_variables, (list, tuple)):
        hourly_variables_str = ','.join(hourly_variables)
    else:
        hourly_variables_str = hourly_variables

    save_path = os.path.join('data','Open-Meteo')
    save_name = '{}_{}_{}_{}.csv'.format(hourly_variables_str, year, longitude, latitude)
    save_name_units = '{}_{}_{}_{}_units.txt'.format(hourly_variables_str, year, longitude, latitude)
    csv_path = os.path.join(save_path, save_name)
    units_path = os.path.join(save_path, save_name_units)

    if force or not os.path.isfile(csv_path):
        # The cache directory may not exist on a first run
        os.makedirs(save_path, exist_ok=True)

        # The comma-joined string is accepted by get_open_meteo_url as-is
        url = get_open_meteo_url(longitude, latitude, year, hourly_variables_str)
        response = requests.get(url, timeout=60)
        if not response.ok:
            raise RuntimeError(
                'Open-Meteo request failed with HTTP status {}: {}'.format(
                    response.status_code, response.text
                )
            )
        json_data = response.json()
        if json_data.get('error') or 'hourly' not in json_data:
            raise RuntimeError(
                'Open-Meteo returned no hourly data: {}'.format(
                    json_data.get('reason', json_data)
                )
            )

        # Units are stored next to the data, one "<column> : <unit>" line each
        units = json_data.get('hourly_units') or {}
        with open(units_path, 'w', encoding='utf-8') as f:
            f.writelines('{} : {} \n'.format(col, unit) for col, unit in units.items())

        data = pd.DataFrame.from_dict(json_data['hourly'])
        data.to_csv(csv_path, index=False)

    # Always read back from disk so cached and fresh calls return the same thing
    data = pd.read_csv(csv_path)
    data = data.set_index('time')
    data.index = pd.to_datetime(data.index)
    return data

In [26]:
def fetch_daily_dataframe(
    variables,
    lon_list,
    lat_list,
    start_date="1950-01-05",
    end_date="2025-12-31",
    model="era5",
    client=None,
):
    """
    Fetch daily variables from the Open-Meteo archive for several locations.

    Parameters
    ----------
    variables : list of str or str
        Daily variable names; a single string is treated as one variable.
    lon_list, lat_list : sequence of float
        Longitudes and latitudes of the locations, paired by position.
    start_date, end_date : str
        Bounds of the requested period, sent unchanged to the API.
    model : str
        Value of the `models` request parameter.
    client : optional
        Object exposing `weather_api(url, params=...)`. When None, an
        `openmeteo_requests.Client` backed by a cached, retrying session is
        created, so repeated identical requests do not count against the
        API quota again.

    Returns
    -------
    pandas DataFrame
        One column per variable, indexed by (date, lon, lat) and sorted by
        index. Dates are UTC-aware timestamps.

    Raises
    ------
    ValueError
        If the location lists differ in length, or if no location or no
        variable is given.
    """
    # A bare string would otherwise be enumerated character by character
    variables = [variables] if isinstance(variables, str) else list(variables)
    lon_list = list(lon_list)
    lat_list = list(lat_list)

    if len(lon_list) != len(lat_list):
        raise ValueError("lon_list and lat_list must have same length")
    if not lon_list:
        raise ValueError("lon_list and lat_list must contain at least one location")
    if not variables:
        raise ValueError("variables must contain at least one variable name")

    if client is None:
        # Same session setup as the Open-Meteo Python client example:
        # on-disk cache that never expires + automatic retries.
        cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        client = openmeteo_requests.Client(session=retry_session)

    params = {
        "latitude": lat_list,
        "longitude": lon_list,
        "start_date": start_date,
        "end_date": end_date,
        "daily": variables,
        "models": model,
    }

    responses = client.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params=params
    )

    frames = []

    # One response per location, in the order the coordinates were sent
    for lon, lat, response in zip(lon_list, lat_list, responses):
        daily = response.Daily()

        # Rebuild the time axis from the start, end and step (all in seconds)
        dates = pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left",
        )

        # Variables are read back by position, in the order they were requested
        data = {"date": dates}
        for i, var in enumerate(variables):
            data[var] = daily.Variables(i).ValuesAsNumpy()

        df_loc = (
            pd.DataFrame(data)
            .assign(lon=lon, lat=lat)
            .set_index(["date", "lon", "lat"])
        )

        frames.append(df_loc)

    return pd.concat(frames).sort_index()


In [28]:
# Coordinates of the regional prefectures, limited to the first two
is_region_prefecture = pref.Service == 'Préfecture de région'
lons = pref.loc[is_region_prefecture, 'lon'].to_list()[:2]
lats = pref.loc[is_region_prefecture, 'lat'].to_list()[:2]

df = fetch_daily_dataframe(
    lon_list=lons,
    lat_list=lats,
    variables=["snowfall_sum", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean"],
)

# Threshold table; `dep` is turned into a string left-padded with zeros to 2 characters
seuils = pd.read_csv("../data/seuils_canicules.csv", sep=",")
seuils['dep'] = seuils['dep'].map(lambda code: str(code).zfill(2))
seuils

OpenMeteoRequestsError: failed to request 'https://archive-api.open-meteo.com/v1/archive': {'reason': 'Daily API request limit exceeded. Please try again tomorrow.', 'error': True}

In [25]:
# NOTE(review): this looks unfinished. It assigns the whole DataFrame to a single
# column; presumably a flag derived from `df` and `seuils` was intended. TODO confirm.
df['canicule'] = df

Unnamed: 0_level_0,snowfall_sum,temperature_2m_max,temperature_2m_min,temperature_2m_mean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1950-01-05 00:00:00+00:00,0.0,8.721499,7.7215,8.056915
1950-01-06 00:00:00+00:00,0.0,8.071500,4.1215,6.248583
1950-01-07 00:00:00+00:00,0.0,9.371500,4.1715,6.338167
1950-01-08 00:00:00+00:00,0.0,7.071500,1.8215,4.023583
1950-01-09 00:00:00+00:00,0.0,10.521500,2.5215,5.954834
...,...,...,...,...
2025-12-27 00:00:00+00:00,0.0,0.021500,-2.7285,-1.366000
2025-12-28 00:00:00+00:00,0.0,3.171500,-2.3285,-0.295166
2025-12-29 00:00:00+00:00,0.0,0.871500,-3.5785,-1.532667
2025-12-30 00:00:00+00:00,0.0,5.321500,-0.8785,1.488166


In [None]:
# Export one CSV of daily time series per prefecture (first two rows only),
# grouped in one folder per INSEE-code prefix.
os.makedirs("../output/", exist_ok=True)
for _, row in pref.iloc[:2].iterrows():
    code_insee = row['Code INSEE']
    dept = code_insee[:2]  # first two characters of the INSEE code
    lon = row['lon']
    lat = row['lat']
    # fetch_daily_dataframe takes parallel lists of coordinates, so the single
    # location is wrapped in one-element lists.
    df = fetch_daily_dataframe(
        variables=["temperature_2m_max", "temperature_2m_min", "snowfall_sum"],
        lon_list=[lon],
        lat_list=[lat],
        start_date="1950-01-01",
        end_date="2025-12-31",
        model="era5"
    )
    # The result is indexed by (date, lon, lat) with datetime dates already,
    # so the index is written as-is as the first three CSV columns.
    os.makedirs(f"../output/{dept}", exist_ok=True)
    output_path = f"../output/{dept}/{code_insee}_timeseries.csv"
    df.to_csv(output_path)

In [None]:
# Display the current contents of `df`
df