### Notebook Pour une rentrée en sciences 2026

In [15]:
import openmeteo_requests

import requests
import pandas as pd
import requests_cache
import os
from retry_requests import retry

In [16]:


def geocode_prefecture_ban(prefecture_name: str, department: str | None = None):
    """
    Geocode a French prefecture (city) using BAN API.
    Returns (lat, lon, label) or raises ValueError if not found.
    """
    q = prefecture_name if department is None else f"{prefecture_name}, {department}"
    url = "https://api-adresse.data.gouv.fr/search/"
    r = requests.get(url, params={"q": q, "limit": 1, "type": "municipality"}, timeout=30)
    r.raise_for_status()
    data = r.json()

    features = data.get("features", [])
    if not features:
        raise ValueError(f"No result for prefecture query: {q}")

    feat = features[0]
    lon, lat = feat["geometry"]["coordinates"]   # BAN returns [lon, lat]
    label = feat["properties"].get("label", q)
    return lat, lon, label


# --- Choose your prefecture here ---
prefecture = "Aurillac"          # example
department = "Cantal"         # optional but helps disambiguate

lat, lon, label = geocode_prefecture_ban(prefecture, department)
print(f"Prefecture geocoded: {label} -> lat={lat}, lon={lon}")


Prefecture geocoded: Aurillac -> lat=44.918771, lon=2.435053


In [17]:
pref = pd.read_csv("data/pref_lat_lon.csv", sep=";")
pref["lat"] = pref["Geo Point"].apply(lambda x: float(x.split(",")[0]))
pref["lon"] = pref["Geo Point"].apply(lambda x: float(x.split(",")[1]))
pref = pref[['Code INSEE', 'Commune', 'Service', 'lat', 'lon']]
# pref = pref[pref.Service == 'Préfecture']

In [18]:
pref[pref['Code INSEE'].str.startswith('34')]

Unnamed: 0,Code INSEE,Commune,Service,lat,lon
185,34142,Lodève,Sous-préfecture,43.733155,3.319806
261,34172,Montpellier,Préfecture de région,43.606948,3.875238
309,34032,Béziers,Sous-préfecture,43.344687,3.230507


In [19]:
import os
import pandas as pd
import requests_cache
from retry_requests import retry
import openmeteo_requests

def fetch_daily_dataframe(
    variables,
    locations,  # list of (lat, lon) tuples
    start_date="1950-01-01",
    end_date="2025-12-31",
    model="era5",
    cache_csv="temp/daily_data.csv",
):
    """
    Returns DataFrame indexed by (date, lon, lat)
    locations: [(lat, lon), ...]
    """

    os.makedirs(os.path.dirname(cache_csv), exist_ok=True)

    # --- Try CSV cache ---
    if os.path.exists(cache_csv):
        df_existing = pd.read_csv(cache_csv, parse_dates=["date"])
        # Ensure it has the expected index columns
        required_cols = {"date", "lon", "lat"}
        if required_cols.issubset(df_existing.columns):
            # check locations match (as a set of pairs)
            existing_locs = set(zip(df_existing["lat"].unique(), df_existing["lon"].unique()))
            requested_locs = set(locations)

            # check variables present
            vars_ok = all(v in df_existing.columns for v in variables)

            if (requested_locs.issubset(existing_locs)
                and vars_ok):
                print(f"Using cached daily data from {cache_csv}")
                return df_existing.set_index(["date", "lon", "lat"]).sort_index()

    # --- Fetch from API ---
    lat_list = [lat for lat, lon in locations]
    lon_list = [lon for lat, lon in locations]

    params = {
        "latitude": lat_list,
        "longitude": lon_list,
        "start_date": start_date,
        "end_date": end_date,
        "daily": variables,
        "models": model,
    }

    cache_session = requests_cache.CachedSession('temp/.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    responses = openmeteo.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params=params
    )

    frames = []
    # IMPORTANT: pair using the same `locations` ordering that was sent
    for (lat, lon), response in zip(locations, responses):
        daily = response.Daily()

        dates = pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left",
        )

        data = {"date": dates}
        for i, var in enumerate(variables):
            data[var] = daily.Variables(i).ValuesAsNumpy()

        df_loc = (
            pd.DataFrame(data)
            .assign(lon=lon, lat=lat)
            .set_index(["date", "lon", "lat"])
        )
        frames.append(df_loc)

    df = pd.concat(frames).sort_index()

    # save cache (as flat table for robust reload)
    df.reset_index().to_csv(cache_csv, index=False)

    return df


In [20]:
lons = pref[pref.Service == 'Préfecture de région'].lon.to_list()
lats = pref[pref.Service == 'Préfecture de région'].lat.to_list()
df = fetch_daily_dataframe(
    variables=["snowfall_sum", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean"],
    locations=list(zip(lats, lons)))
seuils = pd.read_csv("data/seuils_canicules.csv", sep=",")
seuils['dep'] = seuils['dep'].astype(str).str.zfill(2)

df = pd.read_csv("temp/daily_data.csv",)
df = df.reset_index()
pref_region = pref[pref.Service == 'Préfecture de région']
df.loc[:, 'lon'] = df['lon'].astype(float).round(4)
pref_region.loc[:, 'lon'] = pref_region['lon'].astype(float).round(4)
df = df.merge(
    pref_region[['Code INSEE', 'Commune', 'lon']],
    on=['lon'],
    how='left'
)

In [21]:
df['dept'] = df['Code INSEE'].apply(lambda x: str(x)[:2])
df = df.merge(seuils, left_on='dept', right_on='dep', how='left')
df['canicule'] = (df['temperature_2m_max'] >= df['smax'])&(df['temperature_2m_min'] >= df['smin'])

In [22]:
df.groupby('Commune')['temperature_2m_max'].mean().sort_values(ascending=False)

Commune
Orléans                 19.113201
Caen                    18.408519
Limoges                 18.405642
Châlons-en-Champagne    18.189463
Lyon                    18.013420
Lille                   15.566972
Poitiers                15.489894
Rouen                   15.481997
Amiens                  15.405394
Marseille               15.273907
Metz                    15.232579
Ajaccio                 15.198598
Nantes                  14.832908
Dijon                   14.778251
Clermont-Ferrand        14.605919
Rennes                  14.051954
Bordeaux                13.526669
Montpellier             13.347299
Besançon                13.052149
Strasbourg              12.645703
Toulouse                12.563209
Name: temperature_2m_max, dtype: float64

In [23]:
def make_plot(df, col, pref_name, out_path=None):
    import matplotlib.pyplot as plt

    dict_col = {
        'temperature_2m_mean': 'Tempature moyenne 2m (°C)',
        'snowfall_sum': 'Chute de neige annuelle (cm)',
        'canicule': 'Nombre de jours de canicule',
    }

    plt.figure(figsize=(12, 6))
    plt.plot(df['year'], df[col], label=col)
    plt.title(f"{dict_col.get(col, col)} ")
    plt.xlabel("Année")
    plt.ylabel(dict_col.get(col, col))
    plt.legend()
    plt.grid()
    if out_path is not None:
        plt.savefig(out_path)
    plt.close()

In [24]:
df_marseille = df[df['Commune'] == 'Marseille']
df_marseille

Unnamed: 0,index,date,lon,lat,snowfall_sum,temperature_2m_max,temperature_2m_min,temperature_2m_mean,Code INSEE,Commune,dept,dep,smin,smax,canicule
16,16,1950-01-01 00:00:00+00:00,5.3801,43.300213,0.0,4.658,-2.042,1.303833,13055,Marseille,13,13,24,35,False
37,37,1950-01-02 00:00:00+00:00,5.3801,43.300213,0.0,4.108,-2.092,0.333000,13055,Marseille,13,13,24,35,False
58,58,1950-01-03 00:00:00+00:00,5.3801,43.300213,0.0,7.408,0.008,4.355917,13055,Marseille,13,13,24,35,False
79,79,1950-01-04 00:00:00+00:00,5.3801,43.300213,0.0,6.858,6.108,6.514250,13055,Marseille,13,13,24,35,False
100,100,1950-01-05 00:00:00+00:00,5.3801,43.300213,0.0,7.158,6.058,6.389250,13055,Marseille,13,13,24,35,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
582850,582850,2025-12-27 00:00:00+00:00,5.3801,43.300213,0.0,12.208,0.058,4.597584,13055,Marseille,13,13,24,35,False
582871,582871,2025-12-28 00:00:00+00:00,5.3801,43.300213,0.0,9.958,-1.292,3.110084,13055,Marseille,13,13,24,35,False
582892,582892,2025-12-29 00:00:00+00:00,5.3801,43.300213,0.0,8.358,-1.892,1.147583,13055,Marseille,13,13,24,35,False
582913,582913,2025-12-30 00:00:00+00:00,5.3801,43.300213,0.0,2.108,-2.542,-0.731584,13055,Marseille,13,13,24,35,False


In [25]:
os.makedirs("output/", exist_ok=True)
df['Code INSEE'] = df['Code INSEE'].astype(str).str.zfill(5)
for code_insee in df['Code INSEE'].unique():
    dept = code_insee[:2]
    df_loc = df[df['Code INSEE'] == code_insee].copy()
    commune = df_loc['Commune'].iloc[0]
    print(f"Processing {commune} ({code_insee})")
    
    os.makedirs(f"../output/{dept}", exist_ok=True)
    #save the resampled to yearly data
    df_loc.index = pd.to_datetime(df_loc['date'])
    df_loc = df_loc.resample('YS').agg({
        'temperature_2m_mean': 'mean',
        'snowfall_sum': 'sum',
        'canicule': 'sum',
    })
    os.makedirs(f"output/Dept_{dept}", exist_ok=True)
    df_loc['year'] = df_loc.index.year
    make_plot(df_loc, 'temperature_2m_mean', commune, out_path=f"output/Dept_{dept}/{code_insee}_temperature_2m_mean.png")
    make_plot(df_loc, 'snowfall_sum', commune, out_path=f"output/Dept_{dept}/{code_insee}_snowfall_sum.png")
    make_plot(df_loc, 'canicule', commune, out_path=f"output/Dept_{dept}/{code_insee}_canicule.png")
    #save the yearly data to csv
    df_loc.to_csv(f"output/Dept_{dept}/{commune}_yearly_data.csv")
    

Processing Rennes (35238)
Processing Nantes (44109)
Processing Bordeaux (33063)
Processing Caen (14118)
Processing Poitiers (86194)
Processing Rouen (76540)
Processing Limoges (87085)
Processing Toulouse (31555)
Processing Orléans (45234)
Processing Amiens (80021)
Processing Lille (59350)
Processing Clermont-Ferrand (63113)
Processing Montpellier (34172)
Processing Châlons-en-Champagne (51108)
Processing Lyon (69123)
Processing Dijon (21231)
Processing Marseille (13055)
Processing Besançon (25056)
Processing Metz (57463)
Processing Strasbourg (67482)
Processing Ajaccio (2A004)


In [26]:
df_loc

Unnamed: 0_level_0,temperature_2m_mean,snowfall_sum,canicule,year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1950-01-01 00:00:00+00:00,10.952290,17.430001,0,1950
1951-01-01 00:00:00+00:00,10.408637,2.100000,0,1951
1952-01-01 00:00:00+00:00,10.778400,36.260001,0,1952
1953-01-01 00:00:00+00:00,10.735703,16.590000,0,1953
1954-01-01 00:00:00+00:00,10.239504,9.520000,0,1954
...,...,...,...,...
2021-01-01 00:00:00+00:00,11.462175,5.880000,0,2021
2022-01-01 00:00:00+00:00,13.191742,1.400000,0,2022
2023-01-01 00:00:00+00:00,13.145143,5.040000,0,2023
2024-01-01 00:00:00+00:00,12.129145,6.440000,0,2024


In [27]:
df

Unnamed: 0,index,date,lon,lat,snowfall_sum,temperature_2m_max,temperature_2m_min,temperature_2m_mean,Code INSEE,Commune,dept,dep,smin,smax,canicule
0,0,1950-01-01 00:00:00+00:00,-1.6792,48.108595,0.0,1.7950,-4.9550,-1.136250,35238,Rennes,35,35,19,34,False
1,1,1950-01-01 00:00:00+00:00,-1.5515,47.216842,0.0,2.2405,-4.5095,-0.640750,44109,Nantes,44,44,20,34,False
2,2,1950-01-01 00:00:00+00:00,-0.5729,44.842679,0.0,1.0235,-4.6765,-1.776500,33063,Bordeaux,33,33,21,35,False
3,3,1950-01-01 00:00:00+00:00,-0.3565,49.181047,0.0,8.1055,5.4055,6.661751,14118,Caen,14,14,18,31,False
4,4,1950-01-01 00:00:00+00:00,0.3443,46.581776,0.0,2.7205,-4.6295,-0.314917,86194,Poitiers,86,86,19,35,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
582934,582934,2025-12-31 00:00:00+00:00,5.3801,43.300213,0.0,4.4580,-3.7920,-0.975333,13055,Marseille,13,13,24,35,False
582935,582935,2025-12-31 00:00:00+00:00,6.0250,47.238402,0.0,0.3645,-7.2855,-3.304250,25056,Besançon,25,25,19,33,False
582936,582936,2025-12-31 00:00:00+00:00,6.1755,49.118211,0.0,4.5940,-3.8060,-0.901833,57463,Metz,57,57,19,34,False
582937,582937,2025-12-31 00:00:00+00:00,7.7366,48.585217,0.0,2.2580,-4.8420,-0.173250,67482,Strasbourg,67,67,19,34,False
