In [19]:
import pandas as pd
import requests
import os
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the hourly station file, restricted to the columns used below, with explicit dtypes.
station_dtypes = {
    'LAT': 'float64',
    'LON': 'float64',
    'AAAAMMJJHH': 'int32',
    'RR1': 'float32',
    'T': 'float32',
}
df = pd.read_csv(
    'data/H_79_latest-2023-2024.csv',
    sep=';',
    usecols=list(station_dtypes),
    dtype=station_dtypes,
)
# AAAAMMJJHH is an integer stamp such as 2024100200 (year, month, day, hour).
df['AAAAMMJJHH'] = pd.to_datetime(df['AAAAMMJJHH'], format='%Y%m%d%H')
# Despite its name, datemax is the oldest day kept: today minus 14 days.
datemax = pd.Timestamp.now().date() - pd.Timedelta(days=14)
is_recent = df['AAAAMMJJHH'].dt.date >= datemax
df = df.loc[is_recent].reset_index(drop=True)

In [3]:
def download_grib_025(date, base_path="C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome", timeout=60): #date format : 2024-08-26T06:00:00Z
    """Download the eight AROME "0025" SP1 GRIB2 files of one model run.

    Files are written to ``{base_path}/{date with ':' replaced by '-'}/``.
    A failure on one file is printed and does not stop the remaining downloads.

    Parameters
    ----------
    date : str
        Run date as used in the remote URL, e.g. ``2024-08-26T06:00:00Z``.
    base_path : str, optional
        Local root directory for the downloads. Defaults to the path that was
        previously hard-coded, so existing calls behave the same.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    None
    """
    prevision_list = ['00H06H','07H12H','13H18H','19H24H','25H30H','31H36H','37H42H','43H48H']
    # ':' is replaced in every local name (presumably because the target is a Windows path).
    safe_date = date.replace(':', '-')
    target_dir = f"{base_path}/{safe_date}"
    try:
        # makedirs also creates missing parents and is a no-op when the directory exists.
        os.makedirs(target_dir, exist_ok=True)
    except OSError as e:
        print(f"An error occurred: {e}")
        return
    for i in prevision_list:
        url = f"https://object.data.gouv.fr/meteofrance-pnt/pnt/{date}/arome/0025/SP1/arome__0025__SP1__{i}__{date}.grib2"
        target = f"{target_dir}/arome__0025__SP1__{i}__{safe_date}.grib2"
        try:
            r = requests.get(url, timeout=timeout)
            # Without this check an HTTP error page would be saved as a .grib2 file.
            r.raise_for_status()
            with open(target, "wb") as f:
                f.write(r.content)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"An error occurred: {e}")

In [4]:
# Fetch the 00:00 UTC run of every day present in the station data.
observed_days = df['AAAAMMJJHH'].dt.date.unique()
for date in observed_days:
    download_grib_025(f"{date}T00:00:00Z")

In [6]:
def grib_to_dataframe(date, base_path):
    """Open the GRIB2 files of one run and merge them into a single dataset.

    Despite the name, the result is an ``xarray.Dataset`` (the output of
    ``xr.merge``), not a pandas DataFrame.

    Parameters
    ----------
    date : str
        Run date; ``':'`` is replaced by ``'-'`` to build the directory name.
    base_path : str
        Root directory containing one sub-directory per run.

    Returns
    -------
    xarray.Dataset
        Merge of every (file, filter) dataset found in the run directory.

    Raises
    ------
    FileNotFoundError
        If the run directory is missing or contains no ``.grib2`` file.
    """
    directory = f"{base_path}/{date.replace(':', '-')}"
    # Keep only GRIB2 files: anything else in the directory (for example index
    # files written next to the GRIB files by a previous run) would fail to open.
    # Names are used exactly as listed; rewriting them could only produce paths
    # that do not exist. Sorting makes the processing order deterministic.
    file_list = sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if file.endswith('.grib2')
    )
    if not file_list:
        raise FileNotFoundError(f"No .grib2 file found in {directory}")

    backend_kwargs = [
        {'filter_by_keys': {'paramId': 167, 'level': 2}}, #temperature
        {'filter_by_keys': {'paramId': 228228}} #precipitations
    ]

    # Each file is opened once per filter, then everything is merged.
    datasets = [
        xr.open_dataset(file, engine='cfgrib', backend_kwargs=bk)
        for file in file_list
        for bk in backend_kwargs
    ]

    return xr.merge(datasets)

In [10]:
# Local root of the downloaded AROME runs.
base_path = "C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome"
# Only the first observed day is processed here.
first_day = df['AAAAMMJJHH'].dt.date.unique()[0]
date = f"{first_day}T00:00:00Z"

# NOTE: despite the "df" prefix, this holds whatever grib_to_dataframe returns.
dfArome = grib_to_dataframe(date, base_path)

In [12]:
#Make a custom dataframe since I cant make xarray.Dataset.to_dataframe() keep the step and timedelta into the dataframe
step = dfArome['step'].values #values of each step
# First valid time, used as the reference the steps are added to.
# NOTE(review): this assumes the first step is a zero offset — confirm.
valid_date = np.datetime64(dfArome['valid_time'].values[0]) #first date value as np datetime to work with np timedelta

# The grid is identical for every step, so build and flatten it once.
lat_grid, lon_grid = np.meshgrid(dfArome['latitude'].values.round(3), dfArome['longitude'].values.round(3), indexing='ij')
flat_lat = lat_grid.flatten()
flat_lon = lon_grid.flatten()

lst = []
# Iterate over the steps actually present instead of a hard-coded count,
# so a missing file does not end in an IndexError.
for i in range(len(step)):
    # Positional indexing: assumes the step axis is the leading dimension — TODO confirm.
    tmp = pd.DataFrame({
        'Latitude': flat_lat,
        'Longitude': flat_lon,
        't2m': dfArome['t2m'][i].values.flatten(),
        'tp': dfArome['tp'][i].values.flatten(),
        'DateTime': valid_date + step[i]
    })
    lst.append(tmp)
data = pd.concat(lst).reset_index(drop=True)

In [13]:
#find the nearest Arome LAT, LON from each station
# De-duplicate (LAT, LON) pairs together: taking unique() on each column
# separately mispairs or drops stations that share a latitude or a longitude.
stations = df[['LAT', 'LON']].drop_duplicates()
# Search the distinct grid values only; the full table repeats them for every step.
grid_lat = data['Latitude'].unique()
grid_lon = data['Longitude'].unique()
nearest_lat_lon = []
for lat, lon in zip(stations['LAT'], stations['LON']):
    # Latitude and longitude are matched independently to the closest grid value.
    nearest_lat = grid_lat[np.abs(grid_lat - lat).argmin()]
    nearest_lon = grid_lon[np.abs(grid_lon - lon).argmin()]
    nearest_lat_lon.append([lat, lon, nearest_lat, nearest_lon])
nearest_lat_lon_df = pd.DataFrame(nearest_lat_lon, columns=['LAT', 'LON', 'Latitude', 'Longitude'])

In [14]:
# Attach the matched grid point to every observation and align the timestamp column name.
# NOTE: df is reassigned here, so this cell is meant to run once per kernel session.
df = (
    df
    .merge(nearest_lat_lon_df, on=['LAT', 'LON'], how='inner')
    .rename(columns={'AAAAMMJJHH':'DateTime'})
)
# Join observations and forecasts on grid point and hour, then convert t2m from Kelvin to Celsius.
end = (
    df
    .merge(data, on=['Latitude', 'Longitude', 'DateTime'], how='inner')
    .assign(t2m=lambda merged: merged['t2m'] - 273.15)
)

In [20]:
# Display the merged table: station columns (RR1, T) next to forecast columns (t2m, tp).
end

Unnamed: 0,LAT,LON,DateTime,RR1,T,Latitude,Longitude,t2m,tp
0,46.941167,-0.584167,2024-10-02 00:00:00,0.0,7.0,46.95,-0.575,7.623260,
1,46.941167,-0.584167,2024-10-02 01:00:00,0.2,6.5,46.95,-0.575,8.573120,0.000000
2,46.941167,-0.584167,2024-10-02 02:00:00,0.0,6.3,46.95,-0.575,8.008331,0.000000
3,46.941167,-0.584167,2024-10-02 03:00:00,0.0,7.7,46.95,-0.575,7.205139,0.000000
4,46.941167,-0.584167,2024-10-02 04:00:00,0.0,8.1,46.95,-0.575,7.023804,0.000000
...,...,...,...,...,...,...,...,...,...
828,46.989167,-0.207000,2024-10-03 20:00:00,0.0,11.8,47.00,-0.200,11.915375,0.070312
829,46.989167,-0.207000,2024-10-03 21:00:00,0.0,9.7,47.00,-0.200,11.559174,0.070312
830,46.989167,-0.207000,2024-10-03 22:00:00,0.0,7.5,47.00,-0.200,10.671600,0.070312
831,46.989167,-0.207000,2024-10-03 23:00:00,0.0,6.8,47.00,-0.200,10.079651,0.062500


In [39]:
# Root-mean-square difference between the t2m and T columns.
temperature_error = end['t2m'].sub(end['T'])
rmse = np.sqrt(temperature_error.pow(2).mean())
rmse

1.1982584

In [38]:
# Mean signed difference (t2m minus T).
bias = end['t2m'].sub(end['T']).mean()
bias

-0.04287086

In [41]:
#Conclusion:
#Over a very (too) short period of time, the Arome model for the Deux-Sèvres department has a typical error of about 1.20°C (RMSE) and a very
#small negative bias (-0.04°C, i.e. a slight underprediction), which suggests it is very good at predicting local events.
#We now need more data to see how the model performs overall.