In [1]:
import pandas as pd
import requests
import os
import xarray as xr
import numpy as np

In [13]:
df = pd.read_csv('data/H_79_latest-2023-2024.csv', sep=';', usecols=['LAT', 'LON', 'AAAAMMJJHH', 'RR1', 'T'], dtype={'LAT':'float64', 'LON':'float64', 'AAAAMMJJHH':'int32', 'RR1':'float32', 'T':'float32'})

In [14]:
df['AAAAMMJJHH'] = pd.to_datetime(df['AAAAMMJJHH'], format='%Y%m%d%H')

In [15]:
datemax = pd.Timestamp.now().date() - pd.Timedelta(14, "d")
datemax

datetime.date(2024, 10, 1)

In [16]:
df = df[df['AAAAMMJJHH'].dt.date >= datemax].reset_index(drop=True)

In [17]:
df

Unnamed: 0,LAT,LON,AAAAMMJJHH,RR1,T
0,46.941167,-0.584167,2024-10-01 00:00:00,0.0,13.5
1,46.941167,-0.584167,2024-10-01 01:00:00,0.6,13.4
2,46.941167,-0.584167,2024-10-01 02:00:00,0.2,13.3
3,46.941167,-0.584167,2024-10-01 03:00:00,0.4,13.1
4,46.941167,-0.584167,2024-10-01 04:00:00,0.6,12.5
...,...,...,...,...,...
5367,46.989167,-0.207000,2024-10-13 23:00:00,0.0,13.2
5368,46.989167,-0.207000,2024-10-14 00:00:00,0.2,14.0
5369,46.989167,-0.207000,2024-10-14 01:00:00,0.0,14.2
5370,46.989167,-0.207000,2024-10-14 02:00:00,0.0,14.2


In [7]:
def download_grib_025(date): #date format : 2024-08-26T06:00:00Z
    prevision_list = ['00H06H','07H12H','13H18H','19H24H','25H30H','31H36H','37H42H','43H48H']
    for i in prevision_list:
        url = f"https://object.data.gouv.fr/meteofrance-pnt/pnt/{date}/arome/0025/SP1/arome__0025__SP1__{i}__{date}.grib2"
        try:
            r = requests.get(url)
            if not os.path.isdir(f"C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/{date.replace(':', '-')}"):
                os.mkdir(f"C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/{date.replace(':', '-')}")
            with open(f"C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/{date.replace(':', '-')}/arome__0025__SP1__{i}__{date.replace(':', '-')}.grib2", "wb") as f:
                f.write(r.content)
        except Exception as e:
                print(f"An error occurred: {e}")

In [18]:
for date in df['AAAAMMJJHH'].dt.date.unique():
    download_grib_025(str(date)+"T00:00:00Z")

KeyboardInterrupt: 

In [8]:
def grib_to_dataframe(date, base_path):
    directory = f"{base_path}/{date.replace(':', '-')}"
    file_list = [os.path.join(directory, file.replace(':', '-')) for file in os.listdir(directory)]

    backend_kwargs = [
        {'filter_by_keys': {'paramId': 167, 'level': 2}},
        {'filter_by_keys': {'paramId': 228228}}
    ]

    datasets = []
    for file in file_list:
        for bk in backend_kwargs:
            ds = xr.open_dataset(file, engine='cfgrib', backend_kwargs=bk)
            datasets.append(ds)

    ds = xr.merge(datasets)
    return ds

In [9]:
base_path = "C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome"
date = str(df['AAAAMMJJHH'].dt.date.unique()[0]) + "T00:00:00Z"

In [18]:
dfArome = grib_to_dataframe(date, base_path)

Can't create file 'C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/2024-10-01T00-00-00Z\\arome__0025__SP1__00H06H__2024-10-01T00-00-00Z.grib2.5b7b6.idx.da267.idx'
Traceback (most recent call last):
  File "C:\Users\alexl\AppData\Local\Programs\Python\Python310\lib\site-packages\cfgrib\messages.py", line 273, in itervalues
    yield self.filestream.message_from_file(file, errors=errors)
  File "C:\Users\alexl\AppData\Local\Programs\Python\Python310\lib\site-packages\cfgrib\messages.py", line 340, in message_from_file
    return Message.from_file(file, offset, **kwargs)
  File "C:\Users\alexl\AppData\Local\Programs\Python\Python310\lib\site-packages\cfgrib\messages.py", line 104, in from_file
    raise EOFError("End of file: %r" % file)
EOFError: End of file: <_io.BufferedReader name='C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/2024-10-01T00-00-00Z\\arome__0025__SP1__00H06H__2024-10-01T00-00-00Z.grib2.5b7b6.idx'>

During handling of the above exception, 

EOFError: No valid message found: 'C:/Users/alexl/Documents/GitHub/Meteo/AromeAccuracy/data/arome/2024-10-01T00-00-00Z\\arome__0025__SP1__00H06H__2024-10-01T00-00-00Z.grib2.5b7b6.idx'

In [19]:
#Make a custom dataframe since I cant make xarray.Dataset.to_dataframe() keep the step and timedelta into the dataframe
step = dfArome['step'].values #values of each step (48 times one hour)
valid_date = np.datetime64(dfArome['valid_time'].values[0]) #first date value as np datetime to work with np timedelta
lst = []
for i in range(49): #number of steps
    lat_grid, lon_grid = np.meshgrid(dfArome['latitude'].values.round(3), dfArome['longitude'].values.round(3), indexing='ij')
    # flatten array
    flat_temp = dfArome['t2m'][i].values.flatten()
    flat_precip = dfArome['tp'][i].values.flatten()
    flat_lat = lat_grid.flatten()
    flat_lon = lon_grid.flatten()
    
    # create dataframe
    tmp = pd.DataFrame({
        'Latitude': flat_lat,
        'Longitude': flat_lon,
        't2m': flat_temp,
        'tp': flat_precip, 
        'DateTime': valid_date + step[i]
    })
    lst.append(tmp)
data = pd.concat(lst).reset_index(drop=True)

In [20]:
lat_lon_unique = [df['LAT'].unique(), df['LON'].unique()]
nearest_lat_lon = []
for lat, lon in zip(lat_lon_unique[0], lat_lon_unique[1]):    
    nearest_lat_idx = (data['Latitude'] - lat).abs().idxmin()
    nearest_lon_idx = (data['Longitude'] - lon).abs().idxmin()
    nearest_lat_lon.append([lat, lon, data.iloc[nearest_lat_idx]['Latitude'], data.iloc[nearest_lon_idx]['Longitude']])
nearest_lat_lon_df = pd.DataFrame(nearest_lat_lon, columns=['LAT', 'LON', 'Latitude', 'Longitude'])

In [21]:
df = pd.merge(df, nearest_lat_lon_df, on=['LAT', 'LON'], how='inner')

In [22]:
df = df.rename(columns={'AAAAMMJJHH':'DateTime'})

In [23]:
end = pd.merge(df, data, on=['Latitude', 'Longitude', 'DateTime'], how='inner')

In [26]:
end['t2m'] = end['t2m'] - 273.15

In [27]:
end

Unnamed: 0,LAT,LON,DateTime,RR1,T,Latitude,Longitude,t2m,tp
0,46.941167,-0.584167,2024-10-01 00:00:00,0.0,13.5,46.95,-0.575,13.143036,
1,46.941167,-0.584167,2024-10-01 01:00:00,0.6,13.4,46.95,-0.575,12.773590,0.386475
2,46.941167,-0.584167,2024-10-01 02:00:00,0.2,13.3,46.95,-0.575,12.502930,0.442627
3,46.941167,-0.584167,2024-10-01 03:00:00,0.4,13.1,46.95,-0.575,12.652252,0.740723
4,46.941167,-0.584167,2024-10-01 04:00:00,0.6,12.5,46.95,-0.575,12.603149,2.315430
...,...,...,...,...,...,...,...,...,...
828,46.989167,-0.207000,2024-10-02 20:00:00,0.0,12.8,47.00,-0.200,13.946472,24.285156
829,46.989167,-0.207000,2024-10-02 21:00:00,0.0,11.8,47.00,-0.200,13.466675,24.351562
830,46.989167,-0.207000,2024-10-02 22:00:00,0.0,10.9,47.00,-0.200,13.019073,24.351562
831,46.989167,-0.207000,2024-10-02 23:00:00,0.0,11.5,47.00,-0.200,12.109070,24.351562
