In [9]:
import pandas as pd
import pygrib
import os
import numpy as np
import cfgrib 
import xarray as xr
import datetime
from datetime import datetime
from scipy.spatial import KDTree

In [10]:
station_file='STATION.csv'
station_df = pd.read_csv(station_file)

In [11]:
def read_grib_file(file_path):
	grbs = pygrib.open(file_path)
	for grb in grbs:
		print(grb)
	grbs.close()
	
	return grbs

In [12]:
def build_kdtree(df):
    """
    Build a KDTree from the gridded dataset.
    
    Args:
        df (pd.DataFrame): DataFrame containing the grib data with lat and lon as the index.

    Returns:
        KDTree: A KDTree built on lat/lon coordinates.
        np.array: Grid points as an array of lat/lon pairs.
    """
    # Ensure lat/lon values are floats and create an array of lat/lon pairs
    grid_points = np.array(list(zip(df.index.get_level_values('latitude').astype(float), df.index.get_level_values('longitude').astype(float))))
    
    # Build and return the KDTree
    return KDTree(grid_points), grid_points

In [None]:
for year in range(2023,2025):
    input_dir = f'{year}'
    output_dir = f'station_by_datetime_csv/{year}'
    print(year)
    # Check if file already exists
    existing_files = set()
    for filename in os.listdir(output_dir):
        if filename.endswith('.grib'):
            # Extract the date and time from the filename (assuming format: era5_land_YYYYMMDD_HHMM.grib)
            date_part = filename.split('_')[0]  # Extract YYYYMMDD part
            time_part = filename.split('_')[1].replace('.grib', '')  # Extract HHMM part
            existing_files.add(f"{date_part}_{time_part}")
    
    for file in os.listdir(input_dir):
        if file.endswith('.grib'):
            # print(file)
            grbs = read_grib_file(os.path.join(input_dir, file))
            
            date = file.split('_')[2]  # Extract YYYYMMDD part
            time = file.split('_')[3].replace('.grib', '')  # Extract HHMM part
            date_time = date+'_'+time
            
            if date_time in existing_files:
                continue

            else:
                
                with xr.open_dataset(os.path.join(input_dir, file)) as ds:
                    df_raw = ds.to_dataframe()
                    df = df_raw.drop(columns=['number', 'time', 'step', 'surface', 'valid_time'])
                    # get date and time
                    date = file.split('_')[2]
                    time = file.split('_')[3].split('.')[0]
                    
                    df['date'] = date
                    df['time'] = time
                    # build KDTree
                    tree, grid_points = build_kdtree(df)
        
                    station_coords = station_df[['Lat', 'Lon']].to_numpy()
        
                    _, nearest_grid_idx = tree.query(station_coords)
        
                    nearest_grid_points = grid_points[nearest_grid_idx]
                    
                    nearest_grid_points = [(round(lat, 3), round(lon, 3)) for lat, lon in nearest_grid_points]
        
                    df.index = pd.MultiIndex.from_tuples([(round(lat, 3), round(lon, 3)) for lat, lon in df.index], names=['lat', 'lon'])
        
                    nearest_grid_values = df.loc[nearest_grid_points].reset_index()
        
                    station_df_final = pd.concat([station_df.reset_index(drop=True), nearest_grid_values[['t2m', 'u10', 'v10','tp','date','time']]], axis=1)
        
                    station_df_final.to_csv(f'{output_dir}/{date}_{time}_station.csv') 

2023
1:2 metre temperature:K (instant):regular_ll:surface:level 0:fcst time 22 hrs:from 202310240000
2:10 metre U wind component:m s**-1 (instant):regular_ll:surface:level 0:fcst time 22 hrs:from 202310240000
3:10 metre V wind component:m s**-1 (instant):regular_ll:surface:level 0:fcst time 22 hrs:from 202310240000
4:Total precipitation:m (accum):regular_ll:surface:level 0:fcst time 21-22 hrs (accum):from 202310240000
1:2 metre temperature:K (instant):regular_ll:surface:level 0:fcst time 18 hrs:from 202308260000
2:10 metre U wind component:m s**-1 (instant):regular_ll:surface:level 0:fcst time 18 hrs:from 202308260000
3:10 metre V wind component:m s**-1 (instant):regular_ll:surface:level 0:fcst time 18 hrs:from 202308260000
4:Total precipitation:m (accum):regular_ll:surface:level 0:fcst time 17-18 hrs (accum):from 202308260000
1:2 metre temperature:K (instant):regular_ll:surface:level 0:fcst time 24 hrs:from 202305090000
2:10 metre U wind component:m s**-1 (instant):regular_ll:surface: