In [None]:
import glob
import xarray as xr
import datetime as dt
import argparse

In [None]:
def mkNestedDir(dirTree):
    """Create the directory ``dirTree`` together with any missing parents.

    Calling it for a directory that already exists does nothing and does not
    raise.

    Args:
        dirTree: Path of the directory to create (anything accepted by
            ``pathlib.Path``).
    """
    import pathlib

    target_dir = pathlib.Path(dirTree)
    target_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Run-mode flag; it is not read anywhere else in the visible part of this file.
script = True

# Number of ensemble members exported for every input file.
ensemble_number = 20

# GRIB2 inputs are looked up in input_path and the CSV directory tree is
# written below output_path (both currently point at the same directory).
input_path = "/home/hydrological_forecasting/data/"
output_path = "/home/hydrological_forecasting/data/"

# Make sure the output root exists before anything is written to it.
mkNestedDir(output_path)

In [None]:
# Command-line interface: the four bounding-box edges followed by the basin
# name, all positional.
# NOTE(review): parse_args() reads sys.argv, so this cell only works when the
# notebook is executed as a script with those five arguments supplied.
parser = argparse.ArgumentParser(
    description="Convert GRIB2 ensemble files to per-member CSV files for one basin."
)

# type=float lets argparse validate the numbers itself: a malformed value now
# produces a usage error naming the offending argument instead of a bare
# ValueError traceback from a later float() call.
parser.add_argument('bbox_lat_min', type=float, help="minimum latitude of the bounding box")
parser.add_argument('bbox_lat_max', type=float, help="maximum latitude of the bounding box")
parser.add_argument('bbox_lon_min', type=float, help="minimum longitude of the bounding box")
parser.add_argument('bbox_lon_max', type=float, help="maximum longitude of the bounding box")
parser.add_argument('basin', type=str, help="basin name, used as a directory level in the output path")
# NOTE: input_path / output_path are not command-line arguments; they are
# assigned as plain string constants earlier in this file.

args = parser.parse_args()
bbox_lat_min = args.bbox_lat_min
bbox_lat_max = args.bbox_lat_max
bbox_lon_min = args.bbox_lon_min
bbox_lon_max = args.bbox_lon_max

basin = args.basin

In [None]:
# Turn the requested bounding box into the latitude/longitude limits that are
# later used to crop every dataset.
# NOTE(review): bbox_extraction is neither defined nor imported in the visible
# part of this file - confirm where it comes from before running on a fresh
# kernel.
lat_min, lat_max, lon_min, lon_max = bbox_extraction(
    bbox_lat_min=bbox_lat_min,
    bbox_lat_max=bbox_lat_max,
    bbox_lon_min=bbox_lon_min,
    bbox_lon_max=bbox_lon_max,
    spatial_resolution=0.1,
    lon_factor=360,
)

In [None]:
# Collect the GRIB2 inputs matched by either naming pattern. A file whose name
# satisfies both patterns (e.g. "regridded_regular_x.grib2") would otherwise be
# listed - and therefore converted - twice; dict.fromkeys drops the repeats
# while keeping the first-seen order of the original concatenation.
files_to_convert = list(dict.fromkeys(
    glob.glob(input_path + 'regridded*.grib2')
    + glob.glob(input_path + '*regular*.grib2')
))

In [None]:
# Echo the directory that is searched for input files.
print(input_path)

In [None]:
# Conversion rules keyed by the variable's "long_name" attribute:
#   long_name -> (output sub-directory name, offset subtracted before rounding)
# An offset of None means the raw values are only rounded.
_VARIABLE_RULES = {
    # The sample metadata below reports units 'K'; 273.15 is subtracted from
    # the raw values.
    'Temperature': ('temperature', 273.15),
    'Total precipitation rate': ('precipitation', None),
    'Snow depth water equivalent': ('snow', None),
}

# Convert every GRIB2 file into one CSV per ensemble member, written to
#   <output_path>/<lead_hour>/<basin>/<variable>/<ensemble>/output.csv
for file in files_to_convert:

    print("Converting: " + file)

    # Name of the dataframe column that identifies the ensemble member.
    ens_keyname = 'ensemble0'

    # Open inside a context manager so the file handle is released as soon as
    # the cropped data has been copied into pandas (previously the dataset was
    # only un-named with `del`, never closed).
    # NOTE(review): confirm the "pynio" engine is available in the deployed
    # xarray version.
    with xr.open_dataset(file, engine="pynio") as ds:
        # xarray.Dataset layout of a sample file:
        # Dimensions:           (ensemble0: 20, lat_0: 745, lon_0: 1214)
        # Coordinates:
        # * lat_0             (lat_0) float32 43.18 43.2 43.22 ... 58.02 58.04 58.06
        # * lon_0             (lon_0) float32 356.1 356.1 356.1 ... 380.3 380.3 380.3
        # * ensemble0         (ensemble0) int32 0 1 2 3 4 5 6 7 ... 13 14 15 16 17 18 19
        # Data variables:
        #     TMP_P1_L103_GLL0  (ensemble0, lat_0, lon_0) float32 ...
        #     ensemble0_info    (ensemble0) |S0 ...

        # The first data variable is the one that gets exported.
        variable_mapped_name = list(ds.keys())[0]

        # Copy the attributes so they stay usable after the file is closed.
        variable_mapped_meta = dict(ds.variables.get(variable_mapped_name).attrs)
        # Sample attributes:
        # {
        #     'center': 'Offenbach (RSMC)',
        #     'production_status': 'Operational products',
        #     'long_name': 'Temperature',
        #     'units': 'K',
        #     'grid_type': 'Latitude/longitude',
        #     'parameter_discipline_and_category': 'Meteorological products, Temperature',
        #     'parameter_template_discipline_category_number': array([1, 0, 0, 0], dtype = int32),
        #     'level_type': 'Specified height level above ground (m)',
        #     'level': array([2.], dtype = float32),
        #     'forecast_time': array([60], dtype = int32),
        #     'forecast_time_units': 'minutes',
        #     'initial_time': '02/17/2021 (00:00)'
        # }

        # Fail early, with a message, on variables there is no rule for.
        long_name = variable_mapped_meta['long_name']
        if long_name not in _VARIABLE_RULES:
            raise KeyError(
                "Unsupported variable long_name {!r} in file {}".format(long_name, file)
            )
        variable, value_offset = _VARIABLE_RULES[long_name]

        # Crop to the bounding box, then flatten to a table while the file is
        # still open.
        df = ds.sel(
            lat_0=slice(lat_min, lat_max),
            lon_0=slice(lon_min, lon_max),
        ).to_dataframe()

    df = df.reset_index()

    # Element-wise Python round() is kept on purpose: it fixes the exact
    # decimal values that end up in the CSV files.
    # 360 is subtracted from the lon_0 values to obtain the exported 'lon'.
    df['lon'] = [round(idx - 360, 6) for idx in df['lon_0']]
    df['lat'] = [round(idx, 6) for idx in df['lat_0']]
    if value_offset is None:
        df['values'] = [round(val, 2) for val in df[variable_mapped_name]]
    else:
        df['values'] = [round(val - value_offset, 2) for val in df[variable_mapped_name]]

    # Per-file values, computed once instead of once per ensemble member.
    current_datetime = dt.datetime.strptime(
        variable_mapped_meta['initial_time'], '%m/%d/%Y (%H:%M)'
    )
    # forecast_time is divided by 60 (the sample metadata gives it in minutes;
    # the 'forecast_time_units' attribute itself is not checked).
    lead_hour = str(int(variable_mapped_meta['forecast_time'] / 60)).zfill(3)

    # Diagnostics: the frame is identical for every member, so show it once
    # per file rather than once per ensemble member.
    print("Dataframe keys: \n")
    print(df.keys())

    print("Dataframe: \n")
    print(df)

    for ens in range(ensemble_number):
        # Output directories are numbered from 001, dataset members from 0.
        current_ens = str(ens + 1).zfill(3)
        print(current_ens)

        # Build the member table as a new frame (no in-place edits on a
        # filtered slice): running 1-based id plus lat/lon as index, 'values'
        # as the only column.
        current_ens_data = df.loc[df[ens_keyname] == ens, ['lat', 'lon', 'values']]
        current_ens_data = current_ens_data.assign(
            id=range(1, 1 + len(current_ens_data))
        ).set_index(['id', 'lat', 'lon'])

        # NOTE(review): `date` is supplied but the template has no {date}
        # field, so runs with different initial times write to the same
        # directories - confirm whether that is intended.
        output_file_path = output_path + "{lead_hour}/{basin}/{variable}/{ensemble}/".format(
            date=current_datetime.strftime('%Y%m%d'),
            lead_hour=lead_hour,
            basin=basin,
            variable=variable,
            ensemble=current_ens
        )
        mkNestedDir(output_file_path)

        ## to save on DB
        current_ens_data.to_csv(output_file_path + 'output.csv')

    # Release the per-file table before the next file is loaded.
    del df