### READ DATA AND CONVERT INTO CSV
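1. Set up `wdir` to point to the directory where the GRIB2 data is collected.
2. Each directory of the wdir is opened and the GRIB files inside it are processed:
    a. each GRIB file is converted (regridded) to a grid, based on the DWD weights
    b. the regridded values are extracted to `output.csv` and cut to the region of interest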

In [None]:
# Project root: the import cell changes into this directory and puts its
# `lib` sub-folder on sys.path so that `from lib import *` resolves.
wdir = "/home/daniele/documents/github/ftt01/phd/projects/hydrological_forecasting/"

In [None]:
# IMPORTs
import sys, os
import glob
import subprocess
import pandas as pd
from dask import dataframe as dd

# to link the lib in py scripts as well:
# switch to the project root first, then put <cwd>/lib at the front of the
# import path so it takes precedence over any other module of the same name
os.chdir( wdir )
sys.path.insert( 0, os.path.join(os.path.abspath(os.getcwd()),'lib') )

from lib import *

In [None]:
# SETUP
# root folder of the downloaded forecasts; one sub-directory per download
data_path = "/media/windows/projects/hydrological_forecasting/machine_learning/data/forecast/icon-d2-eps/"

# variable = 'tot_prec'
variable = 't_2m'
# number of hourly lead steps to download and process
lead_hours = 2
# number of ensemble members expected in every GRIB file
ensemble_number = 20


def latest_run_start(now):
    """Return the start time of the most recent model run usable at *now*.

    Runs start at 00, 06, 12 and 18 and a run is only selected 3 hours after
    its start.  Between 00:00 and 02:59 the selected run is therefore the
    18:00 run of the PREVIOUS day.

    Parameters
    ----------
    now : datetime.datetime
        Reference time (the notebook passes the local wall-clock time).

    Returns
    -------
    datetime.datetime
        *now* shifted back to the start of the selected run, with minutes,
        seconds and microseconds set to zero.
    """
    # shifting by the 3 h delay makes the date roll back automatically
    # when the selected run belongs to the previous day
    shifted = now - dt.timedelta(hours=3)
    return shifted.replace(hour=shifted.hour - shifted.hour % 6,
                           minute=0, second=0, microsecond=0)


# read the clock once so that hour and date always belong to the same instant
# NOTE(review): this is local time; presumably the run timestamps are UTC - confirm
_now = dt.datetime.now()
current_hour = _now.hour

_run_start = latest_run_start(_now)
h = _run_start.strftime('%H')
init_time = _run_start.strftime('%Y-%m-%dT%H:%M:%S')

## Passirio basin
# (min, max) bounds of the region of interest, in the units of the exported lat/lon columns
lat = ( 46.68, 46.945 )
lon = ( 11.015, 11.38 )

In [None]:
# display the initialisation timestamp of the model run selected in the setup cell
init_time

In [None]:
# Download the selected run into <data_path>/tmp/ with the DWD downloader image.
dir_to_save = data_path + 'tmp/'

mkNestedDir(dir_to_save)
os.chdir(dir_to_save)

# The command is built as an argument list and run without a shell, so no
# quoting is needed for paths or values.  The current directory (tmp/) is
# mounted into the container as /local, which is where the files are written.
_download_args = [
    'docker', 'run', '--rm',
    '--volume', os.getcwd() + ':/local',
    'deutscherwetterdienst/downloader', 'downloader',
    '--model', 'icon-d2-eps',
    # download the field configured in the setup cell instead of a fixed
    # 't_2m', so that the later glob on `variable` finds its files
    '--single-level-fields', variable,
    '--max-time-step', str(lead_hours),
    '--timestamp', init_time,
    '--directory', '/local',
]

# printable form of the command, kept under the original name
download_process = ' '.join(_download_args)
print(download_process)

# download the data (check=True raises CalledProcessError on a non-zero exit)
subprocess.run(_download_args, check=True)


In [None]:
# every sub-directory of data_path (the trailing '/' restricts the match to
# directories); this includes the 'tmp/' folder filled by the download cell
dirs = glob.glob( data_path + '*/' )

In [None]:
# display the list of directories that the following cells will process
dirs

In [None]:
# For every data directory: regrid all GRIB files, then for each lead step
# move the regridded file into its own '<step>/' folder and dump its values
# to '<step>/output.csv'.
for el in dirs:

    os.chdir(el)

    # regrid of the GRIBs: the directory is mounted as /local and used both
    # as input and as output location of the converter
    subprocess.run(
        ['docker', 'run', '--rm',
         '--volume', os.getcwd() + ':/local',
         '--env', 'INPUT_FILE=/local',
         '--env', 'OUTPUT_FILE=/local',
         'deutscherwetterdienst/regrid:icon-d2-eps',
         '/convert.sh'],
        check=True)

    for step in range(1, lead_hours + 1):

        # zero-padded lead step, e.g. '001'; used as folder name and file filter
        n = str(step).zfill(3)

        # create and move into the current_file_path
        current_file_path = el + n + '/'
        mkNestedDir(current_file_path)
        os.chdir(current_file_path)

        # identify and move current_file into current_file_path
        pattern = el + '*regridded*' + n + '*' + variable + '*.grib2'
        matches = glob.glob(pattern)
        if not matches:
            # fail with the pattern in the message instead of a bare IndexError
            raise FileNotFoundError(
                'no regridded GRIB file matches ' + pattern)
        current_file = matches[0]
        file_name = os.path.basename(current_file)

        # source and target are inside the same directory tree, so a plain
        # rename is enough (an existing target is overwritten, as with `mv`)
        os.replace(current_file, current_file_path + file_name)

        # extract data to output.csv: the step folder is mounted as /local and
        # the tool's standard output is written straight to the file
        extraction_process = [
            'docker', 'run', '--rm',
            '--volume', os.getcwd() + ':/local',
            'deutscherwetterdienst/python-eccodes',
            'grib_get_data', '-p', 'date,time,stepRange,shortName',
            file_name,
        ]
        with open('output.csv', 'w') as csv_out:
            subprocess.run(extraction_process, stdout=csv_out, check=True)


In [None]:
# Collect, for every grid point inside the region of interest, the values of
# all ensemble members at every lead step.
#
# Result: `full_data`, indexed by point 'ID' (1..number of points), with the
# columns 'lat', 'lon' and one column per lead step ('001', '002', ...) whose
# cells hold the list of the `ensemble_number` member values of that point.

# zero-padded lead-step labels: sub-directory names and column names
lead_time_array = [str(step).zfill(3) for step in range(1, lead_hours + 1)]

full_data = pd.DataFrame(columns=['ID', 'lat', 'lon'] + lead_time_array)

# NOTE(review): every directory writes into the same columns, so after the
# loop `full_data` holds the values of the last directory in `dirs` only.
for el in dirs:

    os.chdir(el)

    for n in lead_time_array:

        # folder holding the output.csv of this lead step
        current_file_path = el + n + '/'

        # read exported data and cut to the ROI
        # comment="L" makes the parser ignore everything from an 'L' onwards;
        # presumably this drops the repeated 'Latitude ...' header lines of
        # the export - confirm against an actual output.csv
        data_df = dd.read_csv(current_file_path + 'output.csv', sep=r'\s+', header=None, skiprows=1,
                              names=['lat', 'lon', 'values', 'date', 'time', 'step_range', 'name'], comment="L")

        data_df = data_df.astype({'lat': float, 'lon': float, 'values': float,
                                  'date': str, 'time': str, 'step_range': str, 'name': str})

        inside_roi = (
            (data_df['lat'] >= lat[0]) & (data_df['lat'] <= lat[1]) &
            (data_df['lon'] >= lon[0]) & (data_df['lon'] <= lon[1])
        )
        data_df = data_df[inside_roi].compute()

        # export data to a new structure
        # The rows are treated as `ensemble_number` consecutive blocks of
        # equal length, one block per member; `interruptor` is the block
        # length, i.e. the number of points.  Rows beyond the last complete
        # block are ignored.
        interruptor = len(data_df) // ensemble_number
        # print(interruptor)

        if 'ID' in full_data.columns:
            # first file: turn the empty frame into one row per point
            point_ids = pd.RangeIndex(1, interruptor + 1, name='ID')
            full_data = full_data.drop(columns='ID').reindex(point_ids)
        elif len(full_data) != interruptor:
            raise ValueError(
                '{}output.csv holds {} points per member, expected {}'.format(
                    current_file_path, interruptor, len(full_data)))

        # The per-member CSV export is disabled, but its folders are still
        # created so the directory layout stays the same.
        for ens in range(ensemble_number):
            mkNestedDir(current_file_path + str(ens + 1).zfill(3) + '/')

        # coordinates are taken from the first member's block
        first_member = data_df.iloc[:interruptor]
        full_data['lat'] = first_member['lat'].to_numpy()
        full_data['lon'] = first_member['lon'].to_numpy()

        # (member, point) matrix -> one list of member values per point.
        # Whole columns are assigned at once: chained
        # `full_data.loc[i][col] = ...` writes may land on a temporary copy
        # and leave `full_data` unchanged.
        member_values = data_df['values'].to_numpy()[:interruptor * ensemble_number]
        member_values = member_values.reshape(ensemble_number, interruptor)
        full_data[n] = pd.Series(member_values.T.tolist(), index=full_data.index)


In [None]:
# display the table built in the previous cell
full_data

In [None]:
# check that init_time parses back into a datetime with the format used below
dt.datetime.strptime( init_time, '%Y-%m-%dT%H:%M:%S' )

In [None]:
# Build and plot one value per lead step: the mean over all points and all
# ensemble members stored in `full_data`, minus 273.15.
# NOTE(review): the 273.15 offset looks like a Kelvin -> Celsius conversion
# and is applied whatever `variable` is set to - confirm before using this
# cell for a field other than temperature.

# parse the run start once; it is the same for every lead step
init_datetime = dt.datetime.strptime( init_time, '%Y-%m-%dT%H:%M:%S' )

series = []
dates = []

for n in range(1, lead_hours+1):

    # valid time of this lead step
    dates.append( init_datetime + dt.timedelta(hours=n) )

    # flatten the per-point member lists of this lead step into one list;
    # extend() grows the list in place instead of copying it for every point
    values = []
    for el in full_data[str(n).zfill(3)]:
        values.extend( el )

    series.append( np.mean( values ) - 273.15 )

test = pd.DataFrame( series, index=dates, columns=['values'] )
test.plot()

In [None]:
# test.to_csv( 'output_ariele.csv', sep=';')

In [None]:
# !docker run --rm --volume $(pwd):/local \
#     deutscherwetterdienst/downloader downloader \
#     --model icon \
#     --single-level-fields t_2m,tot_prec \
#     --max-time-step 5 \
#     --directory /local

In [None]:
# docker run --rm \
#     --volume $(pwd):/local \
#     --env INPUT_FILE=/local \
#     --env OUTPUT_FILE=/local \
#     deutscherwetterdienst/regrid:icon-eu-eps \
#     /convert.sh

In [None]:
# os.chdir("/media/windows/projects/hydrological_forecasting/machine_learning/data/forecast/icon-d2-eps/tmp/")
# os.getcwd()

In [None]:
# !docker run --rm --mount type=bind,source="$(pwd)"/,target=/local deutscherwetterdienst/python-eccodes grib_ls icon-d2-eps_germany_icosahedral_single-level_2021102700_000_2d_t_2m.grib2