### READ DATA AND CONVERT INTO CSV
1. Set `wdir` to the directory where the GRIB2 data is collected.
2. Each directory inside `wdir` is opened and the GRIB files inside it are processed:
    a. each GRIB file is converted into a grid, based on the DWD weights
    b. 

In [None]:
# Working directory of the project; a later cell chdir()s here and adds its
# `lib` sub-directory to sys.path.
wdir = "/home/daniele/documents/github/ftt01/phd/projects/hydrological_forecasting/"

In [None]:
# IMPORTs
import sys, os
import glob
import subprocess
import pandas as pd

# to link the lib in py scripts as well:
# move into the project directory and put its `lib` folder first on sys.path,
# so modules inside `lib` can be imported by bare name.
os.chdir( wdir )
sys.path.insert( 0, os.path.join(os.path.abspath(os.getcwd()),'lib') )

# Star import: every public name of `lib` lands in the notebook namespace.
# NOTE(review): `mkNestedDir` (and possibly `dt`) presumably come from here -- confirm.
from lib import *

In [None]:
# SETUP
# `dt` is used below and in the time-series cell; import it explicitly instead
# of relying on whatever `from lib import *` happens to export.
import datetime as dt

# Root directory of the ICON-EU forecast data; every sub-directory of it is
# processed by the conversion loop.
data_path = "/media/windows/projects/hydrological_forecasting/machine_learning/data/forecast/icon-eu/"

# Variable to extract; it is matched against the GRIB file names.
# variable = 'TOT_PREC'
variable = 'T_2M'
# Number of lead hours: passed to the downloader as --max-time-step and used as
# the upper bound (inclusive) of the lead-time loops.
lead_hours = 8
# Number of ensemble members the extracted rows are split into.
ensemble_number = 1

### READ THIS FROM FILE!!!!!
# Forecast initialisation time; lead hours are added to it to build the dates.
init_date = dt.datetime.strptime('20210614T06:00:00', '%Y%m%dT%H:%M:%S')

# Region of interest as (min, max) bounds, both inclusive.
lat = ( 46.2, 47.2 )
lon = ( 10.2, 12.5 )

In [None]:
# Scratch directory below data_path: create it if needed and make it the
# current working directory.
dir_to_save = f"{data_path}tmp/"
print(dir_to_save)

mkNestedDir(dir_to_save)
os.chdir(dir_to_save)


In [None]:
# Display the current working directory (the previous cell chdir()s into dir_to_save).
os.getcwd()

In [None]:

# download the data
# Runs the deutscherwetterdienst/downloader Docker image with the current
# working directory mounted as /local, requesting the icon-eu model, the
# single-level fields t_2m and tot_prec, and time steps up to `lead_hours`.
# check=True raises CalledProcessError if the command exits with a non-zero status.
subprocess.run('''docker run --rm --volume $(pwd):/local \
    deutscherwetterdienst/downloader downloader \
    --model icon-eu \
    --single-level-fields t_2m,tot_prec \
    --max-time-step {lead_hours} \
    --directory /local'''.format(lead_hours=lead_hours),
                shell=True, check=True,
                executable='/bin/bash')

In [None]:
# Every sub-directory of data_path (trailing '/' in the pattern matches
# directories only). glob returns matches in arbitrary order, so sort them to
# make the processing order reproducible.
dirs = sorted(glob.glob( data_path + '*/' ))

In [None]:
# Display the directories that the conversion loop will process.
dirs

In [None]:
import shutil  # used to move the GRIB files without spawning a shell

# Convert the GRIB2 files of every directory in `dirs` to CSV.
#
# For each directory and each lead time n ('001' .. lead_hours, zero padded):
#   1. one GRIB2 file of `variable` is moved into <dir>/<n>/
#   2. grib_get_data (run in the python-eccodes Docker image) dumps it to output.csv
#   3. the dump is cropped to the lat/lon region of interest
#   4. the cropped rows are split into `ensemble_number` equally sized chunks;
#      each chunk is written to <dir>/<n>/<member>/output.csv
#   5. `full_data` (index 'ID' = 1..points, columns lat, lon, one per lead time)
#      receives, per point and lead time, the list of the members' values.
#
# `full_data` is rebuilt for every directory, so after the loop it holds the
# data of the last directory only.
for el in dirs:

    os.chdir(el)

    # zero-padded lead-time labels: sub-directory names and full_data columns
    lead_time_array = [str(step).zfill(3) for step in range(1, lead_hours + 1)]

    # object dtype because the lead-time cells hold lists (one value per member)
    full_data = pd.DataFrame(columns=['lat', 'lon'] + lead_time_array, dtype=object)
    full_data.index.name = 'ID'

    for n in lead_time_array:

        print(n)

        # create and move into the current_file_path
        current_file_path = el + n + '/'
        mkNestedDir(current_file_path)
        os.chdir(current_file_path)

        # identify and move current_file into current_file_path
        print( el + '*' + variable + '*.grib2' )
        # Prefer the file whose name carries this lead step
        # (names look like ..._<run>_<step>_<VARIABLE>.grib2 -- NOTE(review):
        # confirm the naming). Otherwise fall back to any remaining file of the
        # variable. Sorting makes the choice deterministic: glob order is arbitrary.
        candidates = sorted(glob.glob(el + '*_' + n + '_' + variable + '*.grib2'))
        if not candidates:
            candidates = sorted(glob.glob(el + '*' + variable + '*.grib2'))
        if not candidates:
            raise FileNotFoundError(
                "no GRIB2 file for variable {} left in {}".format(variable, el))
        current_file = candidates[0]
        grib_name = os.path.basename(current_file)
        shutil.move(current_file, current_file_path + grib_name)

        # extract data to output.csv
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters cannot break the command. The current directory
        # (current_file_path) is mounted as /local in the container.
        with open('output.csv', 'w') as extracted:
            subprocess.run(
                ['docker', 'run', '--rm', '--volume', os.getcwd() + ':/local',
                 'deutscherwetterdienst/python-eccodes', 'grib_get_data',
                 '-p', 'date,time,stepRange,shortName', grib_name],
                stdout=extracted, check=True)

        # read exported data and cut to the ROI
        data_df = pd.read_csv('output.csv', sep=r'\s+', skiprows=0, header=None,
                              names=['lat', 'lon', 'values', 'date', 'time', 'step_range', 'name'], low_memory=False)
        # drop the header line(s) that grib_get_data writes into the dump
        data_df = data_df[data_df["lat"] != 'Latitude,']
        data_df = data_df.astype({'lat': float, 'lon': float, 'values': float,
                                 'date': str, 'time': str, 'step_range': str, 'name': str})

        in_roi = (
            (data_df['lat'] >= lat[0]) & (data_df['lat'] <= lat[1])
            & (data_df['lon'] >= lon[0]) & (data_df['lon'] <= lon[1])
        )
        data_df = data_df[in_roi]

        # export data to a new structure
        # number of grid points per ensemble member
        interruptor = len(data_df) // ensemble_number

        # Size full_data once, on the first lead time; later lead times must
        # deliver the same number of points.
        if full_data.index.empty:
            full_data = full_data.reindex(pd.RangeIndex(1, interruptor + 1, name='ID'))
        elif len(full_data) != interruptor:
            raise ValueError(
                "lead time {} has {} points per member, expected {}".format(
                    n, interruptor, len(full_data)))

        # Coordinates are taken from the first member's rows. Whole columns are
        # assigned: chained `full_data.loc[id]['lat'] = ...` may write to a
        # temporary copy and silently leave full_data unchanged.
        first_member = data_df.iloc[:interruptor]
        full_data['lat'] = first_member['lat'].to_numpy()
        full_data['lon'] = first_member['lon'].to_numpy()

        # One row per member, one column per point; transposed so that every
        # point gets the list [member 1, member 2, ...] of its values.
        member_values = data_df['values'].to_numpy()[:interruptor * ensemble_number]
        member_values = member_values.reshape(ensemble_number, interruptor)
        full_data[n] = pd.Series(member_values.T.tolist(),
                                 index=full_data.index, dtype=object)

        for ens in range(ensemble_number):

            m = str(ens+1).zfill(3)
            ens_file_path = current_file_path + m + '/'
            mkNestedDir(ens_file_path)

            # rows of this member, renumbered from 0 for the CSV index column
            member_rows = data_df.iloc[interruptor*ens:interruptor*(1+ens)]
            current_data = member_rows[['lat', 'lon', 'values']].reset_index(drop=True)

            current_data.to_csv( ens_file_path + 'output.csv' )

            print( ens_file_path )


In [None]:
# Spot check: first member's value of point 128 at lead time '008', minus 273.15.
full_data.at[128, '008'][0] - 273.15

In [None]:
# Keep the points inside a small lat/lon box (bounds inclusive).
# The mask is built once on full_data and applied to full_data itself; applying
# full_data-based masks to the already filtered `tmp` relies on implicit
# boolean reindexing and triggers a pandas UserWarning.
in_box = (
    (full_data['lat'] >= 46.75) & (full_data['lat'] <= 46.85)
    & (full_data['lon'] >= 11.1) & (full_data['lon'] <= 11.35)
)
tmp = full_data[in_box]

In [None]:
# Display the points selected by the box filter.
tmp

In [None]:
# Time series of the first point in `tmp`: one value per lead hour, taken from
# the first ensemble member, dated init_date + lead hour.
series = []
dates = []

# 273.15 is subtracted for the temperature field only (presumably Kelvin ->
# Celsius -- confirm the unit); other variables are kept as they are.
offset = 273.15 if variable.upper() == 'T_2M' else 0.0
first_point = tmp.iloc[0]

for n in range(1, lead_hours+1):

    dates.append( init_date + dt.timedelta(hours=n) )

    # Read the column of THIS lead time ('001', '002', ...). The positional
    # `tmp.iloc[0][0]` always returned the first column (lat), whatever n was.
    lead_label = str(n).zfill(3)
    series.append( first_point[lead_label][0] - offset )

In [None]:
# Single-column frame of the series values, indexed by forecast date.
test = pd.DataFrame(data=series, index=dates)

In [None]:
# Plot the series against its date index.
test.plot()

In [None]:
# !docker run --rm --volume $(pwd):/local \
#     deutscherwetterdienst/downloader downloader \
#     --model icon \
#     --single-level-fields t_2m,tot_prec \
#     --max-time-step 5 \
#     --directory /local

In [None]:
# docker run --rm \
#     --volume $(pwd):/local \
#     --env INPUT_FILE=/local \
#     --env OUTPUT_FILE=/local \
#     deutscherwetterdienst/regrid:icon-eu-eps \
#     /convert.sh

In [None]:
# Move into the folder of lead time 001 inside the tmp download directory
# (hard-coded absolute path) and display it.
os.chdir("/media/windows/projects/hydrological_forecasting/machine_learning/data/forecast/icon-eu/tmp/001/")
os.getcwd()

In [None]:
# IPython shell magic: run grib_ls from the python-eccodes Docker image on one
# TOT_PREC file, with the current directory bind-mounted as /local.
!docker run --rm --mount type=bind,source="$(pwd)"/,target=/local deutscherwetterdienst/python-eccodes grib_ls icon-eu_europe_regular-lat-lon_single-level_2021102703_000_TOT_PREC.grib2