In [None]:
import sys

# Folder holding the shared helper package that the next cell star-imports.
lib_dir = "/home/daniele/documents/github/ftt01/phd/share/lib"
# Put it first on the search path so it takes precedence over any other
# module with the same name.
sys.path.insert(0, lib_dir)

In [None]:
from lib import *

In [None]:
# --- Paths -----------------------------------------------------------------
# Project root; every other path in the notebook is built by appending to it.
wdir = "/media/windows/projects/meteo_forecast_water_demand/"

# Root folder that receives one sub-folder of CSV files per variable.
output_path = f"{wdir}meteo/era5/postprocessed/"

# --- What to extract ---------------------------------------------------------
# Variables to post-process; names not handled by the extraction loop are skipped.
variables = [
    '2m_temperature',
    'surface_net_solar_radiation',
    'total_precipitation',
    'relative_humidity',
    'surface_solar_radiation_downwards',
]

# variables = ['total_precipitation']

# GRIB files to read, looked up under <wdir>/meteo/era5/original/.
files = [
    'adaptor.mars.internal-1649846628.7378805-12259-5-c3bd14ea-6275-49b2-ad93-ce719c52b729.grib',
    'adaptor.mars.internal-1649846659.2984114-2675-7-cb7f4150-4ef2-4b09-839c-45d52e5a702a.grib',
    'adaptor.mars.internal-1649847657.5886977-4143-18-fb0a7dff-6e38-485c-8cb0-c7861a500c77.grib',
    'adaptor.mars.internal-1649863332.5546749-19721-6-59931d26-79e4-456f-a979-db42e29c55d7.grib',
]

# Half-width of the latitude/longitude window used to select a grid point.
tolerance = 0.001

# --- Extraction points -------------------------------------------------------
# Each entry: output id and (latitude, longitude) of the grid point to extract.
# Alternative 3x3 set of points, kept for reference:
# meta = [{'id': 1, 'coordinates': (46.15, 11.0)},
#         {'id': 2, 'coordinates': (46.15, 11.1)},
#         {'id': 3, 'coordinates': (46.15, 11.2)},
#         {'id': 4, 'coordinates': (46.05, 11.0)},
#         {'id': 5, 'coordinates': (46.05, 11.1)},
#         {'id': 6, 'coordinates': (46.05, 11.2)},
#         {'id': 7, 'coordinates': (45.95, 11.0)},
#         {'id': 8, 'coordinates': (45.95, 11.1)},
#         {'id': 9, 'coordinates': (45.95, 11.2)}]

meta = [
    {'id': 10, 'coordinates': (46.05, 11.1)},
]


In [None]:
def rel_humidity( t, td ):
    """Estimate relative humidity from temperature and dew point.

    Evaluates ``10 ** (2 - (t - td) / 31.25)`` and rounds to two decimals,
    so the result is 100.0 when ``t == td`` and drops by a factor of ten for
    every 31.25 units of dew-point depression.

    Args:
        t: air temperature.
        td: dew-point temperature, in the same unit as ``t`` (only the
            difference ``t - td`` is used).

    Returns:
        The estimate rounded to 2 decimal places.
    """
    dew_point_depression = t - td
    exponent = 2 - dew_point_depression/31.25
    return round( 10**exponent, 2 )

In [None]:
from eccodes import *
import xarray as xr

In [None]:
def _indexed_values(frame, column):
    """Return ``frame[column]`` as a one-column frame named 'values'.

    The result is indexed by the 'valid_time' column of ``frame``, renamed
    to 'datetime'.
    """
    return (
        frame[['valid_time', column]]
        .rename(columns={'valid_time': 'datetime', column: 'values'})
        .set_index('datetime')
    )


def _deaccumulate(cumulated, upper_threshold=None, period=24):
    """Convert running totals that restart every ``period`` samples into per-step amounts.

    Args:
        cumulated: sequence of running totals, already free of NaN.
        upper_threshold: when given, any per-step amount above it is
            replaced by 0.0.
        period: number of samples after which the running total restarts.
            NOTE(review): 24 assumes the rows come in blocks of 24 hourly
            steps starting right after a reset - confirm against the GRIB
            layout.

    Returns:
        list of floats rounded to 2 decimals, same length as ``cumulated``.
    """
    increments = []
    for position, value in enumerate(cumulated):
        if position % period == 0:
            # First sample after a reset: the total is already the increment.
            step = float(value)
        else:
            # abs() keeps the value non-negative even when the total decreases.
            step = abs(float(value - cumulated[position - 1]))

        if upper_threshold is not None and step > upper_threshold:
            step = 0.0
        increments.append(round(step, 2))
    return increments


def _compact_timestamp(timestamp):
    """Render a timestamp without '-', ':' and ' ' for use in a file name."""
    return str(timestamp).replace('-', '').replace(':', '').replace(' ', '')


# Data column read for each of the two radiation variables.
_RADIATION_COLUMNS = {
    'surface_net_solar_radiation': 'ssr',
    'surface_solar_radiation_downwards': 'ssrd',
}

# For every GRIB file and every point in `meta`, extract each requested
# variable as a time series and write it to
# <output_path>/<var_name>/<id>_<start>_<end>.csv
for file in files:

    # The context manager closes the GRIB file once its content is in memory.
    with xr.open_dataset(wdir + 'meteo/era5/original/' + file, engine='cfgrib') as ds:
        tmp = ds.to_dask_dataframe().compute()

    for m in meta:
        current_id = m['id']
        lat, lon = m['coordinates']

        # Keep only the rows inside the +/- tolerance window around the point.
        in_window = (
            tmp['latitude'].between(lat - tolerance, lat + tolerance)
            & tmp['longitude'].between(lon - tolerance, lon + tolerance)
        )
        local = tmp[in_window]

        if local.empty:
            # Fail with an explicit message instead of an IndexError when the
            # output file name is built further down.
            raise ValueError(
                'no grid point within {tol} of {coords} in {name}'.format(
                    tol=tolerance, coords=m['coordinates'], name=file))

        # Defined up front so the clean-up below never hits an unbound name.
        current_data = None

        for variable in variables:
            if variable == '2m_temperature':
                var_name = 'temperature'
                correction_factor = -273.15  # Kelvin -> degrees Celsius

                current_data = _indexed_values(local, 't2m')
                current_data['values'] = [
                    round(float(c) + correction_factor, 2) for c in current_data['values']]

            elif variable == 'total_precipitation':
                var_name = 'precipitation'

                current_data = _indexed_values(local, 'tp')

                # transform from cumulate to instant precipitation
                # (values are scaled by 1000 first - presumably m -> mm, confirm)
                cum = [round(c * 1000, 2)
                       for c in current_data['values'].replace(np.nan, 0)]
                current_data['values'] = _deaccumulate(cum)

            elif variable in _RADIATION_COLUMNS:
                var_name = variable
                upper_threshold = 1500.0

                current_data = _indexed_values(local, _RADIATION_COLUMNS[variable])

                # transform from cumulate to instant radiation
                # (values are divided by 1000 first - presumably J -> kJ, confirm);
                # increments above the threshold are written as 0.0
                cum = [c / 1000 for c in current_data['values'].replace(np.nan, 0)]
                current_data['values'] = _deaccumulate(cum, upper_threshold=upper_threshold)

            elif variable == 'relative_humidity':
                var_name = 'relative_humidity'

                # Pair temperature and dew point row by row. This avoids a
                # per-timestamp .loc lookup, which relies on float() of a
                # one-element Series (deprecated in recent pandas) and breaks
                # when a timestamp occurs more than once.
                current_data = _indexed_values(local, 't2m')
                current_data['values'] = [
                    rel_humidity(float(t), float(td))
                    for t, td in zip(local['t2m'], local['d2m'])]

            else:
                # Unknown variable name: nothing to write.
                continue

            current_output_path = output_path + var_name + '/'
            mkNestedDir(current_output_path)
            current_data.to_csv(current_output_path + '{id}_{start_date}_{end_date}.csv'.format(
                id=current_id,
                start_date=_compact_timestamp(current_data.index[0]),
                end_date=_compact_timestamp(current_data.index[-1])))

        # Release the per-point frames before moving to the next point.
        del current_data, local
    # Release the per-file data before opening the next GRIB file.
    del ds, tmp

In [None]:
# For every point in `meta`, gather the per-variable CSV files written under
# <output_path>/<variable>/ and merge them into a single <output_path>/<id>.csv
# with one column per variable.
for m in meta:
    current_id = m['id']
    print(current_id)

    data = pd.DataFrame()

    # glob returns paths in arbitrary order; sorting makes the column order
    # (and the index, which comes from the first variable) reproducible.
    output_dirs = sorted(glob.glob( output_path + '*/' ))
    for var_dirs in output_dirs:
        # Paths end with '/', so the folder (= variable) name is the
        # second-to-last component.
        var = var_dirs.split('/')[-2]
        print(var)

        # File names are <id>_<start>_<end>.csv with fixed-width timestamps,
        # so lexicographic order is chronological order.
        var_files = sorted(glob.glob( var_dirs + '/{id}_*'.format(id=current_id) ))

        var_data = None
        for f in var_files:
            c_data = pd.read_csv( f, parse_dates=True, index_col=0 )

            # The first file seeds the series; later files are appended.
            # An explicit check replaces the former bare `except:`, which
            # also hid genuine append_data failures and then silently threw
            # away everything merged so far.
            if var_data is None:
                var_data = c_data
            else:
                var_data = append_data(var_data, c_data)

        # The single 'values' column becomes the column named after the
        # variable. The former rename() calls discarded their result and
        # had no effect, so they are dropped.
        data[var] = var_data

    data.to_csv( output_path + '{id}.csv'.format(id=current_id), sep=';' )
    del data

In [None]:
# data.describe()

In [None]:
# data_obs = pd.DataFrame()
# laste_precipitation = pd.read_csv(wdir + '/meteo/historical/precipitation.csv', header=None)
# laste_precipitation[laste_precipitation[0]== '00:00:00 01/01/2013']
# laste_precipitation[laste_precipitation[0]== '23:00:00 31/12/2020']

# data_laste = pd.DataFrame( laste_precipitation.loc[455811:1296542][1] )
# data_laste.index = pd.to_datetime(laste_precipitation.loc[455811:1296542][0],format='%H:%M:%S %d/%m/%Y')

# data_laste = data_laste.resample('H').sum()

# data_obs['precipitation'] = data_laste

In [None]:
# laste_relative_humidity = pd.read_csv(wdir + '/meteo/historical/relative_humidity.csv', header=None)
# laste_relative_humidity[laste_relative_humidity[0]== '00:00:00 01/01/2013']
# laste_relative_humidity[laste_relative_humidity[0]== '23:00:00 31/12/2020']

# data_laste = pd.DataFrame( laste_relative_humidity.loc[314889:593758][1] )
# data_laste.index = pd.to_datetime( laste_relative_humidity.loc[314889:593758][0],format='%H:%M:%S %d/%m/%Y' )

# data_laste = data_laste.resample('H').mean()

# data_obs['relative_humidity'] = data_laste

In [None]:
# laste_radiazione = pd.read_csv(wdir + '/meteo/historical/radiazione.csv', header=None)
# laste_radiazione[laste_radiazione[0]== '00:00:00 01/01/2013']
# laste_radiazione[laste_radiazione[0]== '23:00:00 31/12/2020']

# data_laste = pd.DataFrame( laste_radiazione.loc[47705:327982][1] )
# data_laste.index = pd.to_datetime( laste_radiazione.loc[47705:327982][0],format='%H:%M:%S %d/%m/%Y' )

# # data_laste = data_laste.resample('H').sum()
# # data_laste[1] = data_laste[1]*3600

# data_laste = data_laste.resample('H').sum()
# # data_laste[1] = data_laste[1]*24

# data_obs['surface_net_solar_radiation'] = data_laste

In [None]:
# laste_temperature = pd.read_csv(wdir + '/meteo/historical/temperature.csv', header=None)
# laste_temperature[laste_temperature[0]== '00:00:00 01/01/2013']
# laste_temperature[laste_temperature[0]== '23:00:00 31/12/2020']

# data_laste = pd.DataFrame( laste_temperature.loc[316497:596788][1] )
# data_laste.index = pd.to_datetime( laste_temperature.loc[316497:596788][0],format='%H:%M:%S %d/%m/%Y' )

# data_laste = data_laste.resample('H').mean()

# data_obs['temperature'] = data_laste

In [None]:
# data[['surface_net_solar_radiation']][dt.date(2013,1,1):dt.date(2016,12,31)].describe()
# # era5_laste = era5_laste.resample('H').mean()
# # era5_laste.plot()

In [None]:
# era5_laste = data[['precipitation']][dt.date(2013,6,9):dt.date(2013,6,11)]
# # era5_laste = era5_laste.resample('H').mean()
# era5_laste.plot()