In [10]:
import datetime, pickle, sys, os, shutil
from ftplib import FTP
from tqdm import tqdm
import xarray as xr
import numpy as np
import pandas as pd

sys.path.append("./src/")
from dataio import load_dam_details
from easymore_interpolation_TE import map_nc
from merit_process import *

In [None]:
def generate_dates(frm_year=2018, frm_month=9, frm_day=1,
                   to_year=2021, to_month=9, to_day=16,
                   delta=datetime.timedelta(hours=1)):
    """
    Build a list of evenly spaced timestamps between two calendar dates.

    The start instant (midnight of the ``frm_*`` date) is NOT included: the
    first entry is ``start + delta``. Stepping continues while the current
    instant is strictly before the end instant (midnight of the ``to_*``
    date), so the last entry is the first timestamp that is greater than or
    equal to the end instant.

    Parameters
    ----------
    frm_year, frm_month, frm_day : int
        Calendar date of the start instant.
    to_year, to_month, to_day : int
        Calendar date of the end instant.
    delta : datetime.timedelta
        Step between consecutive timestamps. Must be positive whenever the
        start instant precedes the end instant.

    Returns
    -------
    list
        One entry per timestamp, each formatted by ``datetime_to_str``.
        Empty when the start instant is not before the end instant.

    Raises
    ------
    ValueError
        If ``delta`` is zero or negative while start < end (the loop could
        otherwise never reach the end instant).
    """
    current_date = datetime.datetime(year=frm_year, month=frm_month, day=frm_day)
    end_date = datetime.datetime(year=to_year, month=to_month, day=to_day)

    # Guard against a step that cannot make forward progress.
    if current_date < end_date and delta <= datetime.timedelta(0):
        raise ValueError("delta must be a positive timedelta")

    date_list = []
    while current_date < end_date:
        # Advance first, then record: the start instant itself is skipped.
        current_date = current_date + delta
        date_list.append(datetime_to_str(current_date))
    return date_list

def datetime_to_str(x):
    """
    Split a datetime-like object into zero-padded string components.

    Reads ``x.year``, ``x.month``, ``x.day`` and ``x.hour`` and returns them,
    in that order, as a list of strings. Any component whose string form is
    a single character is prefixed with "0"; longer ones are left unchanged.
    Despite the name, the result is a list of four strings, not one string.
    """
    padded = []
    for component in (x.year, x.month, x.day, x.hour):
        text = str(component)
        if len(text) == 1:
            text = "0" + text
        padded.append(text)
    return padded


def get_values(path, n_columns=766, fill_value=-1):
    """
    Load the value rows of one CSV file, or a placeholder row if it is missing.

    Parameters
    ----------
    path : str
        Path of the CSV file to read.
    n_columns : int, optional
        Width of the placeholder row returned when ``path`` is not a file.
        Defaults to 766, the value previously hard-coded here.
        NOTE(review): presumably the number of basins/COMIDs - confirm.
    fill_value : scalar, optional
        Value used to fill the placeholder row (default -1).

    Returns
    -------
    numpy.ndarray
        When the file exists: the values of rows 3-5 (positional, header
        excluded) of all columns after the first, with the column named
        "Unnamed: 1" moved to the index and therefore dropped from the
        values. Otherwise: an array of shape ``(1, n_columns)`` filled with
        ``fill_value``.
    """
    if not os.path.isfile(path):
        # Missing file: keep the time axis aligned with a sentinel row.
        return np.full((1, n_columns), fill_value)

    df = pd.read_csv(path)
    return df.iloc[3:6, 1:].set_index("Unnamed: 1").values
    
def extract_hourly_values(root, variable, dates):
    """
    Assemble the hourly CSV files of one variable into a single DataFrame.

    For every ``(year, month, day, hour)`` entry in ``dates`` the file
    ``{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}{hour}_{variable}.csv``
    is read with ``get_values`` and the results are concatenated row-wise.

    Parameters
    ----------
    root : str
        Directory containing one sub-directory per variable.
    variable : str
        Variable name; used both as sub-directory and as file-name suffix.
    dates : iterable of 4-sequences of str
        ``(year, month, day, hour)`` components, as produced by
        ``generate_dates``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the timestamps built from ``dates``; columns are the
        "COMID" values read from ``dam_basins_path``.

    Notes
    -----
    ``gpd`` and ``dam_basins_path`` are not defined in the visible part of
    this notebook; they are presumably supplied by the
    ``from merit_process import *`` star import - TODO confirm.
    """
    values, timestamps = [], []
    for (year, month, day, hour) in tqdm(dates):
        # Use the `root` argument (as extract_daily_values does) rather than
        # a module-level path, so the caller controls where files are read.
        path = f"{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}{hour}_{variable}.csv"
        values.append(get_values(path))
        date = datetime.datetime(year=int(year), month=int(month),
                                 day=int(day), hour=int(hour))
        timestamps.append(date)
    columns = gpd.read_file(dam_basins_path)["COMID"].values.tolist()
    return pd.DataFrame(np.concatenate(values), columns=columns, index=timestamps)

def extract_daily_values(root, variable, dates):
    """
    Assemble the daily CSV files of one variable into a single DataFrame.

    Each entry of ``dates`` is unpacked as ``(year, month, day, hour)``. The
    file ``{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}_{variable}.csv``
    is read through ``get_values`` (the hour is not part of the file name but
    is used for the timestamp), and all results are stacked row-wise.

    Returns a DataFrame indexed by the timestamps, with the "COMID" values
    read from ``dam_basins_path`` as column labels.

    NOTE(review): ``gpd`` and ``dam_basins_path`` are not defined in the
    visible cells; presumably they come from the ``merit_process`` star
    import - confirm.
    """
    chunks = []
    stamps = []
    for parts in tqdm(dates):
        year, month, day, hour = parts
        csv_path = f"{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}_{variable}.csv"
        chunks.append(get_values(csv_path))
        stamps.append(
            datetime.datetime(int(year), int(month), int(day), int(hour))
        )

    comids = gpd.read_file(dam_basins_path)["COMID"].values.tolist()
    stacked = np.concatenate(chunks)
    return pd.DataFrame(stacked, index=stamps, columns=comids)

In [None]:
# Variable names processed by the cells below; each one is used as a
# sub-directory name and as a file-name suffix.
target_variables = [
    "ICESUB",
    "SNMLT",
    "GLSNW",
    "SNFRZ",
    "SNSUB",
    "ICEMLT",
    "RIVOUT",
]

# Absolute, machine-specific locations of the hourly and daily data sets.
download_hourly = "/media/tristan/Elements/Hydro/TE/TE-Japan/Hourly/Download/"
csv_hourly = "/media/tristan/Elements/Hydro/TE/TE-Japan/Hourly/CSV/"
download_daily = "/media/tristan/Elements/Hydro/TE/TE-Japan/Daily/Download/"
csv_daily = "/media/tristan/Elements/Hydro/TE/TE-Japan/Daily/CSV/"

# Daily steps from 2007-01-01 up to 2021-09-01.
dates_daily = generate_dates(
    frm_year=2007, frm_month=1, frm_day=1,
    to_year=2021, to_month=9, to_day=1,
    delta=datetime.timedelta(days=1),
)

# Hourly steps over generate_dates' default range.
dates_hourly = generate_dates()


### Hourly

In [None]:
# Run map_nc on every hourly NetCDF file of every target variable.
# NOTE(review): `root` and `out_root` are not defined in any visible cell;
# they must exist in the kernel before this cell runs - confirm which of the
# path constants they should point to.
for variable in target_variables:
    outdir = f"{out_root}/{variable}/"
    for date_parts in tqdm(dates_hourly):
        year, month, day, hour = date_parts
        nc_path = f"{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}{hour}_{variable}.nc"
        map_nc(
            nc_path,
            [variable],
            varout=[variable],
            outdir=outdir,
            var_time="time",
            var_lat="lat",
            var_lon="lon",
        )

In [None]:
# Build one hourly DataFrame per target variable, keyed by variable name.
# `results` is created here so the cell also works on a fresh kernel.
# NOTE(review): `out_root` is not defined in any visible cell - confirm which
# directory it should point to.
results = {
    variable: extract_hourly_values(out_root, variable, dates_hourly)
    for variable in target_variables
}

In [None]:
# Persist each hourly DataFrame as a pickle file. The frames are read from
# `results`, the mapping filled by the preceding extraction cell.
for variable in target_variables:
    results[variable].to_pickle(f"/media/tristan/Elements/Hydro/TE/TE-Japan/Hourly/DF/{variable}.pkl")

### Daily

In [None]:
# Run map_nc on every daily NetCDF file of every target variable.
# NOTE(review): `root` and `out_root` are not defined in any visible cell;
# they must exist in the kernel before this cell runs - confirm which of the
# path constants they should point to.
for variable in target_variables:
    outdir = f"{out_root}/{variable}/"
    for date_parts in tqdm(dates_daily):
        # The hour component is unpacked but not part of the daily file name.
        year, month, day, hour = date_parts
        nc_path = f"{root}/{variable}/TE-JPN01M_MSM_H{year}{month}{day}_{variable}.nc"
        map_nc(
            nc_path,
            [variable],
            varout=[variable],
            outdir=outdir,
            var_time="time",
            var_lat="lat",
            var_lon="lon",
        )

In [None]:
# Build one daily DataFrame per target variable, keyed by variable name.
# `results` is created here so the cell also works on a fresh kernel; note
# that it replaces any mapping of the same name from the hourly section.
# NOTE(review): `out_root` is not defined in any visible cell - confirm which
# directory it should point to.
results = {
    variable: extract_daily_values(out_root, variable, dates_daily)
    for variable in target_variables
}

In [None]:
# Persist each daily DataFrame as a pickle file. The frames are read from
# `results`, the mapping filled by the preceding extraction cell.
for variable in target_variables:
    results[variable].to_pickle(f"/media/tristan/Elements/Hydro/TE/TE-Japan/Daily/DF/{variable}.pkl")