In [None]:
import glob
import re
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
from tqdm.notebook import tqdm

## Configuration

In [None]:
# Root folder of the hDMS reservoir data set. The HDCC cells below read the
# reservoir attribute CSV and the 'nhoperational24hw' NetCDF folder from it.
# NOTE(review): absolute path on a mapped drive - adjust when running elsewhere.
path = Path('Z:/nahaUsers/casadje/datasets/hDMS/reservoirs')

## Data

### Reservoirs

#### CEDEX

In [None]:
# Folder with the processed CEDEX reservoir data; the CSV time series are read
# from its 'timeseries' subfolder.
# NOTE(review): absolute path on a mapped drive - adjust when running elsewhere.
path_CEDEX = Path('Z:/nahaUsers/casadje/datasets/CEDEX/processed/reservoirs')

In [None]:
# Scan every CEDEX time series and record, per reservoir ID (the file name):
#   * starts:    the earliest value found in the 'date' column
#   * variables: the column names left after discarding 'date' and 'type'
starts = {}
variables = {}
for file in (path_CEDEX / 'timeseries').glob('*.csv'):
    ID = int(file.stem)
    df = pd.read_csv(file)
    starts[ID] = pd.to_datetime(df.date).min()
    # filter instead of list.remove(): remove() raises ValueError when a file
    # lacks one of the columns, which would abort the scan of all the files
    var = [column for column in df.columns if column not in ('date', 'type')]
    variables[ID] = var
# convert to a Series (index: reservoir ID) so that it can be summarised later
starts = pd.Series(starts)

In [None]:
# Report the reservoirs whose time series holds exactly one variable
for ID, var in variables.items():
    # anything with zero or several variables is of no interest here
    if len(var) != 1:
        continue
    print(ID, var)

In [None]:
# `var` is the list left bound by the loops above, and 'date' was already
# discarded from those lists when `variables` was built. An unconditional
# remove() therefore raises ValueError; guard it so the cell can be re-run.
if 'date' in var:
    var.remove('date')

In [None]:
# Display the list currently bound to `var` (left over from the loops above)
var

In [None]:
# Summary statistics of the first date found in each CEDEX time series
starts.describe()

#### HDCC

In [None]:
def clean_reservoir_name(name):
    """Normalise a reservoir name read from the HDCC attribute table.

    The steps are applied in this order:
      1. convert to upper case;
      2. remove the reservoir code, i.e. every 'E' followed by two digits and
         any trailing whitespace (the pattern is not anchored, so it matches
         anywhere in the name);
      3. remove the prefix 'EMBALSE DE ' and turn 'EMBALSE DEL ' into 'EL ';
      4. remove the suffix '(PIE DE PRESA)' and the whitespace before it;
      5. swap 'WORD, ARTICLE' into 'ARTICLE WORD', so that articles
         ('el', 'la'...) are placed at the beginning.

    Parameters
    ----------
    name: str
        Reservoir name as found in the table

    Returns
    -------
    str
        The cleaned, upper-case name
    """
    name = name.upper()
    # remove code from reservoir name
    name = re.sub(r'E\d{2}\s*', '', name)
    # remove string 'embalse' from name
    name = re.sub(r'EMBALSE DE\s+', '', name)
    name = re.sub(r'EMBALSE DEL\s+', 'EL ', name)
    # remove string at the end of the name
    name = re.sub(r'\s*\(PIE DE PRESA\)', '', name)
    # place articles ('el', 'la'...) at the beginning
    name = re.sub(r'(\w+), (\w+)', r'\2 \1', name)
    return name


# The attribute table is the first CSV in `path`. Sort the matches so that the
# choice does not depend on the directory listing order, and fail with a clear
# message instead of a bare IndexError when no CSV is found.
csv_files = sorted(path.glob('*.csv'))
if not csv_files:
    raise FileNotFoundError(f'No CSV file with the reservoir attributes was found in {path}')
reservoirs = pd.read_csv(csv_files[0], sep=';', index_col='EFAS_ID')

# correct reservoir names
names = [clean_reservoir_name(name) for name in reservoirs.Name]
reservoirs.Name = names

print(f'total no. reservoirs:\t\t\t{reservoirs.shape[0]}')

reservoirs.head()

In [None]:
# Show the row(s) of the attribute table whose name is exactly 'LA BARCA'
reservoirs.loc[reservoirs.Name == 'LA BARCA']

### Time series

#### CEDEX

#### HDCC

In [None]:
# Display the configured hDMS reservoirs folder
path

In [None]:
# Open every HDCC NetCDF time series and keep the datasets in a dictionary
# keyed by the reservoir ID (the file name). Datasets that miss one of
# 'outflow'/'volume' get that variable added as an all-NaN copy of the other.
data = {}
for file in tqdm(list((path / 'nhoperational24hw').glob('*.nc'))):
    ID = int(file.stem)

    ds = xr.open_dataset(file)

    try:
        # named `ds_variables` so that the `variables` dictionary built in the
        # CEDEX section is not overwritten
        ds_variables = list(ds.keys())
        if 'outflow' not in ds_variables:
            ds['outflow'] = xr.zeros_like(ds['volume']) * np.nan
        elif 'volume' not in ds_variables:
            ds['volume'] = xr.zeros_like(ds['outflow']) * np.nan
    except Exception as error:
        # e.g. KeyError when the file has neither 'outflow' nor 'volume'.
        # `Exception` (not a bare except) lets KeyboardInterrupt stop the loop;
        # the reason is printed and the skipped dataset is closed.
        print(f'{ID}: skipped ({error!r})')
        ds.close()
        continue

    data[ID] = ds

In [None]:
# Load the time series of reservoir 9422 into memory. `load_dataset` reads the
# data and closes the file, so the dataset stays usable in the cells below.
# NOTE(review): the original read f'{path_datasets}/nhoperational24hw/9422.nc',
# but `path_datasets` is not defined anywhere in this notebook; the folder used
# here is the one read by the HDCC loading loop - confirm it is the intended one.
data1 = xr.load_dataset(path / 'nhoperational24hw' / '9422.nc')

data1

In [None]:
# Load the time series of reservoir 9126 into memory. `load_dataset` reads the
# data and closes the file, so the dataset stays usable in the cells below.
# NOTE(review): the original built the path from `path_datasets`, which is not
# defined in this notebook; this assumes that
# f'{path_datasets}/hDMS/reservoirs' is the folder stored in `path` - confirm.
data2 = xr.load_dataset(path / 'nhoperational24hw' / '9126.nc')

In [None]:
# Plot the 'volume' variable of the first sample dataset (`data1`)
data1['volume'].plot()

In [None]:
# Plot the 'volume' variable of the second sample dataset (`data2`)
data2['volume'].plot()

In [None]:
# Open all the HDCC NetCDF files as a single dataset, stacking them along a
# new 'ID' dimension in the order in which the files are listed.
# NOTE(review): the original built the pattern from `path_datasets`, which is
# not defined in this notebook; this assumes that
# f'{path_datasets}/hDMS/reservoirs' is the folder stored in `path` - confirm.
# NOTE(review): `data` was a dictionary of datasets in the loading loop above
# and is rebound here to a single xarray.Dataset.
data = xr.open_mfdataset((path / 'nhoperational24hw' / '*.nc').as_posix(),
                         combine='nested',
                         concat_dim='ID')
data.close()

data

In [None]:
# Reservoir to inspect, used below to select along the 'ID' dimension of `data`.
# NOTE(review): this is a string, so the selection presumably only works if
# the 'ID' coordinate of `data` holds strings - confirm.
ID = '9142'

In [None]:
# Show the values found along the 'ID' dimension of the combined dataset
data.ID.data

In [None]:
# Plot the 'outflow' series of the entry selected by label `ID` along 'ID'
data['outflow'].sel(ID=ID).plot()