## This notebook creates data chunks to facilitate HTCondor processing

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from tools import loadmat, multi_equal, save_hdf, climdictToDf, split

### Global climate drivers

In [2]:
# Read the climate-index MAT file
#TODO: UPDATE FROM RAW DATA
climate_raw = loadmat(os.path.join('data', 'lscidx.mat'))
# Keep only the variables whose key ends with 'Linr', and strip that tag
# from the key so the remaining names are the bare index names
dictdata = {
    name.replace('Linr', ''): values
    for name, values in climate_raw.items()
    if name.endswith('Linr')
}
# Convert to a DataFrame and give the columns short lowercase labels
dfPredGlob = climdictToDf(dictdata, length='short')
dfPredGlob.columns = ['amo', 'nao', 'oni', 'pdo']
# Write the reference global-predictor table (dfPredGlob) for the HTCondor jobs
filn = './data/chtc_in/dfPredGlob.hdf'
save_hdf(filn, dfPredGlob)

./data/chtc_in/dfPredGlob.hdf is saved.


### GRAND Dams: Streamflow, Soil Moisture, and Snowfall

In [3]:
# Load Dam Inflow data from SUTD
dfFlowDams = pd.read_hdf('./data/dfFlowDams.hdf')
# np.load returns a lazy NpzFile handle for .npz archives; read the array
# inside a context manager so the underlying file is closed afterwards
with np.load('./data/ind_dams.npz') as npz_dams:
    ind_dams = npz_dams['ind_dams']
# Load ERA40 swvl data
dfSwvlDams = pd.read_hdf('./data/dfSwvlDams.hdf')
# Load WFD snowfall data
dfSnowDams = pd.read_hdf('./data/dfSnowDams.hdf')
# Validate that all sources list the dam IDs in the same order
# NOTE(review): the result of multi_equal is not asserted here — confirm that
# the helper itself raises or reports on a mismatch
multi_equal([dfFlowDams.columns, ind_dams[0, :], dfSwvlDams.columns, dfSnowDams.columns])

In [5]:
# Number of divisions (chunks); one pair of HDF files is written per chunk
# NOTE(review): 1593 presumably equals the number of dams, i.e. one dam per
# chunk — confirm against dfFlowDams.shape[1]
ndivDams = 1593
listDams = list(split(list(dfFlowDams.columns), ndivDams))

# Predictor labels used as the second column level; identical for every chunk,
# so defined once outside the loop
predList2 = ['flow', 'swvl', 'snow']

# Create data chunks with multi-index columns (point_no, predictor)
for i in range(ndivDams):
    chunkDams = listDams[i]

    # Empty frame on the streamflow time index, one column per (dam, predictor)
    mcols2 = pd.MultiIndex.from_product([chunkDams, predList2], names=['point_no', ''])
    dfPredLocl2 = pd.DataFrame(index=dfFlowDams.index, columns=mcols2)

    # Assign the three local predictors to each dam of this chunk
    for j in chunkDams:
        dfTemp2 = pd.concat([dfFlowDams[j], dfSwvlDams[j], dfSnowDams[j]], axis=1)
        # Label the columns so they align with the predictor level of mcols2
        dfTemp2.columns = predList2
        dfPredLocl2[j] = dfTemp2

    # Save as HDF format. The flow-only file is a plain column slice of
    # dfFlowDams, so no separate multi-index frame is built for it.
    save_hdf('./data/chtc_in/dfFlowDams{:d}.hdf'.format(i), dfFlowDams[chunkDams], set_print=False)
    save_hdf('./data/chtc_in/dfPredDamsLocl{:d}.hdf'.format(i), dfPredLocl2, set_print=False)