## This notebook summarizes the prediction results

In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import MonthEnd
from scipy.stats.stats import pearsonr
from tools import MBOX, OUTBOX, save_hdf

In [13]:
# Load prediction output files
# Each ./data/chtc_out/dfFlowDams<i>_manu.npz archive holds the keys 'mp1'..'mp5';
# the entries of every archive are appended, in file order, to the matching list.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# archives produced by a trusted run.
n_files = 1593                      # number of result archives to read
mp1 = []; mp2 = []; mp3 = []; mp4 = []; mp5 = []
for i in range(n_files):
    # Multiple file results
    filn = './data/chtc_out/dfFlowDams{}_manu.npz'.format(i)
    # Open each archive once (instead of once per key) and close it when done
    with np.load(filn, allow_pickle=True) as npz:
        for key, store in (('mp1', mp1), ('mp2', mp2), ('mp3', mp3),
                           ('mp4', mp4), ('mp5', mp5)):
            store.extend(npz[key].tolist())
print('All results are loaded.')

# Load Dam Inflow data from SUTD
# NOTE(review): absolute, machine-specific directory -- point data_dir at the
# local copy of the data when running elsewhere.
data_dir = '/Users/dlee/gdrive/gpower/data'
dfFlowDams = pd.read_hdf(data_dir + '/dfFlowDams.hdf')
# Read the array inside a context manager so the archive handle is closed
with np.load(data_dir + '/ind_dams.npz') as npz:
    ind_dams = npz['ind_dams']
damList = ind_dams[0,:]             # first row of ind_dams: dam identifiers
ndam = len(damList)
# The prediction results must be ordered exactly like damList, because the
# cells below index mp1..mp5 and the summary-table columns by position.
point_nos = [ybox['m01']['point_no'] for ybox in mp1]
assert np.array_equal(point_nos, damList), \
    'Order of prediction results does not match damList'

All results are loaded.


### Align prediction results

In [14]:
# Summary tables (one per lead time MP1..MP5), all initialised to zero
# - arrMP*  : predicted series, same shape as dfFlowDams
# - signMP* : isFcst flags, 12 calendar months x ndam
flow_shape = dfFlowDams.shape
arrMP1, arrMP2, arrMP3, arrMP4, arrMP5 = (np.zeros(flow_shape) for _ in range(5))
sign_shape = [12, ndam]
signMP1, signMP2, signMP3, signMP4, signMP5 = (np.zeros(sign_shape) for _ in range(5))

# Align the monthly predictions of every dam with its observed inflow series.
#
# For each dam and each calendar month, the five lead-time columns are first
# filled with the all-year climatological mean of that month. The climatology
# is then replaced by a model's prediction only when that model's skill scores
# satisfy gss >= 0 and msess >= 0 (treated as "better than climatology").
n_steps = len(dfFlowDams)                       # monthly time steps (previously hard-coded as 516)
lead_models = [mp1, mp2, mp3, mp4, mp5]         # prediction results, in lead order MP1..MP5
lead_arrays = [arrMP1, arrMP2, arrMP3, arrMP4, arrMP5]
lead_signs = [signMP1, signMP2, signMP3, signMP4, signMP5]
n_leads = len(lead_models)
export_series = True                            # set False to skip the per-dam Excel export

for i in range(ndam):
    # Data preparation
    point_no = mp1[i]['m01']['point_no']
    y = dfFlowDams[point_no]
    # Results tables
    data = np.zeros([n_steps, n_leads + 1])     # Flow [obs, mp1, mp2, mp3, mp4, mp5]
    sign = np.zeros([n_leads, 13])              # isFcst [5mp, 12 months + sum] ("not forecasted" as default)
    for j in range(1, 13):
        # Rows of calendar month j
        # (assumes the series starts in January and has no gaps -- TODO confirm)
        rows = np.arange(j - 1, n_steps, 12)
        # - Observed Inflow
        yObs = y[y.index.month == j]
        yClm = yObs.mean()                      # All-year mean
        data[rows, 0] = yObs
        data[rows, 1:] = yClm                   # Assign climatology first
        # - MP1..MP5 predictions
        for k, model in enumerate(lead_models, start=1):
            mbox = model[i]['m%02d' % j]
            if 'gss' not in mbox:
                continue                        # no skill scores stored: keep climatology
            gss, msess = mbox['gss'], mbox['msess']
            if (gss >= 0) & (msess >= 0):
                # If prediction is better than climatology
                # (exp() presumably undoes a log transform of the target -- TODO confirm)
                yHat = np.exp(np.hstack((mbox['yTranHat'], mbox['yTestHat'])))
                data[rows, k] = yHat            # prediction
                sign[k - 1, j - 1] = 1          # isFcst

    # Finish result tables (last column = number of forecasted months)
    sign[:, -1] = np.sum(sign, axis=1)

    # Copy time-series and signs(isFcst) of monthly prediction to Numpy array
    for k in range(n_leads):
        lead_arrays[k][:, i] = data[:, k + 1]
        lead_signs[k][:, i] = sign[k, :-1]

    # Export prediction series as an Excel (.xlsx) file
    if export_series:
        filn = './data/prediction/%s_series.xlsx' % point_no
        index = dfFlowDams.index
        # Observations keep the original dates; the MPk column is shifted back
        # by k month-ends before the columns are joined on their dates.
        series = [pd.Series(data[:, 0], index)]
        series += [pd.Series(data[:, k], index - MonthEnd(k))
                   for k in range(1, n_leads + 1)]
        dfFcst = pd.concat(series, axis=1)
        dfFcst.columns = ['OBS'] + ['MP%d' % k for k in range(1, n_leads + 1)]
        dfFcst.to_excel(filn)
        
# Wrap the aligned arrays in DataFrames and write them to HDF files
# - dfMP*      : predicted series, indexed and labelled like dfFlowDams
# - dfMP*_sign : isFcst flags, indexed by calendar month 1..12
dfMP1, dfMP2, dfMP3, dfMP4, dfMP5 = (
    pd.DataFrame(values, dfFlowDams.index, dfFlowDams.columns)
    for values in (arrMP1, arrMP2, arrMP3, arrMP4, arrMP5)
)
dfMP1_sign, dfMP2_sign, dfMP3_sign, dfMP4_sign, dfMP5_sign = (
    pd.DataFrame(flags, range(1,13), dfFlowDams.columns)
    for flags in (signMP1, signMP2, signMP3, signMP4, signMP5)
)
if True:
    # Save the series first, then the signs (same order as before)
    outputs = [
        ('./data/prediction/dfMP1.hdf', dfMP1),
        ('./data/prediction/dfMP2.hdf', dfMP2),
        ('./data/prediction/dfMP3.hdf', dfMP3),
        ('./data/prediction/dfMP4.hdf', dfMP4),
        ('./data/prediction/dfMP5.hdf', dfMP5),
        ('./data/prediction/dfMP1_sign.hdf', dfMP1_sign),
        ('./data/prediction/dfMP2_sign.hdf', dfMP2_sign),
        ('./data/prediction/dfMP3_sign.hdf', dfMP3_sign),
        ('./data/prediction/dfMP4_sign.hdf', dfMP4_sign),
        ('./data/prediction/dfMP5_sign.hdf', dfMP5_sign),
    ]
    for path, frame in outputs:
        save_hdf(path, frame)

./data/prediction/dfMP1.hdf is saved.
./data/prediction/dfMP2.hdf is saved.
./data/prediction/dfMP3.hdf is saved.
./data/prediction/dfMP4.hdf is saved.
./data/prediction/dfMP5.hdf is saved.
./data/prediction/dfMP1_sign.hdf is saved.
./data/prediction/dfMP2_sign.hdf is saved.
./data/prediction/dfMP3_sign.hdf is saved.
./data/prediction/dfMP4_sign.hdf is saved.
./data/prediction/dfMP5_sign.hdf is saved.


### Single output test

In [17]:
# Load the 'mp1' entry of a single result archive for inspection
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- trusted files only.
filn = './data/chtc_out/dfFlowDams0_manu.npz'
with np.load(filn, allow_pickle=True) as npz:   # close the archive after reading
    mp = npz['mp1'].tolist()