In [None]:
import sys
sys.path.append('../python')
import numpy as np
import pandas as pd
import xarray as xr
from data import Data
from geo import cells
import matplotlib.pyplot as plt
%matplotlib nbagg

In [None]:
# Load the station observations through the project's Data helper (configured from data.cfg).
D = Data('../python/data.cfg')
D.open('d','station_data.h5')
# Cross-section of the 'pp_mm' table: keep the 'prom' entries of column level 'aggr'.
# NOTE(review): 'prom' presumably denotes a mean aggregate — confirm against the Data schema.
r = D.d['pp_mm'].xs('prom', 1, 'aggr')
# Station metadata rows, looked up by the 'station' labels found in r's columns.
sta = D.sta.loc[r.columns.get_level_values('station')]

In [None]:
# Read LANDMASK and the XLONG_C / XLAT_C coordinate grids from the WRF geo_em file.
# .load() pulls the arrays into memory before the dataset is closed on leaving the with-block.
with xr.open_dataset('../../data/WRF/3d/geo_em.d03.nc') as d3:
    # NOTE(review): the name lm is rebound to a list of tables in a later cell.
    lm = d3['LANDMASK'].squeeze().load()
    lon, lat = d3['XLONG_C'].squeeze().load(), d3['XLAT_C'].squeeze().load()

# Look up grid indices for the station coordinates with the project's geo.cells helper.
# NOTE(review): i, j are used below as (south_north, west_east) indexers and k selects the
# station rows that get dropped — presumably stations cells() could not place; confirm in geo.cells.
i, j, k = cells(lon, lat, *sta[['lon', 'lat']].astype(float).values.T)
# sta is rebound here, so the unfiltered station table is no longer available afterwards.
sta = sta.drop(sta.index[k])

In [None]:
# RAINNC from the 5-day file, sampled at Time indices 24, 48, ..., 120.
d5 = xr.open_dataset('../python/RAINNC_5days.nc')
x = d5['RAINNC'].isel(Time = np.arange(24, 121, 24))
# First sample followed by successive differences: the increment over each 24-step block
# (RAINNC is treated here as a running accumulation, hence the differencing).
# NOTE(review): presumably hourly output, making these daily totals — confirm the time step.
x5 = xr.concat((x.isel(Time = 0), x.diff('Time')), 'Time')
# Values at the station grid cells (i, j), averaged over the resulting 'points' dimension.
# NOTE(review): isel_points is not available in recent xarray releases — confirm the
# xarray version this notebook targets.
m5 = x5.isel_points(south_north=i, west_east=j).mean('points')

# Same processing for the 6-day file (Time indices 24, 48, ..., 144).
d6 = xr.open_dataset('../python/RAINNC_6.nc')
x = d6['RAINNC'].isel(Time = np.arange(24, 145, 24))
x6 = xr.concat((x.isel(Time = 0), x.diff('Time')), 'Time')
m6 = x6.isel_points(south_north=i, west_east=j).mean('points')

In [None]:
# Quick check of the station table's dimensions.
sta.shape

In [None]:
# Per-station values of x6, for inspection only. They are bound to their own name so the
# station-averaged m6 consumed by the later loss(...) cells is not overwritten when the
# notebook is run top to bottom.
m6_points = x6.isel_points(south_north=i, west_east=j)
m6_points

In [None]:
# Show the current value of m6.
m6

In [None]:
def offset_daily(x, delta='-8h'):
    """
    Aggregate hourly observations into daily values aligned with WRF simulation days.

    Simulation days begin at 8:00h local time (formerly 20:00h). Every timestamp is
    moved by ``delta`` before the calendar-day resample, so with the default of -8h
    each label of the result marks the start of a 24h window that opens at 8:00.
    The daily figure is the mean of the available hourly values multiplied by 24.
    The input object is left untouched; a shifted copy is resampled instead.
    """
    shift = pd.Timedelta(delta)
    shifted = x.copy()
    shifted.index = shifted.index + shift
    daily_mean = shifted.resample('D').mean()
    return daily_mean * 24

def loss(obs, mod, offset='MS'):
    """
    Pair the model values of every forecast lead with the observed station mean.

    Parameters
    ----------
    obs : pandas.DataFrame
        Observations with a DatetimeIndex and one column per station. They are
        resampled to ``offset`` (mean), then averaged across columns.
    mod : xarray.DataArray
        Model values with a ``Time`` dimension (one entry per lead) and a
        ``start`` datetime coordinate.
        NOTE(review): assumes ``start`` is the simulation start, so that
        ``start + n days`` is the date lead ``n`` is valid for — confirm.
    offset : str
        Pandas offset alias used to resample both series (default 'MS').

    Returns
    -------
    pandas.DataFrame
        One column per lead index (0, 1, ...) plus an 'obs' column, limited to
        the periods present in both the model table and the observations.
    """
    o = obs.resample(offset).mean().mean(axis=1).dropna()
    o.name = 'obs'

    def lead(n):
        m = mod.isel(Time=n)
        # Shift the start stamps forward by the lead, in whole days.
        m['start'] = m.start + pd.Timedelta(days=int(n))
        # The (freq, dim, how=...) resample signature is the legacy xarray form;
        # it is kept because the rest of the notebook targets that xarray version.
        return m.resample(offset, 'start', how='mean').to_series()

    r = np.arange(len(mod.Time))
    # axis/how are passed by keyword: current pandas no longer accepts them positionally.
    by_lead = pd.concat([lead(n) for n in r], axis=1, keys=r)
    return by_lead.dropna(axis=0, how='all').join(o, how='inner')

In [None]:
# Reopen the 5-day file, this time keeping every time step.
d5 = xr.open_dataset('../python/RAINNC_5days.nc')
x = d5['RAINNC']
# Step-to-step increments of RAINNC along Time.
a5 = x.diff('Time')
# Values at the station grid cells (i, j), averaged over the 'points' dimension.
a5 = a5.isel_points(south_north=i, west_east=j).mean('points')

In [None]:
# Reopen the 6-day file, keeping every time step.
d6 = xr.open_dataset('../python/RAINNC_6.nc')
x = d6['RAINNC']
# Step-to-step increments of RAINNC along Time.
a6 = x.diff('Time')
# Values at the station grid cells (i, j), averaged over the 'points' dimension.
a6 = a6.isel_points(south_north=i, west_east=j).mean('points')

In [None]:
def hourly(mod, obs):
    """
    Line up the model series of every lead day with the observed series.

    Parameters
    ----------
    mod : xarray.DataArray
        Two-dimensional model values; the second axis is cut into consecutive
        blocks of 24 entries, one block per lead day. The array must expose
        ``start`` and ``Time`` dimensions and an ``XTIME`` coordinate.
        NOTE(review): assumes axis 0 is ``start``, axis 1 is ``Time`` and
        ``XTIME`` holds the valid timestamps — confirm.
    obs : pandas.Series
        Observed series indexed by timestamp. A renamed copy is joined, so the
        caller's object keeps its own name.

    Returns
    -------
    pandas.DataFrame
        One column per lead day (0, 1, ...) plus an 'obs' column, limited to
        the timestamps that also occur in ``obs``.
    """
    o = obs.copy()
    o.name = 'obs'

    def lead(y, n):
        # Block n of the second axis, flattened over (start, Time) into one axis 't'.
        x = y[:, n*24:(n+1)*24].stack(t=('start', 'Time'))
        return pd.DataFrame(x.values, index=x.XTIME, columns=[n]).dropna()

    n_leads = mod.shape[1] // 24
    # axis is passed by keyword: current pandas no longer accepts it positionally.
    return pd.concat([lead(mod, n) for n in range(n_leads)], axis=1).join(o, how='inner')

In [None]:
# Comparison tables for both model runs, each paired with its own observation offset
# ('4h' with m5, '-8h' with m6): lm at loss()'s default 'MS' frequency, ld at daily frequency.
# NOTE(review): lm rebinds the name that held the LANDMASK array read earlier.
lm = [loss(offset_daily(r, o), k) for o, k in [('4h', m5), ('-8h', m6)]]
ld = [loss(offset_daily(r, o), k, 'D') for o, k in [('4h', m5), ('-8h', m6)]]
# Hourly tables: model starts up to 2017-07-21 against the column-mean of the observations.
# The whole joined frame (model columns and 'obs') is multiplied by 24.
lh = [hourly(a.sel(start=slice(None, '2017-07-21')), r.mean(1).dropna()) * 24 for a in [a5, a6]]

In [None]:
# 'obs' and lead-0 columns of both daily tables, concatenated and sorted by index.
plt.figure(figsize=(9,5))
p = plt.plot(pd.concat([l[['obs', 0]] for l in ld]).sort_index())
# Legend entries follow the column order ['obs', 0].
plt.legend(p, ['obs', 'WRF'])

In [None]:
# Same processing as for m5/m6, but reading the files under ../../data/WRF/3d
# (note the 6-day file here is 'RAINNC_6days.nc', not 'RAINNC_6.nc'); results go to X5/M5, X6/M6.
# d5, d6 and x are rebound by this cell.
d5 = xr.open_dataset('../../data/WRF/3d/RAINNC_5days.nc')
x = d5['RAINNC'].isel(Time = np.arange(24, 121, 24))
# First sample followed by successive differences: the increment over each 24-step block.
X5 = xr.concat((x.isel(Time = 0), x.diff('Time')), 'Time')
# Values at the station grid cells (i, j), averaged over the 'points' dimension.
M5 = X5.isel_points(south_north=i, west_east=j).mean('points')

d6 = xr.open_dataset('../../data/WRF/3d/RAINNC_6days.nc')
x = d6['RAINNC'].isel(Time = np.arange(24, 145, 24))
X6 = xr.concat((x.isel(Time = 0), x.diff('Time')), 'Time')
M6 = X6.isel_points(south_north=i, west_east=j).mean('points')

In [None]:
# Recompute the station means from X5 / X6 (same expressions as in the cell that builds X5 / X6).
M5 = X5.isel_points(south_north=i, west_east=j).mean('points')
M6 = X6.isel_points(south_north=i, west_east=j).mean('points')

In [None]:
# Rebuild the monthly (lm) and daily (ld) comparison tables from M5 / M6,
# replacing the versions previously derived from m5 / m6.
lm = [loss(offset_daily(r, o), k) for o, k in [('4h', M5), ('-8h', M6)]]
ld = [loss(offset_daily(r, o), k, 'D') for o, k in [('4h', M5), ('-8h', M6)]]

In [None]:
# 'obs' and lead-0 columns of both daily tables (current ld), concatenated and sorted by index.
plt.figure(figsize=(9,5))
p = plt.plot(pd.concat([l[['obs', 0]] for l in ld]).sort_index())
# Legend entries follow the column order ['obs', 0].
plt.legend(p, ['obs', 'WRF'])

In [None]:
# Plot the first elements of l5d and l6d, concatenated.
# NOTE(review): l5d and l6d are not assigned anywhere in the visible notebook, so this cell
# fails on a fresh kernel unless they are defined elsewhere — confirm or remove.
plt.figure(figsize=(15,8))
p = plt.plot(pd.concat((l5d[0],l6d[0])))
plt.legend(p, ['obs', 'WRF'])

In [None]:
# Open a new, empty figure.
plt.figure()


In [None]:
# For every table: difference adjacent columns (diff(1, 1) = one period along axis 1),
# take absolute values, average each column, and keep the entry selected by [1].
# NOTE(review): l5m, l6m, l5d and l6d are not assigned anywhere in the visible notebook,
# so this cell fails on a fresh kernel unless they are defined elsewhere — confirm or remove.
mly5 = [a.diff(1,1).abs().mean()[1] for a in l5m]
mly6 = [a.diff(1,1).abs().mean()[1] for a in l6m]
dly5 = [a.diff(1,1).abs().mean()[1] for a in l5d]
dly6 = [a.diff(1,1).abs().mean()[1] for a in l6d]

In [None]:
# Show the values computed from l5m.
mly5

In [None]:
# Show the values computed from l6m.
mly6

In [None]:
# Show the values computed from l5d.
dly5

In [None]:
# Show the values computed from l6d.
dly6

In [None]:
def mae(df):
    """
    Mean absolute error of every model column against the 'obs' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an 'obs' column and any number of further columns.

    Returns
    -------
    pandas.Series
        One value per non-'obs' column: the mean of ``|column - obs|`` over the
        rows (pandas' default NaN skipping applies).
    """
    # axis is passed by keyword: current pandas no longer accepts it positionally in drop().
    errors = df.drop('obs', axis=1).sub(df['obs'], axis=0)
    return errors.abs().mean()

In [None]:
# Rebuild the monthly (lm) and daily (ld) comparison tables from m5 / m6 again,
# replacing the versions derived from M5 / M6.
lm = [loss(offset_daily(r, o), k) for o, k in [('4h', m5), ('-8h', m6)]]
ld = [loss(offset_daily(r, o), k, 'D') for o, k in [('4h', m5), ('-8h', m6)]]

In [None]:
# Grouped bar chart of the mean absolute error per lead day: one group per lead day,
# one bar per comparison table (monthly / daily / hourly for each of the two runs).
fig, ax = plt.subplots(figsize=(9, 6))
frames = [lm[0], ld[0], lh[0], lm[1], ld[1], lh[1]]
group_step = 7  # x distance between consecutive lead-day groups (six bars plus a gap)
ps = []
for pos, frame in enumerate(frames):
    n_leads = frame.shape[1] - 1  # every column except 'obs'
    ps.append(ax.bar(np.arange(n_leads) * group_step + pos, mae(frame)))
# Bars of a group sit at offsets 0 .. len(frames)-1, so the group centre is their midpoint.
max_leads = max(frame.shape[1] - 1 for frame in frames)
ax.set_xticks(np.arange(max_leads) * group_step + (len(frames) - 1) / 2)
ax.set_xticklabels(np.arange(1, max_leads + 1))
ax.set_xlabel('lead day')
ax.legend(ps, ['monthly 0h', 'daily 0h', 'hourly 0h', 'monthly 12h', 'daily 12h', 'hourly 12h'], loc='upper right')

In [None]:
# Mean of the per-column means of the observations, multiplied by 24.
r.mean().mean()*24

In [None]:
# NOTE(review): `a` is only ever a comprehension variable in the visible notebook, never a
# top-level name, so this cell fails on a fresh kernel — presumably a5 or a6 was meant; confirm.
a.mean()*24

In [None]:
# Column means of the first hourly comparison table (built from a5).
lh[0].mean()

In [None]:
# Column means of the second hourly comparison table (built from a6).
lh[1].mean()

In [None]:
# Display the second hourly comparison table (built from a6).
lh[1]