In [None]:
import sys
sys.path.append('../python')
import numpy as np
import pandas as pd
import xarray as xr
from data import Data
from geo import cells
from mapping import matts
import matplotlib.pyplot as plt
%matplotlib nbagg

In [None]:
# Open the station store described by the project config file.
D = Data('../python/data.cfg')
D.open('d', 'station_data.h5')
# From the 'pp_mm' table keep only the 'prom' entry of the 'aggr' column level.
# NOTE(review): presumably the mean ('promedio') precipitation in mm - confirm.
r = D.d['pp_mm'].xs('prom', axis=1, level='aggr')
# Station metadata rows, in the same order as the columns of `r`.
sta = D.sta.loc[r.columns.get_level_values('station')]

In [None]:
# Read the static grid fields into memory before the file is closed.
# NOTE(review): the *_C / *_M suffixes presumably denote WRF corner and
# mass-point grids - confirm.
with xr.open_dataset('../../data/WRF/3d/geo_em.d03.nc') as d3:
    lm = d3['LANDMASK'].squeeze().load()
    clon = d3['XLONG_C'].squeeze().load()
    clat = d3['XLAT_C'].squeeze().load()
    lon = d3['XLONG_M'].squeeze().load()
    lat = d3['XLAT_M'].squeeze().load()
    dom3 = matts(d3)

# `i` / `j` are later used as south_north / west_east indices of the stations;
# `k` holds the positions of the stations that get dropped below
# (presumably those that fall outside the grid - confirm against geo.cells).
i, j, k = cells(clon, clat, *sta[['lon', 'lat']].astype(float).values.T)
sta = sta.drop(sta.index[k])
# Keep only the observation columns of the remaining stations.
r = r[sta.index]

def pm(x):
    """Average `x` over the station points.

    Selects the points addressed by the module-level arrays `i`
    (south_north) and `j` (west_east) with `sel_points`, then returns the
    mean along the resulting 'points' dimension.
    """
    at_stations = x.sel_points(south_north=i, west_east=j)
    return at_stations.mean('points')

In [None]:
# Run set 5.
# NOTE(review): RAINNC is presumably WRF's accumulated precipitation, with one
# forecast per 'start' label and one model step per 'Time' position - confirm.
# NOTE(review): d5 and d6 are never closed; both are read again in a later cell.
# d5 = xr.open_dataset('../../data/WRF/3d/RAINNC_5.nc')
d5 = xr.open_dataset('../python/RAINNC_5.nc')
# Sample the field at Time positions 24, 48, ..., 120.
x = d5['RAINNC'].isel(Time = np.arange(24, 121, 24))
# First sample followed by the differences between consecutive samples.
x5 = xr.concat((x.isel(Time = 0), x.diff('Time')), 'Time')
# Step-to-step differences over all Time positions.
a5 = d5['RAINNC'].diff('Time')

# Run set 6: same construction, restricted to 'start' labels up to 2017-07-21
# and sampled at Time positions 24, 48, ..., 144.
# d6 = xr.open_dataset('../../data/WRF/3d/RAINNC_6.nc')
d6 = xr.open_dataset('../python/RAINNC_6.nc')
x = d6['RAINNC'].sel(start=slice(None, '2017-07-21'))
x6 = x.isel(Time = np.arange(24, 145, 24))
x6 = xr.concat((x6.isel(Time = 0), x6.diff('Time')), 'Time')
# Step-to-step differences of the date-restricted field (`x`), not of all of d6.
a6 = x.diff('Time')

In [None]:
from scipy import interpolate as ip
from mapping import affine

def interp2Dn(lon, lat, x, sta, coords=('XTIME',)):
    """Interpolate a gridded field to station locations.

    Every 2-D (south_north, west_east) slice of `x` is interpolated with
    ``scipy.interpolate.interpn(..., 'linear')`` at the points obtained by
    passing the station lon/lat pairs through ``affine(lon.values, lat.values)``.

    Parameters
    ----------
    lon, lat : 2-D arrays exposing ``.values``
        Grid coordinates; ``lon.shape`` also defines the index axes handed to
        ``interpn``.
    x : xarray.DataArray
        Must carry the dims 'south_north' and 'west_east' plus exactly two
        further dims.
    sta : pandas.DataFrame
        Needs 'lon' and 'lat' columns; its index labels the 'stations' dim.
    coords : iterable of str
        Names of entries of `x` that are copied onto the result.

    Returns
    -------
    xarray.DataArray
        Dims are (first other dim, second other dim, 'stations'), with the two
        other dims in the order they have on `x`.

    Raises
    ------
    ValueError
        If `x` does not have exactly two dims besides the spatial ones.
    """
    # NOTE(review): `g` presumably maps lon/lat to fractional grid indices -
    # confirm against mapping.affine.
    g = affine(lon.values, lat.values)
    xy = (np.arange(lon.shape[0]), np.arange(lon.shape[1]))
    # When south_north precedes west_east, the two columns of the transformed
    # station coordinates are swapped (roll by one along axis 1).
    order = x.dims.index('south_north') < x.dims.index('west_east')
    co = np.roll(g(sta[['lon', 'lat']].astype(float).values.T).T, int(order), 1)
    # Preserve the order of the remaining dims as found on `x`; a set
    # difference would yield an arbitrary order that can change between runs.
    d = [name for name in x.dims if name not in ('south_north', 'west_east')]
    if len(d) != 2:
        raise ValueError(
            'interp2Dn expects exactly two dims besides south_north/west_east, '
            'got {}'.format(d)
        )
    y = xr.concat([
        xr.concat([
            xr.DataArray(
                ip.interpn(xy, x.loc[{d[0]: a, d[1]: b}].values, co, 'linear'),
                coords=[('stations', sta.index)]
            ) for b in x[d[1]]],
            pd.Index(x[d[1]], name=d[1])
        ) for a in x[d[0]]],
        pd.Index(x[d[0]], name=d[0])
    )
    for c in coords:
        y[c] = x[c]
    return y

In [None]:
def offset_daily(x, delta='-8h'):
    """
    Turn hourly observations into daily values aligned with WRF simulation days.

    The time index of a copy of `x` is shifted by `delta`, the shifted data are
    averaged per calendar day and the daily mean is multiplied by 24.
    WRF simulation days start at 8:00h local time (previously 20:00h); with the
    default -8h shift each resampled timestamp therefore marks the beginning of
    a 24h period that starts at 8:00. `x` itself is left untouched.
    """
    shifted = x.copy()
    shifted.index = shifted.index + pd.Timedelta(delta)
    daily_mean = shifted.resample('D').mean()
    return daily_mean * 24

def loss(obs, mod, offset='MS'):
    """Build a table of model values per lead next to the observations.

    Despite its name this returns no error measure; the result is meant to be
    passed on to an error function.

    Parameters
    ----------
    obs : pandas.DataFrame
        Time-indexed observations; they are resampled to `offset` (mean),
        averaged across columns, and rows without any value are dropped.
    mod : xarray object with a 'Time' dim and a 'start' coordinate
        For lead `n` the slice ``Time=n`` is taken and its 'start' coordinate is
        moved forward by `n` days before resampling to `offset` (mean).
    offset : str
        Resampling rule applied to both inputs ('MS' by default).

    Returns
    -------
    pandas.DataFrame
        One column per lead index (0 .. len(mod.Time) - 1) plus an 'obs' column,
        limited to the timestamps present on both sides (inner join).
    """
    o = obs.resample(offset).mean().mean(axis=1).dropna()
    o.name = 'obs'

    def lead(n):
        m = mod.isel(Time=n)
        m['start'] = m.start + pd.Timedelta(n, 'd')
        # NOTE(review): legacy xarray resample signature (freq, dim, how=...);
        # kept as is because the rest of the notebook targets that xarray version.
        return m.resample(offset, 'start', how='mean').to_series()

    leads = np.arange(len(mod.Time))
    # `axis` / `how` are passed by keyword: pandas 2.0 no longer accepts them
    # positionally in concat() and dropna().
    table = pd.concat([lead(n) for n in leads], axis=1, keys=leads)
    return table.dropna(axis=0, how='all').join(o, how='inner')

In [None]:
def hourly(mod, obs):
    """Build a table of model values per 24-step lead window next to `obs`.

    Parameters
    ----------
    mod : xarray.DataArray with dims 'start' and 'Time' and an 'XTIME' coordinate
        Split into consecutive windows of 24 'Time' positions; window `n`
        becomes column `n`, indexed by the XTIME values of its
        stacked ('start', 'Time') entries. Rows with NaN are dropped per window.
    obs : pandas.Series
        Observations; a copy is renamed to 'obs' and inner-joined on the index.

    Returns
    -------
    pandas.DataFrame
        One column per complete 24-step window plus an 'obs' column.
    """
    o = obs.copy()
    o.name = 'obs'

    def lead(y, n):
        x = y.isel(Time=slice(n * 24, (n + 1) * 24)).stack(t=('start', 'Time'))
        return pd.DataFrame(x.values, index=x.XTIME, columns=[n]).dropna()

    windows = range(len(mod.Time) // 24)
    # `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
    table = pd.concat([lead(mod, n) for n in windows], axis=1)
    return table.join(o, how='inner')

In [None]:
# Observed series averaged across stations; timestamps without any value are dropped.
rs = r.mean(axis=1).dropna()
# Model series averaged over the station points.
m5, m6 = pm(x5), pm(x6)
# Comparison tables per run set: monthly (default 'MS'), daily ('D') and hourly.
# Observations are shifted by -20h for run set 5 and by -8h for run set 6.
lm = [
    loss(offset_daily(r, shift), run)
    for shift, run in [('-20h', m5), ('-8h', m6)]
]
ld = [
    loss(offset_daily(r, shift), run, 'D')
    for shift, run in [('-20h', m5), ('-8h', m6)]
]
# The factor 24 scales every column of the hourly table, 'obs' included.
lh = [hourly(run, rs) * 24 for run in [pm(a5), pm(a6)]]

In [None]:
# Observations and lead-0 model values of both daily tables on one time axis.
plt.figure(figsize=(9, 5))
lead0 = pd.concat([tbl[['obs', 0]] for tbl in ld]).sort_index()
p = plt.plot(lead0)
plt.legend(p, ['obs', 'WRF'])

In [None]:
def mae(x):
    """Mean absolute error of model values against observations.

    Parameters
    ----------
    x : pandas.DataFrame or xarray object
        DataFrame: must contain an 'obs' column; every other column is compared
        against it row by row.
        Otherwise: an object with dims 'points', 'type' and 'time'; it is
        averaged over 'points' and differenced along 'type'.

    Returns
    -------
    pandas.Series (one value per non-'obs' column) for a DataFrame, otherwise a
    flat numpy array of the time-mean absolute differences.
    """
    if isinstance(x, pd.DataFrame):
        # `axis` is passed by keyword: DataFrame.drop() rejects a positional
        # axis from pandas 2.0 on.
        errors = x.drop('obs', axis=1).sub(x['obs'], axis=0)
        return errors.abs().mean()
    return np.abs(x.mean('points').diff('type')).mean('time').values.flatten()

In [None]:
# Grouped bars of the MAE per lead: six tables side by side, groups 7 units apart.
fig, ax = plt.subplots(figsize=(9, 6))
tables = [lm[0], ld[0], lh[0], lm[1], ld[1], lh[1]]
# Each table has one column per lead plus 'obs', hence shape[1] - 1 bars.
ps = [
    ax.bar(np.arange(tbl.shape[1] - 1) * 7 + slot, mae(tbl))
    for slot, tbl in enumerate(tables)
]
ax.set_xticks(np.arange(6) * 7 + 1.5)
ax.set_xticklabels(np.arange(1, 7))
ax.set_xlabel('lead day')
labels = ['monthly 0h', 'daily 0h', 'hourly 0h', 'monthly 12h', 'daily 12h', 'hourly 12h']
ax.legend(ps, labels, loc='upper left')

In [None]:
# NOTE(review): point_loss is defined in a later cell of this notebook, so on a
# fresh kernel this cell raises NameError unless that cell has been run first.
# Model values at the station points (one 'points' entry per station).
p5 = x5.isel_points(south_north=i, west_east=j)
p6 = x6.isel_points(south_north=i, west_east=j)
# Rebinds lm / ld, which an earlier cell filled with the tables from loss().
# Monthly ('MS') and daily (point_loss default 'D') per-point comparisons.
lm = [point_loss(offset_daily(r, o), k, 'MS') for o, k in [('4h', p5), ('16h', p6)]]
ld = [point_loss(offset_daily(r, o), k) for o, k in [('4h', p5), ('16h', p6)]]

In [None]:
# Grouped bars of the MAE per lead; the bar count is taken from the length of
# the error vector so that DataFrame and xarray inputs to mae() both work.
fig, ax = plt.subplots(figsize=(9, 6))
tables = [lm[0], ld[0], lh[0], lm[1], ld[1], lh[1]]
ps = []
for slot, tbl in enumerate(tables):
    err = mae(tbl)
    ps.append(ax.bar(np.arange(len(err)) * 7 + slot, err))
ax.set_xticks(np.arange(6) * 7 + 1.5)
ax.set_xticklabels(np.arange(1, 7))
ax.set_xlabel('lead day')
labels = ['monthly 0h', 'daily 0h', 'hourly 0h', 'monthly 12h', 'daily 12h', 'hourly 12h']
ax.legend(ps, labels, loc='upper left')

In [None]:
# Station-mean series from fields interpolated to the station locations.
# id5 / id6: interpolation of the 24-step fields x5 / x6.
id5 = interp2Dn(lon, lat, x5, sta).mean('stations')
id6 = interp2Dn(lon, lat, x6, sta).mean('stations')
# ih5 / ih6: interpolation of the full RAINNC fields, then step-to-step
# differences along 'Time' (run set 6 again limited to starts up to 2017-07-21).
ih5 = interp2Dn(lon, lat, d5['RAINNC'], sta).mean('stations').diff('Time')
ih6 = interp2Dn(lon, lat, d6['RAINNC'].sel(start=slice(None, '2017-07-21')), sta).mean('stations').diff('Time')

In [None]:
# Daily and hourly comparison tables for the interpolated series, built with
# the same observation shifts (-20h / -8h) and the same factor 24 as the
# tables for the station-point means.
lid = [loss(offset_daily(r, o), k, 'D') for o, k in [('-20h', id5), ('-8h', id6)]]
lih = [hourly(a, rs) * 24 for a in [ih5, ih6]]

In [None]:
# Grouped bars of the MAE per lead, comparing point selection with interpolation.
fig, ax = plt.subplots(figsize=(9, 6))
bars = [ld[0], lid[0], lh[0], lih[0], ld[1], lid[1], lh[1], lih[1]]
# Distance between two groups: one slot per table plus one empty slot.
stride = len(bars) + 1
ps = [
    ax.bar(np.arange(tbl.shape[1] - 1) * stride + slot, mae(tbl))
    for slot, tbl in enumerate(bars)
]
ax.set_xticks(np.arange(6) * stride + 1.5)
ax.set_xticklabels(np.arange(1, 7))
ax.set_xlabel('lead day')
labels = ['daily 0h', 'daily interp. 0h', 'hourly 0h', 'hourly interp. 0h',
          'daily 12h', 'daily interp. 12h', 'hourly 12h', 'hourly interp. 12h']
ax.legend(ps, labels, loc='lower right')

In [None]:
# NOTE: the timestamp on model data now refers to the *end* of a period - adjust offset_daily accordingly
def point_loss(obs, mod, sample='D'):
    """Pair observations and model values per station point and timestamp.

    Parameters
    ----------
    obs : pandas.DataFrame
        Time-indexed observations whose index is named 'ultima_lectura'; the
        columns are numbered 0..n-1 and used as 'points'.
        NOTE(review): assumes the column order matches mod's 'points' - confirm.
    mod : xarray.DataArray with dims 'points', 'start', 'Time' and coords
        'points' and 'XTIME'. Each 'Time' position becomes one 'lead' entry,
        re-indexed by its XTIME values.
        NOTE(review): assumes the values are ordered (points, start) - confirm.
    sample : str
        Resampling rule applied to both sides (mean).

    Returns
    -------
    xarray.DataArray with a leading 'type' dim ('obs', 'mod'), restricted to
    the timestamps present on both sides.
    """
    def by_valid_time(n):
        step = mod.isel(Time=n).dropna('start', 'all')
        return xr.DataArray(step.values, coords=[('points', step.points), ('time', step.XTIME)])

    n_leads = len(mod.Time)
    stacked = xr.concat([by_valid_time(n) for n in range(n_leads)], 'lead')
    m = stacked.resample(sample, 'time', how='mean')

    o = xr.DataArray(obs.resample(sample).mean())
    # Replace the column dimension by a plain positional 'points' dimension.
    o.coords['points'] = ('dim_1', np.arange(len(o.dim_1)))
    o = o.swap_dims({'dim_1': 'points'}).rename({'ultima_lectura': 'time'}).drop('dim_1')

    shared = pd.DatetimeIndex(m.time.values).intersection(pd.DatetimeIndex(o.time.values))
    kinds = pd.Index(['obs', 'mod'], name='type')
    return xr.concat((o.sel(time=shared), m.sel(time=shared)), kinds)

In [None]:
# Daily per-point obs/model pairs for both run sets (observations shifted by
# 4h and 16h respectively).
# NOTE(review): this rebinds p5 / p6, which another cell uses for the plain
# isel_points selections.
p5 = point_loss(offset_daily(r,'4h'), x5.isel_points(south_north=i, west_east=j))
p6 = point_loss(offset_daily(r,'16h'), x6.isel_points(south_north=i, west_east=j))

In [None]:
from mapping import map_plot

In [None]:
from mpl_toolkits import basemap

In [None]:
# Basemap in 'lcc' projection; the remaining parameters come from `dom3`,
# the result of matts() on the geo_em file.
Map = basemap.Basemap(projection='lcc', **dom3)

In [None]:
# Time-mean absolute difference along 'type' at lead 0 for run set 5.
# NOTE(review): assumes p5 is the point_loss result, not the isel_points selection.
e = np.abs(p5.isel(lead=0).diff('type')).mean('time').squeeze()

In [None]:
# Map of the stations, coloured by the error values in `e`.
plt.figure()
# NOTE: rebinds the global `x` (and `y`) to the station longitudes / latitudes.
x, y = sta[['lon', 'lat']].values.T
Map.scatter(x, y, c=e.values, latlon=True)
plt.colorbar()

In [None]:
# Same error map for run set 6: time-mean absolute difference along 'type'
# at lead 0, shown per station.
e = np.abs(p6.isel(lead=0).diff('type')).mean('time').squeeze()
plt.figure()
x, y = sta[['lon', 'lat']].values.T
Map.scatter(x, y, c=e.values, latlon=True)
plt.colorbar()

In [None]:
# Step-to-step model differences at a single station point: first 24 positions
# of the second axis for every entry of the first, flattened into one series `t`.
# NOTE(review): positional indexing assumes the dim order
# (start, Time, south_north, west_east) - confirm.
# NOTE(review): index 20 is presumably the position of station 'LAGHU' in
# r.columns (cf. r.columns.get_loc('LAGHU')) - confirm.
f = a5[:,:24, i[20], j[20]].stack(t=('start', 'Time'))
g = a6[:,:24, i[20], j[20]].stack(t=('start', 'Time'))

In [None]:
# Observed series of station 'LAGHU' together with the single-point model
# series `g` (built from a6) and `f` (built from a5).
plt.figure()
plt.plot(r['LAGHU'], label='obs (LAGHU)')
plt.plot(g.XTIME, g.values, label='a6')
plt.plot(f.XTIME, f.values, label='a5')
# A legend replaces the former argument-less plt.plot() call, which drew nothing.
plt.legend()

In [None]:
# Column position of station 'LAGHU' in `r`.
r.columns.get_loc('LAGHU')