# UNSEEN conditioning for HydroTas 2020-2021 workplan:
- Condition SON (September–November) SST and h500 on particularly wet/dry spring conditions

In [14]:
import cftime
import numpy as np
import xarray as xr
import myfuncs as my
import matplotlib.pyplot as plt
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

import warnings
warnings.filterwarnings("ignore")

# Parameters

In [2]:
# Name of the variable to analyse; it is interpolated into the forecast
# file path when the data are opened.
VARIABLE = 'h500'

In [6]:
def resample_forecasts_monthly(ds, n_months, end_month, method='sum',
                               lead_time_func='max'):
    """Resample forecasts to a set of months for each year.

    The forecasts are grouped by initialisation month. Within each group the
    leading lead times are skipped so that the first retained step lines up
    with the first month of the requested window, and the remainder is
    coarsened along ``lead_time`` in blocks of ``n_months`` steps.

    NOTE(review): the offset arithmetic assumes ``lead_time`` advances in
    monthly steps and that the first lead time falls in the initialisation
    month -- confirm against how the forecast data are built.

    Parameters
    ----------
    ds : xarray object
        Forecasts with a ``lead_time`` dimension and ``init_date`` and
        ``time`` coordinates.
    n_months : int
        Number of consecutive lead-time steps combined into each block.
    end_month : int
        Calendar month (1-12) in which each block should end.
    method : {'sum', 'mean'}, optional
        Reduction applied to the data within each block.
    lead_time_func : str or callable, optional
        Function used to reduce the ``lead_time`` coordinate within each
        block. The ``time`` coordinate is always reduced with ``'max'``.

    Returns
    -------
    xarray object
        The resampled forecasts, with the attributes of ``ds['time']``
        copied onto the output ``time`` coordinate.

    Raises
    ------
    ValueError
        If ``method`` is not ``'sum'`` or ``'mean'``.
    AssertionError
        If a group contains more than one initialisation month.
    """
    # Validate eagerly so that a bad argument fails before any grouping or
    # coarsening work is set up.
    if method not in ('sum', 'mean'):
        raise ValueError(
            f"Unrecognised method {method!r}; expected 'sum' or 'mean'")

    def _resample_grouped_monthly_forecasts(group, method):
        """Coarsen one initialisation-month group into n_months blocks."""
        init_months = np.unique(group.init_date.dt.month)
        assert len(init_months) == 1
        # First calendar month of the window; the modulo lets windows wrap
        # across the year boundary (a value of 0 is equivalent to December
        # in the modular arithmetic below).
        start_month = (end_month - (n_months - 1)) % 12
        # Number of leading lead-time steps to skip so that the first
        # retained step falls in start_month.
        i_start = (start_month - init_months[0]) % 12
        blocks = group.isel(lead_time=slice(i_start, None)).coarsen(
            lead_time=n_months, boundary='trim',
            coord_func={'lead_time': lead_time_func,
                        'time': 'max'})
        if method == 'sum':
            return blocks.sum()
        return blocks.mean()

    rs = ds.groupby('init_date.month').map(
        _resample_grouped_monthly_forecasts, method=method)
    # Copy the time attributes from the input onto the resampled output.
    rs['time'].attrs = ds['time'].attrs
    return rs

# Stack the data

In [3]:
# Open the forecast store for the selected variable using the project helper
# `my.open_zarr`.
# NOTE(review): the file name suggests raw anomalies on the native grid --
# confirm.
fcst = my.open_zarr(f'data/f6_{VARIABLE}_NATIVE_raw_anom.zarr.zip')

In [15]:
# Average the forecasts over 3-month blocks ending in month 11
# (September-November), labelling each block with its maximum lead time.
fcst_acc = resample_forecasts_monthly(
    fcst, n_months=3, end_month=11,
    lead_time_func='max',
    method='mean')
# Fill nans in time with dummy times so that time operations work nicely
# (the dummy date is 1 January 1800 in the Julian calendar).
fcst_acc = fcst_acc.assign_coords({
    'time': fcst_acc.time.fillna(cftime.DatetimeJulian(1800, 1, 1))})

### Keep only a certain month

In [20]:
# Calendar month that retained samples must fall in.
month = 11

# Mask out entries whose `time` coordinate is not in `month`; drop=True also
# removes coordinate labels for which no entry matches.
fcst_acc = fcst_acc.where(fcst_acc.time.dt.month == month, drop=True)

In [31]:
# Inspect the resampled array for the configured variable. Indexing with
# VARIABLE rather than a hard-coded name keeps this cell working when the
# notebook parameter is changed.
fcst_acc[VARIABLE]

Unnamed: 0,Array,Chunk
Bytes,6.05 GB,4.98 MB
Shape,"(64, 19, 96, 90, 144)","(1, 1, 96, 90, 144)"
Count,136776 Tasks,7239 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.05 GB 4.98 MB Shape (64, 19, 96, 90, 144) (1, 1, 96, 90, 144) Count 136776 Tasks 7239 Chunks Type float32 numpy.ndarray",19  64  144  90  96,

Unnamed: 0,Array,Chunk
Bytes,6.05 GB,4.98 MB
Shape,"(64, 19, 96, 90, 144)","(1, 1, 96, 90, 144)"
Count,136776 Tasks,7239 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.73 kB,2.43 kB
Shape,"(64, 19)","(16, 19)"
Count,285 Tasks,49 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 9.73 kB 2.43 kB Shape (64, 19) (16, 19) Count 285 Tasks 49 Chunks Type object numpy.ndarray",19  64,

Unnamed: 0,Array,Chunk
Bytes,9.73 kB,2.43 kB
Shape,"(64, 19)","(16, 19)"
Count,285 Tasks,49 Chunks
Type,object,numpy.ndarray


### Keep only a certain month

In [95]:
# Calendar month that retained samples must fall in.
month = 11

# Keep only entries whose `time` coordinate falls in `month`, for both the
# forecasts and the observations.
# NOTE(review): the forecast filter repeats the one applied in the earlier
# "Keep only a certain month" cell, and `obsv_acc` is not defined in the
# visible part of this notebook -- confirm it is created before this cell
# is run.
fcst_acc = fcst_acc.where(fcst_acc.time.dt.month == month, drop=True)
obsv_acc = obsv_acc.where(obsv_acc.time.dt.month == month, drop=True)