In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import dask
import dask.distributed
import dask_jobqueue
import pathlib
import xarray as xr

from crims2s.distribution import fit_normal_xarray
from crims2s.mldataset import read_flat_fields, ecmwf_datestring_to_ncep_datestring, trim_ncep_forecast
from crims2s.util import fix_dataset_dims, add_biweekly_dim

In [3]:
# Directory that holds the training input files; wrapped in a pathlib.Path below.
TRAINING_INPUT = '***BASEDIR***training-input'

In [4]:
# Path object for the training directory: iterated with iterdir() to pick files
# and passed to read_flat_fields() further down.
train_input_path = pathlib.Path(TRAINING_INPUT)

In [None]:
# Gather the ECCC files for the two variables of interest (t2m and tp).
# iterdir() yields entries in arbitrary order, so sort the result to make
# files_list[0] (used as the sample file in the next cells) stable across runs.
files_list = sorted(
    path
    for path in train_input_path.iterdir()
    if 'eccc' in path.name and any(tag in path.name for tag in ('-t2m-', '-tp-'))
)

In [None]:
len(files_list)

In [None]:
xr.open_dataset(files_list[0])

In [None]:
# Open the first selected file and run it through add_biweekly_dim.
# NOTE(review): the effect of weeks_12=False is defined in crims2s.util — confirm there.
sample = add_biweekly_dim(xr.open_dataset(files_list[0]), weeks_12=False)

In [None]:
sample

In [None]:
sample.lead_time

In [None]:
# Fit on the t2m variable over the lead_time and realization dimensions.
# The result exposes a t2m_sigma variable (plotted in a later cell); presumably these
# are normal-distribution parameters, judging by the helper's name — confirm in
# crims2s.distribution.
parameters = fit_normal_xarray(sample.t2m, dim=['lead_time', 'realization'])

In [None]:
parameters

In [None]:
# Plot the fitted t2m_sigma at forecast_time index 0 and biweekly_forecast index 0.
parameters.isel(forecast_time=0, biweekly_forecast=0).t2m_sigma.plot()

In [None]:
# Plot raw t2m for one positional slice: biweekly_forecast=2, lead_time=5,
# realization=0, forecast_time=0.
sample.isel(biweekly_forecast=2, lead_time=5, realization=0, forecast_time=0).t2m.plot()

In [None]:
# Open only the first selected file through open_mfdataset.
# Note: the name `d` is rebound to multi-file datasets in later cells.
d = xr.open_mfdataset(files_list[0])

In [None]:
d

In [None]:
# Create a SLURM-backed dask cluster named 's2s'.
# env_extra lists shell commands (source the bash profile, activate the `s2s` conda
# environment); presumably they are inserted into each job script before the worker
# starts — confirm in the dask-jobqueue docs.
# NOTE(review): newer dask-jobqueue releases deprecate env_extra in favour of
# job_script_prologue — check the installed version before changing this.
cluster = dask_jobqueue.SLURMCluster(
    env_extra=['source ***HOME***.bash_profile','conda activate s2s'],
    name='s2s',
)

In [None]:
# Ask the cluster for two jobs.
cluster.scale(jobs=2)

In [None]:
# Attach a distributed client to the cluster created above.
client = dask.distributed.Client(cluster)

In [None]:
client

In [None]:
# Open every selected file as a single dataset; fix_dataset_dims is applied to each
# file's dataset (preprocess hook) before they are combined. Rebinds `d`.
d = xr.open_mfdataset(files_list, preprocess=fix_dataset_dims)

In [None]:
d.forecast_time

In [None]:
d.forecast_year

In [None]:
# Despite the name, this is the *fraction* of null tp values (mean of the boolean
# mask), reduced over all the listed dimensions; `realization` is not in the list,
# so it is not reduced here. The result is computed immediately.
nan_counts = d.tp.isnull().mean(dim=['forecast_year', 'forecast_monthday', 'lead_time', 'latitude', 'longitude']).compute()

In [None]:
# Number of null tp values across the realization dimension, for every remaining
# coordinate. persist() is used so the threshold checks in the following cells can
# reuse the same result.
n_null = d.tp.isnull().sum(dim=['realization']).persist()

In [None]:
# Fraction of entries where at least one realization is null.
(n_null >= 1).mean().compute()

In [None]:
# Fraction of entries where at least two realizations are null.
(n_null >= 2).mean().compute()

In [None]:
# Fraction of entries where at least three realizations are null.
(n_null >= 3).mean().compute()

In [None]:
# Fraction of entries where exactly four realizations are null.
# NOTE(review): presumably 4 is the ensemble size here (i.e. "all members null") — confirm.
(n_null == 4).mean().compute()

In [None]:
# nan_counts was already computed when it was assigned, so display it directly
# instead of calling compute() a second time.
nan_counts

## Example part maker

In [5]:
# Date string handed to read_flat_fields() for the ECMWF hindcast below
# (looks like YYYYMMDD — confirm the expected format in crims2s.mldataset).
ecmwf_datestring = '20201231'

In [6]:
#ncep_datestring = ecmwf_datestring_to_ncep_datestring('20190109')

In [None]:
# NOTE(review): this cell has no execution count, and its value differs from the
# recorded output of the next cell ('20100107'). Running the notebook top to bottom
# would pass '201012' to read_flat_fields() — confirm which value is intended.
ncep_datestring = '201012'

In [7]:
ncep_datestring

'20100107'

In [8]:
# Read the NCEP hindcast fields for t2m and tp, keep only forecast years
# from 2000 onward (dropping the rest), then compute the result.
ncep_hindcast = read_flat_fields(
    train_input_path, "ncep", ["t2m", "tp"], ncep_datestring, file_label='hindcast'
)
from_2000_onward = ncep_hindcast.forecast_year >= 2000
ncep = ncep_hindcast.where(from_2000_onward, drop=True).compute()

In [9]:
ncep

In [10]:
# Read the ECMWF hindcast fields for t2m and tp, select the forecast years
# present in `ncep`, then compute the result.
ecmwf = (
    read_flat_fields(
        train_input_path, "ecmwf", ["t2m", "tp"], ecmwf_datestring, file_label='hindcast'
    )
    .sel(forecast_year=ncep.forecast_year)
    .compute()
)

In [11]:
ecmwf

In [12]:
# Boolean mask: NCEP valid_time at or after the ECMWF valid_time taken at index 0
# of its second dimension.
ncep.valid_time >= ecmwf.valid_time[:,0]

In [13]:
# Keep only the NCEP entries whose valid_time is at or after the ECMWF valid_time
# at index 0 of its second dimension; drop=True removes the entries that do not match.
ncep.where(ncep.valid_time >= ecmwf.valid_time[:,0], drop=True)

In [16]:
# Keep the trimmed result: the following cells inspect `trimmed_ncep`, which would
# otherwise be undefined on a fresh kernel.
trimmed_ncep = trim_ncep_forecast(ncep, ecmwf)

In [15]:
trimmed_ncep

In [None]:
trimmed_ncep.valid_time

In [None]:
# NOTE(review): `begins` is not defined in any visible cell, so this raises NameError
# on a fresh kernel. It is used here as a where() condition — presumably a boolean
# mask on valid_time; confirm or remove this scratch cell.
ncep.where(begins).valid_time.compute()

In [None]:
# NOTE(review): `begins` is not defined in any visible cell. Here it is used as the
# start of a positional slice along lead_time, so it would need to be an integer
# index — confirm or remove this scratch cell.
ncep.isel(lead_time=slice(begins, None))

In [None]:
# NOTE(review): `delta` is not defined in any visible cell; it is compared against
# lead_time, so presumably a time offset — confirm or remove this scratch cell.
ncep.lead_time < delta

In [None]:
# NOTE(review): the original expression was left unfinished (`... in )`), which is a
# syntax error. Assumed intent: keep the NCEP entries whose valid_time also occurs in
# the ECMWF valid_time — confirm the right-hand operand. Python's `in` operator does
# not test membership element-wise on xarray objects, hence isin().
ncep.where(ncep.valid_time.isin(ecmwf.valid_time))

# Test input: NCEP forecast files

In [26]:
# Directory that holds the test input files, and the Path object used to list them.
TEST_INPUT = '***BASEDIR***test-input'
test_input_path = pathlib.Path(TEST_INPUT)

In [27]:
# Select the NCEP files for the two variables of interest (t2m and tp),
# in the order the directory listing returns them.
files_list = [
    entry
    for entry in test_input_path.iterdir()
    if 'ncep' in entry.name and any(tag in entry.name for tag in ('-t2m-', '-tp-'))
]

In [28]:
files_list

[PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200227.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-t2m-20200430.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200312.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200319.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200910.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200213.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-t2m-20200206.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-t2m-20201119.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200827.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20201001.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200528.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200604.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200917.nc'),
 PosixPath('***BASEDIR***test-input/ncep-forecast-tp-20200130.nc'),
 PosixPath('***BASEDIR***test-input/ncep-fore

In [29]:
# Open all selected NCEP test files as a single dataset; fix_dataset_dims is applied
# to each file's dataset (preprocess hook) before they are combined. Rebinds `d`.
d = xr.open_mfdataset(files_list, preprocess=fix_dataset_dims)

In [30]:
d

Unnamed: 0,Array,Chunk
Bytes,18.22 kiB,352 B
Shape,"(53, 1, 44)","(1, 1, 44)"
Count,475 Tasks,53 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 18.22 kiB 352 B Shape (53, 1, 44) (1, 1, 44) Count 475 Tasks 53 Chunks Type datetime64[ns] numpy.ndarray",44  1  53,

Unnamed: 0,Array,Chunk
Bytes,18.22 kiB,352 B
Shape,"(53, 1, 44)","(1, 1, 44)"
Count,475 Tasks,53 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.04 GiB,26.59 MiB
Shape,"(1, 53, 16, 44, 121, 240)","(1, 1, 16, 15, 121, 240)"
Count,1445 Tasks,159 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.04 GiB 26.59 MiB Shape (1, 53, 16, 44, 121, 240) (1, 1, 16, 15, 121, 240) Count 1445 Tasks 159 Chunks Type float32 numpy.ndarray",16  53  1  240  121  44,

Unnamed: 0,Array,Chunk
Bytes,4.04 GiB,26.59 MiB
Shape,"(1, 53, 16, 44, 121, 240)","(1, 1, 16, 15, 121, 240)"
Count,1445 Tasks,159 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.04 GiB,77.99 MiB
Shape,"(1, 53, 16, 44, 121, 240)","(1, 1, 16, 44, 121, 240)"
Count,265 Tasks,53 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.04 GiB 77.99 MiB Shape (1, 53, 16, 44, 121, 240) (1, 1, 16, 44, 121, 240) Count 265 Tasks 53 Chunks Type float32 numpy.ndarray",16  53  1  240  121  44,

Unnamed: 0,Array,Chunk
Bytes,4.04 GiB,77.99 MiB
Shape,"(1, 53, 16, 44, 121, 240)","(1, 1, 16, 44, 121, 240)"
Count,265 Tasks,53 Chunks
Type,float32,numpy.ndarray
