In [None]:
%load_ext autoreload
%autoreload 2

# Forecast like observations

Use observation files to produce new files that fit the shape of a forecast file.
That makes them easier to use for ML purposes.
At the core of this task is the `forecast_like_observations` function provided by the organizers.
This notebook loads the appropriate forecasts and calls this function to generate the corresponding observations from our own set of observation files.
The observation files were modified to make them more consistent with respect to NaNs, see *land-mask-investigate.ipynb*.

In [None]:
import climetlab as cml
import climetlab_s2s_ai_challenge
import dask
import dask.array as da
import dask.distributed
import dask_jobqueue
import pathlib
import xarray as xr

from crims2s.util import fix_dataset_dims

In [None]:
# Root directory of the data; the cells below build their input and output paths from it.
DATA_PATH = '***BASEDIR***'
# pathlib form of the root, so sub-paths can be composed with the `/` operator.
data_path = pathlib.Path(DATA_PATH)

## Boot dask cluster

In [None]:
# Describe the SLURM jobs that will host the dask workers. The `env_extra` commands
# source the shell profile and activate the `s2s` conda environment.
# NOTE(review): newer dask_jobqueue releases rename `env_extra` to
# `job_script_prologue` — confirm against the installed version.
cluster = dask_jobqueue.SLURMCluster(env_extra=['source ***HOME***.bash_profile','conda activate s2s'])

In [None]:
# Ask the cluster for 4 SLURM jobs.
cluster.scale(jobs=4)

In [None]:
# Create a dask client attached to the SLURM cluster defined above.
client = dask.distributed.Client(cluster)

In [None]:
# Display the client.
client

## Temperature

In [None]:
# Directory from which the forecast files are listed.
forecast_dir = data_path / 'training-input'

In [None]:
# Keep only the files whose stem mentions both the ECMWF model and the t2m variable.
forecast_files = [
    path
    for path in forecast_dir.iterdir()
    if all(token in path.stem for token in ('ecmwf', 't2m'))
]

In [None]:
# Peek at the first ten selected files.
forecast_files[:10]

In [None]:
# Open all selected t2m forecast files as one dataset; `fix_dataset_dims` is
# passed as the per-file preprocessing step.
forecast = xr.open_mfdataset(forecast_files, preprocess=fix_dataset_dims)

In [None]:
# Load the t2m observation file.
obs = xr.open_dataset(data_path / 'obs_t2m_interp_remask.nc')

In [None]:
# Rearrange the observations into the layout of `forecast` with the helper
# provided with the challenge.
forecast_shaped_t2m = climetlab_s2s_ai_challenge.extra.forecast_like_observations(forecast, obs)

In [None]:
# Display the forecast-shaped observations.
forecast_shaped_t2m

In [None]:
# Pick one arbitrary element to spot-check: first day of year, 11th forecast year, 41st lead time.
sample = forecast_shaped_t2m.isel(forecast_dayofyear=0, forecast_year=10, lead_time=40)

In [None]:
# Valid time of the sample, extracted as a scalar.
sample.valid_time.item()

In [None]:
# Plot where the sample equals the observation selected at the sample's valid time.
(sample == obs.sel(time=sample.valid_time)).t2m.plot()

Seems legit!

In [None]:
# Write the first forecast year on its own. The year in the file name is read from
# the `forecast_year` coordinate instead of being typed in, so the name cannot
# drift from the data that is actually saved.
reference_dir = data_path / 'processed' / 'training-output-reference'
reference_dir.mkdir(parents=True, exist_ok=True)  # to_netcdf does not create missing directories
first_year = forecast_shaped_t2m.forecast_year[0].item()
forecast_shaped_t2m.isel(forecast_year=0).to_netcdf(reference_dir / f'obs_t2m_forecast_shape_{first_year}.nc')

In [None]:
# Same selection with a list index, which keeps `forecast_year` as a length-1 dimension.
forecast_shaped_t2m.isel(forecast_year=[0])

In [None]:
# Peek at the first ten selected files again, before looping over them.
forecast_files[:10]

In [None]:
# Process the forecast files one at a time and write one forecast-shaped
# observation file per forecast day of year.
# NOTE: the loop rebinds `forecast` and `forecast_shaped_t2m`; once it finishes
# they refer to the last file processed.
output_dir = data_path / 'processed' / 'training-output-reference'
output_dir.mkdir(parents=True, exist_ok=True)  # to_netcdf does not create missing directories

for f in forecast_files:
    print(f)
    forecast = fix_dataset_dims(xr.open_dataset(f))
    forecast_shaped_t2m = climetlab_s2s_ai_challenge.extra.forecast_like_observations(forecast, obs)

    # Day of year of the first forecast time: it labels the new dimension and names the output file.
    day_of_year = forecast_shaped_t2m.forecast_time.dt.dayofyear[0].item()

    forecast_shaped_t2m = forecast_shaped_t2m.expand_dims('forecast_dayofyear').assign_coords(forecast_dayofyear=[day_of_year])
    forecast_shaped_t2m.to_netcdf(output_dir / f'obs_t2m_forecast_shape_{day_of_year:03}.nc')

In [None]:
# Dry run: print each value of the `forecast_year` coordinate.
for y in forecast_shaped_t2m.forecast_year:
    print(y.item())

In [None]:
# Split the dataset by forecast year and save every year to its own netCDF file.
for y in forecast_shaped_t2m.forecast_year:
    year = y.item()
    print(year)
    yearly_path = data_path / 'processed' / 'training-output-reference' / f'obs_t2m_forecast_shape_{year}.nc'
    # Selecting with a list keeps `forecast_year` as a dimension in the saved file.
    forecast_shaped_t2m.sel(forecast_year=[y]).to_netcdf(yearly_path)

In [None]:
# Save the whole forecast-shaped dataset as a single file directly under `data_path`.
forecast_shaped_t2m.to_netcdf(data_path / 'obs_t2m_forecast_shape.nc')

In [None]:
# Save the whole dataset through the configured `data_path` rather than a
# hard-coded absolute path string, so the location follows the configuration.
forecast_shaped_t2m.to_netcdf(data_path / 'obs_t2m_forecast_shape.nc')

In [None]:
# Drop the temperature datasets before moving on to precipitation.
del obs, forecast, forecast_shaped_t2m

## Precipitation

In [None]:
# Directory from which the forecast files are listed (same value as in the temperature section).
forecast_dir = data_path / 'training-input'

In [None]:
# Keep only the files whose stem mentions both the ECMWF model and the tp variable.
forecast_files = [
    path
    for path in forecast_dir.iterdir()
    if all(token in path.stem for token in ('ecmwf', 'tp'))
]

In [None]:
# Peek at the first ten selected files.
forecast_files[:10]

In [None]:
# Load the precipitation observation file.
obs = xr.open_dataset(data_path / 'obs_pr_interp_remask.nc')


In [None]:
# Process the forecast files one at a time and write one forecast-shaped
# observation file per forecast day of year.
# NOTE: the loop rebinds `forecast` and `forecast_shaped_tp`; once it finishes
# they refer to the last file processed.
output_dir = data_path / 'processed' / 'training-output-reference'
output_dir.mkdir(parents=True, exist_ok=True)  # to_netcdf does not create missing directories

for f in forecast_files:
    forecast = fix_dataset_dims(xr.open_dataset(f))
    forecast_shaped_tp = climetlab_s2s_ai_challenge.extra.forecast_like_observations(forecast, obs)

    # Day of year of the first forecast time: it labels the new dimension and names the output file.
    day_of_year = forecast_shaped_tp.forecast_time.dt.dayofyear[0].item()

    forecast_shaped_tp = forecast_shaped_tp.expand_dims('forecast_dayofyear').assign_coords(forecast_dayofyear=[day_of_year])
    forecast_shaped_tp.to_netcdf(output_dir / f'obs_tp_forecast_shape_{day_of_year:03}.nc')

In [None]:
# Day of the first forecast time in the dataset left over from the loop.
# NOTE(review): `.dt.day` is the day of the month, whereas the output file names
# are built from `.dt.dayofyear` — confirm which one is intended here.
forecast_shaped_tp.forecast_time.dt.day[0].item()

In [None]:
# Save the dataset currently bound to `forecast_shaped_tp`. The day of year is
# read from the data with the same expression the per-file loop uses, rather than
# typed in by hand, so the file name always matches the content being written.
day_of_year = forecast_shaped_tp.forecast_time.dt.dayofyear[0].item()
forecast_shaped_tp.to_netcdf(data_path / 'processed' / 'training-output-reference' / f'obs_tp_forecast_shape_{day_of_year:03}.nc')

In [None]:
# Display the forecast-shaped precipitation observations.
forecast_shaped_tp

In [None]:
# Take one element (11th forecast year, 11th lead time) of `forecast`, which at
# this point is the last file opened by the per-file loop.
sample = forecast.isel(forecast_year=10, lead_time=10)

In [None]:
# Display the forecast sample.
sample

In [None]:
# Display the precipitation observations.
obs

In [None]:
# Display the forecast-shaped precipitation observations again, next to `obs` for comparison.
forecast_shaped_tp

In [None]:
# Rebind `sample` to one element (11th forecast year, 16th lead time) of the
# forecast-shaped observations, to check it against the raw observations.
sample = forecast_shaped_tp.isel(forecast_year=10, lead_time=15)

In [None]:
# Display the forecast-shaped sample.
sample

In [None]:
# Observations from the sample's forecast time up to forecast time + lead time,
# with the final time step of that window removed.
obs_of_sample = (
    obs
    .sel(time=slice(sample.forecast_time, sample.forecast_time + sample.lead_time))
    .isel(time=slice(None, -1))
)

In [None]:
# Display the observation window selected for the sample.
obs_of_sample

In [None]:
# Plot where the observed precipitation summed over the window equals the
# forecast-shaped `tp` sample.
(obs_of_sample.sum(dim='time').pr == sample.tp).plot()

Seems legit! Don't forget to exclude the last day when computing the cumulative sum.