# Analyze EMOS weights

We are curious about the effect of a large forecast ID window on the rolling EMOS model.
We hypothesize that the models with large windows will have much smoother weights through the year.

In [None]:
# Enable the IPython autoreload extension in mode 2, so that imported modules
# are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import hydra
import torch
import os
import pathlib
import pandas as pd
import xarray as xr

from smc01.postprocessing.dataset import SMCParquetDataset
from smc01.postprocessing.util import load_checkpoint_from_run

## Collect station data

In [None]:
# Root of the data tree, read from the DATA_DIR environment variable.
_data_dir_env = os.getenv("DATA_DIR")
if _data_dir_env is None:
    # Fail early with an explicit message: pathlib.Path(None) would otherwise
    # raise a TypeError that does not mention the missing variable.
    raise RuntimeError(
        "The DATA_DIR environment variable must be set to the data root directory."
    )
DATA_DIR = pathlib.Path(_data_dir_env)

# Dataset directory, relative to the data root.
dataset_dir = DATA_DIR / 'interpolated/2021-12-20-gdps-metar/'
dataset = SMCParquetDataset(dataset_dir)

In [None]:
# Take the first item of the dataset; the station metadata is extracted from it.
sample = dataset[0]

In [None]:
# Build a table with one row per station, keeping the first latitude,
# longitude and elevation found for each of them.
station_metadata_columns = ('latitude', 'longitude', 'elevation')
stations = (
    sample
    .groupby('station')
    .agg({column: 'first' for column in station_metadata_columns})
    .reset_index()
)

In [None]:
# Preview the first rows of the station table.
stations.head()

In [None]:
# Display the whole station table.
stations

In [None]:
# Look up the index label(s) of the rows whose station identifier is 'CYUL'.
stations.index[stations['station'] == 'CYUL']

In [None]:
# Locate the multirun output directory: use SMC01_RUNS_DIR when it is set to a
# non-empty value, otherwise fall back to the runs stored under DATA_DIR.
SMC01_RUNS_DIR = os.getenv("SMC01_RUNS_DIR")
if not SMC01_RUNS_DIR:
    RUNS_DIR = DATA_DIR / 'runs/2022-02-08/12-55-28/'
else:
    SMC01_RUNS_DIR = pathlib.Path(SMC01_RUNS_DIR)
    RUNS_DIR = SMC01_RUNS_DIR / 'postprocessing/multirun/2022-02-08/12-55-28/'

# Sub-directory i of the multirun is associated with the i-th filter size
# listed below. Paths are stored as strings.
RUNS_BY_FILTER_SIZE = {
    filter_size: str(RUNS_DIR / str(job_index))
    for job_index, filter_size in enumerate((1, 7, 15, 29, 61, 121))
}

In [None]:
# Load one checkpoint per run directory, keyed by the filter size of the run.
models = {
    filter_size: load_checkpoint_from_run(run_dir)
    for filter_size, run_dir in RUNS_BY_FILTER_SIZE.items()
}

# Analyze biases

In [None]:
# Gather the bias parameters of every model into one DataArray, stacked along
# a new `filter_size` dimension.
model_biases = []

filter_sizes = sorted(models)

# Coordinates shared by all models: lead times are spaced 3 h apart, and there
# are two forecast hours (0 h and 12 h).
lead_times = [pd.Timedelta(3*i, unit='h') for i in range(81)]
forecast_hours = [pd.Timedelta(0, unit='h'), pd.Timedelta(12, unit='h')]

# Derive the shape from the coordinates rather than hard-coding the number of
# stations, so the reshape always agrees with the station table used as the
# `station` coordinate. The forecast_day axis has 365 entries.
bias_shape = (len(stations), 365, len(forecast_hours), len(lead_times))

for s in filter_sizes:
    m = models[s]

    data_array = xr.DataArray(
        m.biases.squeeze().detach().numpy().reshape(bias_shape),
        dims=['station', 'forecast_day', 'forecast_hour', 'lead_time'],
        coords={
            'station': stations['station'],
            'lead_time': lead_times,
            'forecast_hour': forecast_hours,
        }
    )

    model_biases.append(data_array)


biases = xr.concat(model_biases, dim='filter_size').assign_coords(filter_size=filter_sizes)

In [None]:
# Display the combined biases DataArray.
biases

In [None]:
# Biases of station CYUL for the first forecast hour at lead-time index 12
# (36 h with the 3 h lead-time step), one panel per filter size.
cyul_biases = biases.sel(station='CYUL')
cyul_biases.isel(lead_time=12, forecast_hour=0).plot(col='filter_size')

In [None]:
# Standard deviation of the biases across forecast days, averaged over all
# stations, for the first forecast hour at lead-time index 24.
biases_at_lead_24 = biases.isel(forecast_hour=0, lead_time=24)
bias_std_over_days = biases_at_lead_24.std(dim='forecast_day')
bias_std_over_days.mean(dim='station').plot()

In [None]:
# Standard deviation of the biases across forecast days, for the first
# forecast hour at lead-time index 20.
biases_at_lead_20 = biases.isel(forecast_hour=0, lead_time=20)
biases_at_lead_20.std(dim=['forecast_day'])

In [None]:
# Standard deviation of each station's biases over forecast day, forecast hour
# and lead time, one panel per filter size. Station identifiers are replaced
# by their integer position before plotting.
station_positions = range(len(biases.station))
bias_std_per_station = biases.std(dim=['forecast_day', 'forecast_hour', 'lead_time'])
bias_std_per_station.assign_coords(station=station_positions).plot(col='filter_size')

# Analyze weights

In [None]:
# Gather the weight parameters of every model into one DataArray, stacked
# along a new `filter_size` dimension.
model_weights = []
filter_sizes = sorted(models)

# Coordinates shared by all models: lead times are spaced 3 h apart, and there
# are two forecast hours (0 h and 12 h).
weight_lead_times = [pd.Timedelta(3*i, unit='h') for i in range(81)]
weight_forecast_hours = [pd.Timedelta(0, unit='h'), pd.Timedelta(12, unit='h')]

# Derive the shape from the coordinates rather than hard-coding the number of
# stations, so the reshape always agrees with the station table used as the
# `station` coordinate. The forecast_day axis has 365 entries.
weight_shape = (len(stations), 365, len(weight_forecast_hours), len(weight_lead_times))

for s in filter_sizes:
    m = models[s]

    data_array = xr.DataArray(
        # Only index 0 along the last axis of the weight tensor is analyzed.
        # NOTE(review): confirm which predictor this index corresponds to.
        m.weights[..., 0].squeeze().detach().numpy().reshape(weight_shape),
        dims=['station', 'forecast_day', 'forecast_hour', 'lead_time'],
        coords={
            'station': stations['station'],
            'lead_time': weight_lead_times,
            'forecast_hour': weight_forecast_hours,
        }
    )

    model_weights.append(data_array)


weights = xr.concat(model_weights, dim='filter_size').assign_coords(filter_size=filter_sizes)

In [None]:
# Display the combined weights DataArray.
weights

In [None]:
# Weights of station CYUL for the first forecast hour at lead-time index 24
# (72 h with the 3 h lead-time step), one panel per filter size.
cyul_weights = weights.sel(station='CYUL')
cyul_weights.isel(lead_time=24, forecast_hour=0).plot(col='filter_size')

In [None]:
# Standard deviation of the weights across forecast days, averaged over
# stations, lead times and forecast hours.
weight_std_over_days = weights.std(dim='forecast_day')
weight_std_over_days.mean(dim=['station', 'lead_time', 'forecast_hour']).plot()

In [None]:
# Weights of station 0E0 for the first forecast hour on forecast-day index 12,
# one panel per filter size.
station_0e0_weights = weights.sel(station='0E0')
station_0e0_weights.isel(forecast_hour=0, forecast_day=12).plot(col='filter_size')