# Track-to-track comparison of ERA5 and ERA-Interim

In [1]:
from IPython.display import clear_output
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

from common_defs import winters, nyr, winter_dates, datasets, cat_kw, aliases, conf_key_typeset, runs_grid_formatter
import mypaths

from octant.core import TrackRun, OctantTrack
from octant.misc import SUBSETS
import octant
octant.__version__

'0.0.11'

In [2]:
subsets = SUBSETS[1:]  # only PMC and IC

## Compare only three runs

In [3]:
dset_names = (
    ('era5_run000', 'ERA5, CTRL'),
    ('interim_run106', 'ERA-Interim, CTRL'),
    ('interim_run100', 'ERA-Interim, LVT')
)

In [4]:
track_runs = dict()
for (dset_name, _) in tqdm(dset_names):
    TR = TrackRun()
    TR.data = OctantTrack.from_mux_df(pd.read_parquet(mypaths.procdir / f'{dset_name}_2008_2017_top10.parquet', engine='pyarrow'))
    TR.is_categorised = True
    track_runs[dset_name] = TR
clear_output()

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [7]:
match_kw = dict(method='bs2000', beta=50., return_dist_matrix=False)

In [5]:
ref_dset = track_runs['era5_run000']

In [20]:
probability_of_coincidence = pd.DataFrame(index=[i[0] for i in dset_names if i[0] != 'era5_run000'], columns=subsets)
ratio_of_missing_tracks = probability_of_coincidence.copy()

for dset_name in tqdm(probability_of_coincidence.index):
    for subset in tqdm(probability_of_coincidence.columns):
        match_pairs = ref_dset.match_tracks(track_runs[dset_name],
                                            subset=subset,
                                            **match_kw)

        probability_of_coincidence.loc[dset_name, subset] = len(match_pairs)
        ratio_of_missing_tracks.loc[dset_name, subset] = (ref_dset.size(subset) - len(match_pairs)) / track_runs[dset_name].size(subset)
clear_output()

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




In [35]:
print(probability_of_coincidence.to_latex())

\begin{tabular}{lrr}
\toprule
{} &  moderate &  strong \\
\midrule
interim\_run106 &       271 &      30 \\
interim\_run100 &      1112 &     126 \\
\bottomrule
\end{tabular}



In [22]:
ratio_of_missing_tracks

Unnamed: 0,moderate,strong
interim_run106,0.0153257,0.00383142
interim_run100,0.73908,0.681992


In [38]:
for dset_name in tqdm(probability_of_coincidence.index):
    for subset in tqdm(probability_of_coincidence.columns):
        ratio_of_missing_tracks.loc[dset_name, subset] = (ref_dset.size(subset) - probability_of_coincidence.loc[dset_name, subset]) / track_runs[dset_name].size(subset)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




In [41]:
print(ratio_of_missing_tracks.to_latex())

\begin{tabular}{lll}
\toprule
{} &  moderate &    strong \\
\midrule
interim\_run106 &    7.5209 &   7.45161 \\
interim\_run100 &  0.492601 &  0.444079 \\
\bottomrule
\end{tabular}



In [33]:
(261 - 30) / 31

7.451612903225806

In [40]:
for subset in tqdm(probability_of_coincidence.columns):
    print(ref_dset.size(subset))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

2610
261



### All PMCTRACK runs, split into two groups

In [5]:
# RUNS = dict()
# RUNS['vort_thresh'] = dict()
# RUNS['diff_params'] = dict()
# for dataset in datasets:
#     _runs = []
#     for run_id_start in [0, 100]:
#         with (mypaths.trackresdir / f'{dataset}_{run_id_start:03d}_runs_grid.json').open('r') as f:
#             for run_id, run_dict in enumerate(json.load(f), run_id_start):
#                 _runs.append( (run_id, run_dict) )

#     RUNS['vort_thresh'][dataset] = []
#     RUNS['diff_params'][dataset] = []
#     for run_id, run_dict in _runs:
#         if  len(run_dict) == 0 and run_id < 100:
#             RUNS['diff_params'][dataset].append( (run_id, run_dict) )
#         if 'zeta_max0' in run_dict or len(run_dict) == 0:
#             if  run_id >= 100:
#                 if run_dict != {'zeta_max0': 0.0001, 'zeta_min0': 9e-05}:
#                     RUNS['vort_thresh'][dataset].append( (run_id, run_dict) )
#         else:
#             RUNS['diff_params'][dataset].append( (run_id, run_dict) )

In [18]:
# for run_id, run_dict in RUNS['vort_thresh']['era5']:
#     for run_id_2, run_dict_2 in RUNS['vort_thresh']['interim']:
#         if run_dict == run_dict_2:
#             print(run_id, run_id_2)

100 105
101 106
102 107
103 108
104 109


In [30]:
# stars_winters = winters[:3]

# TR = TrackRun()
# for winter in tqdm(stars_winters, desc='winter', leave=False):
#     track_res_dir = mypaths.trackresdir / dataset / f'run{run_id:03d}' / winter
#     _TR = TrackRun(track_res_dir)
#     _TR.categorise(lsm=lsm, **cat_kw)
#     TR += _TR

HBox(children=(IntProgress(value=0, description='winters', max=3), HTML(value='')))