In [1]:
import pandas as pd
import fsspec
import yaml
from fv3net.diagnostics.prognostic_run import ComputedDiagnosticsList

In [2]:
with open('run_urls.yaml') as f:
    run_urls = yaml.safe_load(f)

### Load metrics from ensemble of 10-day forecasts, for 3-7 day RMSE metrics

In [3]:
start_dates = ['0805', '0813', '0821', '0829']
urls_baseline = {
    f'baseline-{ic}': run_urls[f'baseline-10day-ic-{ic}']['url'] + '_diagnostics'
    for ic in start_dates
}
urls_ml_physics = {
    f'ml-physics-{ic}': run_urls[f'prog-10day-ic-{ic}']['url'] + '_diagnostics'
    for ic in start_dates
}
urls_nudge_to_fine = {
    f'nudge-to-fine-{ic}': run_urls[f'n2f-10day-ic-{ic}']['url'] + '_diagnostics'
    for ic in start_dates
}
urls = {**urls_baseline, **urls_ml_physics, **urls_nudge_to_fine}

In [4]:
cd = ComputedDiagnosticsList.from_dict(urls)
metrics = cd.load_metrics()

In [5]:
metrics.types

{'percentile_25',
 'percentile_50',
 'percentile_75',
 'percentile_90',
 'percentile_99',
 'percentile_99.9',
 'rmse_3day',
 'rmse_5day',
 'rmse_days_3to7_avg',
 'rmse_of_time_mean',
 'rmse_of_time_mean_land',
 'rmse_of_time_mean_sea',
 'time_and_global_mean_bias',
 'time_and_global_mean_value',
 'time_and_land_mean_bias',
 'time_and_land_mean_value',
 'time_and_sea_mean_bias',
 'time_and_sea_mean_value',
 'tropical_ascent_region_mean',
 'tropics_max_minus_min'}

In [6]:
def metric_with_improvements(metrics, metric_type, variable):
    output = metrics.get_metric_all_runs(metric_type, variable)
    output.loc[:, 'run_type'] = [x[:-5] for x in output.loc[:, 'run']]

    improvements = []
    for run in output.run:
        corresponding_baseline = 'baseline-' + run[-4:]
        run_value = output.loc[output['run'] == run]['value']
        baseline_value = output.loc[output['run'] == corresponding_baseline]['value']
        improvements.append(run_value.item() - baseline_value.item())
    output['improvement'] = improvements
    
    return output

In [7]:
rmse_h500 = metric_with_improvements(metrics, 'rmse_days_3to7_avg', 'h500')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output['improvement'] = improvements


In [8]:
rmse_h500

Unnamed: 0,run,baseline,metric,value,units,run_type,improvement
38,baseline-0805,True,rmse_days_3to7_avg/h500,56.206383,m,baseline,0.0
329,baseline-0813,True,rmse_days_3to7_avg/h500,54.000507,m,baseline,0.0
620,baseline-0821,True,rmse_days_3to7_avg/h500,63.744751,m,baseline,0.0
911,baseline-0829,True,rmse_days_3to7_avg/h500,55.898529,m,baseline,0.0
1202,ml-physics-0805,False,rmse_days_3to7_avg/h500,55.23468,m,ml-physics,-0.971703
1493,ml-physics-0813,False,rmse_days_3to7_avg/h500,55.747902,m,ml-physics,1.747395
1784,ml-physics-0821,False,rmse_days_3to7_avg/h500,62.331226,m,ml-physics,-1.413525
2075,ml-physics-0829,False,rmse_days_3to7_avg/h500,55.530334,m,ml-physics,-0.368195
2366,nudge-to-fine-0805,False,rmse_days_3to7_avg/h500,53.547924,m,nudge-to-fine,-2.658459
2657,nudge-to-fine-0813,False,rmse_days_3to7_avg/h500,52.827892,m,nudge-to-fine,-1.172615


#### Mean

In [9]:
rmse_h500.groupby('run_type').mean()

Unnamed: 0_level_0,baseline,value,improvement
run_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,1.0,57.462543,0.0
ml-physics,0.0,57.211036,-0.251507
nudge-to-fine,0.0,56.106157,-1.356385


#### Standard deviation

In [10]:
rmse_h500.groupby('run_type').std()

Unnamed: 0_level_0,baseline,value,improvement
run_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,0.0,4.300229,0.0
ml-physics,0.0,3.419934,1.399785
nudge-to-fine,0.0,4.679694,0.886434


In [11]:
rmse_tmp850 = metric_with_improvements(metrics, 'rmse_days_3to7_avg', 'tmp850')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output['improvement'] = improvements


In [12]:
rmse_tmp850.groupby('run_type').mean()

Unnamed: 0_level_0,baseline,value,improvement
run_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,1.0,2.867559,0.0
ml-physics,0.0,2.771214,-0.096345
nudge-to-fine,0.0,2.571139,-0.29642


In [13]:
rmse_tmp850.groupby('run_type').std()

Unnamed: 0_level_0,baseline,value,improvement
run_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,0.0,0.10891,0.0
ml-physics,0.0,0.090272,0.057262
nudge-to-fine,0.0,0.106239,0.037411


### Load metrics from 35-day forecasts, for time-mean metrics

In [14]:
urls_35day = {
    "baseline": run_urls["baseline-35day"]["url"] + "_diagnostics",
    "ML-physics": run_urls["prog-35day-seed5"]["url"] + "_diagnostics",
    "nudge-to-fine": run_urls["n2f-35day"]["url"] + "_diagnostics",
}

In [15]:
cd = ComputedDiagnosticsList.from_dict(urls_35day)
metrics_35day = cd.load_metrics()

In [16]:
metrics_35day.get_metric_all_runs('rmse_of_time_mean', 'total_precip_to_surface')

Unnamed: 0,run,baseline,metric,value,units
183,baseline,True,rmse_of_time_mean/total_precip_to_surface,3.246038,mm/day
474,ML-physics,False,rmse_of_time_mean/total_precip_to_surface,2.737714,mm/day
765,nudge-to-fine,False,rmse_of_time_mean/total_precip_to_surface,2.393489,mm/day


In [17]:
metrics_35day.get_metric_all_runs('time_and_land_mean_bias', 'total_precip_to_surface')

Unnamed: 0,run,baseline,metric,value,units
145,baseline,True,time_and_land_mean_bias/total_precip_to_surface,0.950176,mm/day
436,ML-physics,False,time_and_land_mean_bias/total_precip_to_surface,0.585575,mm/day
727,nudge-to-fine,False,time_and_land_mean_bias/total_precip_to_surface,0.021713,mm/day


In [18]:
metrics_35day.get_metric_all_runs('rmse_of_time_mean', 'tmp850')

Unnamed: 0,run,baseline,metric,value,units
181,baseline,True,rmse_of_time_mean/tmp850,2.064156,K
472,ML-physics,False,rmse_of_time_mean/tmp850,1.835317,K
763,nudge-to-fine,False,rmse_of_time_mean/tmp850,2.268574,K


In [19]:
metrics_35day.get_metric_all_runs('rmse_of_time_mean', 'tmp200')

Unnamed: 0,run,baseline,metric,value,units
180,baseline,True,rmse_of_time_mean/tmp200,2.084215,K
471,ML-physics,False,rmse_of_time_mean/tmp200,2.254584,K
762,nudge-to-fine,False,rmse_of_time_mean/tmp200,2.023804,K
