This notebook compares model outputs for a suite of reaches by producing statistical plots. The generate_report_data.ipynb notebook needs to be run before this will work.

In [None]:
import dotenv
from dotenv import load_dotenv
# load_dotenv() is called before the os.environ lookups below.
load_dotenv()
import os
import logging

# Root logger only reports WARNING and above.
logging.getLogger().setLevel('WARNING')

# Both variables are mandatory: a missing one raises KeyError here.
output_folder = os.environ['OUTPUT_FOLDER']
# High-resolution mode is enabled only by the exact string 'TRUE'.
highres = os.environ['HIGHRES'] == 'TRUE'

# Figure resolution, output file suffix and savefig keyword arguments:
# LZW-compressed TIFF at 600 dpi in high-res mode, PNG at 300 dpi otherwise.
dpi = 600 if highres else 300
hextent = "_high.tif" if highres else ".png"
fig_args = (
    {'format': "tiff", 'pil_kwargs': {"compression": "tiff_lzw"}}
    if highres
    else {'format': "png"}
)


In [None]:
import pickle

# Load the three pickled result sets from the output folder.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only point OUTPUT_FOLDER at files produced by a trusted run.
# Each file is opened in a `with` block so the handle is closed even when
# unpickling fails.
with open(f'{output_folder}{os.path.sep}product_results.pkl', 'rb') as output:
    product_results = pickle.load(output)

with open(f'{output_folder}{os.path.sep}overall_results.pkl', 'rb') as output:
    overall_results = pickle.load(output)

with open(f'{output_folder}{os.path.sep}overall_metrics.pkl', 'rb') as output:
    overall_metrics = pickle.load(output)


In [None]:
import pandas
from matplotlib import pyplot
# Re-key the overall results by a short readable label:
# "<model type name>" optionally followed by " (<param>, <param>, ...)".
overall_results_simple = {}
for model_key, model_stats in overall_results.items():
    param_parts = []
    for param_value in model_key.depth_model_params.values():
        if type(param_value) is dict:
            # Dict-valued parameters are summarised by their keys.
            param_parts.append(' '.join(param_value.keys()))
        else:
            param_parts.append(f"{param_value}")
    params_str = ", ".join(param_parts)
    if params_str:
        params_str = f" ({params_str})"
    simple_str = f"{model_key.depth_model_type.name}{params_str}"
    overall_results_simple[simple_str] = model_stats

# One row per model, ordered by the 'std' column.
result_synth = pandas.DataFrame(overall_results_simple).T.sort_values('std')

# Expand the 'perc' list of every row into one named column per percentile.
percentile_labels = ['2.5', '15.9', '25', '50', '75', '84.1', '97.5']
percentiles = pandas.DataFrame(
    result_synth['perc'].tolist(),
    columns=percentile_labels,
    index=result_synth.index)

exploded = result_synth.join(percentiles).drop(columns='perc')
display(exploded)
exploded.to_csv(output_folder + f'{os.path.sep}overall.csv')

fig, ax = pyplot.subplots(figsize=((11.7-1)/2, (8.3-1)/2), dpi=dpi)

# Box statistics come straight from the stored percentiles:
# whiskers at 2.5 / 97.5, box at 25 / 75, line at 50; no outliers are drawn.
boxes = [
    {
        'label': model_label,
        'whislo': model_row['2.5'],
        'q1': model_row['25'],
        'med': model_row['50'],
        'q3': model_row['75'],
        'whishi': model_row['97.5'],
        'fliers': [],
    }
    for model_label, model_row in exploded.iterrows()
]

ax.bxp(boxes, showfliers=False)
pyplot.xticks(rotation=90)
ax.set_ylabel("m")

pyplot.tight_layout()
pyplot.savefig(output_folder + f'{os.path.sep}overall_box'+hextent, **fig_args)
# Tear the figure down after saving.
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Flatten product_results (model key -> simulation -> statistics) into one
# record per (model, simulation), with the 'perc' list expanded into one
# column per percentile label in `cols`.
cols = ['2.5', '15.9', '25', '50', '75', '84.1', '97.5']
product_results_simple = []
for (k, v) in product_results.items():
    params_str = ", ".join(
        ' '.join(val.keys()) if type(val) is dict else f"{val}"
        for val in k.depth_model_params.values())
    if len(params_str) > 0:
        params_str = f" ({params_str})"
    # The label depends only on the model key, so build it once per model.
    simple_str = f"{k.depth_model_type.name}{params_str}"

    for (k2, v2) in v.items():
        # Work on a copy so the unpickled product_results is left untouched
        # (the original loop wrote the extra keys into the loaded dicts).
        record = dict(v2)
        record['simulation'] = k2
        record['model'] = simple_str
        for position, col in enumerate(cols):
            record[col] = v2['perc'][position]

        product_results_simple.append(record)

product_results_simple_df = pandas.DataFrame(product_results_simple)[
    ['model', 'simulation', 'mean', 'std', 'b'] + cols]

pandas.set_option('display.max_rows', 50)
display(product_results_simple_df.sort_values(['model', 'std']))

# product_results_simple_df.to_csv(output_folder + f'{os.path.sep}model_run_outputs.csv')


In [None]:
# Create a boxplot:

# reach1, reach2, reac.... | HAND
# reach1, reach2, reac.... | TVD
# reach1, reach2, reac.... | FwDET
# with boxes showing the error
from matplotlib import ticker
import seaborn
seaborn.set(font_scale=1)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names; use whichever name this installation provides.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in pyplot.style.available
                   else 'seaborn-whitegrid')
pyplot.style.use(whitegrid_style)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']
accepted_models = ['HAND (best MAE)', 'TVD (ind)', 'FwDET']
accepted_model_labels = ['HAND', 'TVD', 'FwDET']
accepted_model_labels_lookup = {
    'HAND (best MAE)': 'HAND', 'TVD (ind)': 'TVD', 'FwDET': 'FwDET'}

# One row of boxes per accepted model, sharing the y axis.
fig, axes_list = pyplot.subplots(
    nrows=3, ncols=1, figsize=((8.3-1), (11.7-1)*0.9), dpi=dpi, sharey='all')

# Collect the pre-computed box statistics of every simulation, per model.
boxes = {mod_def: [] for mod_def in accepted_models}
accepted_rows = product_results_simple_df.loc[
    product_results_simple_df['model'].isin(accepted_models)]
for _, row in accepted_rows.iterrows():
    boxes[row['model']].append(
        {
            'label': row['simulation'],
            'whislo': row['2.5'],    # Bottom whisker position
            'q1': row['25'],         # First quartile (25th percentile)
            'med': row['50'],        # Median         (50th percentile)
            'q3': row['75'],         # Third quartile (75th percentile)
            'whishi': row['97.5'],   # Top whisker position
            'fliers': []             # Outliers
        }
    )

last_row = len(axes_list) - 1
for i, ax in enumerate(axes_list):
    ax.bxp(boxes[accepted_models[i]], showfliers=False)
    ax.grid(True)
    if i == last_row:
        # Only the bottom panel keeps its simulation labels. Style them on
        # this axes explicitly rather than relying on pyplot's current axes.
        pyplot.setp(ax.get_xticklabels(), rotation=75,
                    horizontalalignment='right', rotation_mode='anchor')
    else:
        ax.set_xticklabels([])
    ax.set_ylabel("Truth minus Predicted (m)")
    ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:02}"))
    # Model name is drawn rotated along the right-hand edge of the panel.
    ax.set_title(accepted_model_labels[i], rotation=-90, y=0.5,
                 position=(1.01, 0), ha='left', va='center')
pyplot.tight_layout()
pyplot.savefig(output_folder +
               f'{os.path.sep}fig4_by_model_box' + hextent, **fig_args)
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Alternative layout for figure 4: a single axes where, for every simulation,
# the boxes of the three accepted models are drawn side by side and coloured
# by model.
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import matplotlib.ticker as plticker
from matplotlib import ticker
from colorsys import rgb_to_hls
from matplotlib import colors, transforms
import seaborn
import numpy
seaborn.set(font_scale=1)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names; use whichever name this installation provides.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in pyplot.style.available
                   else 'seaborn-whitegrid')
pyplot.style.use(whitegrid_style)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']
accepted_models = ['HAND (best MAE)', 'TVD (ind)', 'FwDET']
accepted_model_labels = ['HAND', 'TVD', 'FwDET']
accepted_model_labels_lookup = {
    'HAND (best MAE)': 'HAND', 'TVD (ind)': 'TVD', 'FwDET': 'FwDET'}

fig, axes_list = pyplot.subplots(
    nrows=1, ncols=1, figsize=((11.7-1), (8.3-1)*0.9), dpi=dpi, sharey='all')
# With a 1x1 grid, axes_list is used directly as the single axes.
ax = axes_list

# Collect the pre-computed box statistics of every simulation, per model.
boxes = {mod_def: [] for mod_def in accepted_models}
accepted_rows = product_results_simple_df.loc[
    product_results_simple_df['model'].isin(accepted_models)]
for _, row in accepted_rows.iterrows():
    boxes[row['model']].append(
        {
            'label': row['simulation'],
            'whislo': row['2.5'],    # Bottom whisker position
            'q1': row['25'],         # First quartile (25th percentile)
            'med': row['50'],        # Median         (50th percentile)
            'q3': row['75'],         # Third quartile (75th percentile)
            'whishi': row['97.5'],   # Top whisker position
            'fliers': []             # Outliers
        }
    )

# One fill colour per model, taken from the active colour cycle.
colours = {j: seaborn.utils.get_color_cycle()[j] for j in range(3)}

# Outline colour: a gray at 60% of the darkest fill colour's lightness.
light_vals = [rgb_to_hls(*c)[1] for c in colours.values()]
lum = min(light_vals) * .6
gray = colors.rgb2hex((lum, lum, lum))

legend_elements = [
    Patch(facecolor=colours[accepted_model_labels.index(label)], edgecolor=gray,
          label=label) for label in accepted_model_labels]


# The original kept a second counter `i` that always equalled model_index;
# a single index is used here.
for model_index in range(3):
    model_boxes = boxes[accepted_models[model_index]]
    boxprops = dict(edgecolor=gray, facecolor=colours[model_index])
    medianprops = dict(color=gray)
    whiskerprops = dict(color=gray)
    capprops = dict(color=gray)
    # Simulation r occupies x positions 3r+1 .. 3r+3, one slot per model.
    ax.bxp(model_boxes, showfliers=False,
           positions=[r*3 + model_index + 1 for r in range(len(model_boxes))],
           boxprops=boxprops, medianprops=medianprops,
           whiskerprops=whiskerprops, capprops=capprops, patch_artist=True)
    ax.grid(True)

    if model_index == 2:
        # After the last model has been drawn, rotate the tick labels ...
        pyplot.xticks(rotation=75, horizontalalignment='right',
                      rotation_mode='anchor')
        dx = 10/72.
        dy = 0/72.
        offset = transforms.ScaledTranslation(dx, dy, fig.dpi_scale_trans)

        # ... and apply the offset transform to all x ticklabels.
        for label in ax.xaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)

    ax.set_ylabel("Truth minus Predicted (m)")
    ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:02}"))


# Put one major tick every three positions, starting at x = 0.5.
# NOTE(review): 26 looks like the number of simulations per model; confirm
# and derive it from the data if the scene count can change.
#loc = plticker.MultipleLocator(base=3.0)
loc = plticker.FixedLocator(numpy.arange(26)*3 + 0.5)
ax.xaxis.set_major_locator(loc)

fig.suptitle("Model accuracy by flood scene")


# Opaque white legend box in the lower-right corner.
frame = pyplot.legend(handles=legend_elements, loc='lower right',
                      facecolor='white', framealpha=1, edgecolor=gray)

ax.set_axisbelow(True)

frame.set_frame_on(True)

pyplot.tight_layout()
pyplot.savefig(output_folder +
               f'{os.path.sep}alt_fig4_by_model_box' + hextent, **fig_args)
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Per model, count the simulations whose 50th-percentile value is negative
# (True) versus not negative (False); show only the accepted models.
median_is_negative = product_results_simple_df['50'] < 0
sign_counts = product_results_simple_df.groupby(
    ['model', median_is_negative])['model'].count()
display(sign_counts[accepted_models])

In [None]:
# Smallest 50th-percentile value per accepted model ...
min_median_by_model = product_results_simple_df.groupby(
    ['model'])['50'].min()[accepted_models]
display(min_median_by_model)

# ... and every row whose 50th-percentile value equals one of those minima.
rows_at_minimum = product_results_simple_df['50'].isin(min_median_by_model)
display(product_results_simple_df[rows_at_minimum])


In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)

# Statistic name -> function applied to the per-simulation '50' column.
algs = {
    "25": lambda x: numpy.quantile(x, 0.25),
    "50": lambda x: numpy.quantile(x, 0.50),
    "75": lambda x: numpy.quantile(x, 0.75),
    "25 to 75%": lambda x: numpy.quantile(x, 0.75)-numpy.quantile(x, 0.25)
}

# Passed to boot.ci as `alpha`; the three results are used as low / mid / high.
quantiles_for_analysis = [0.05, 0.5, 0.95]
disp_results = {}
alg_results = {}
for accepted_model in accepted_models:
    model_set = product_results_simple_df.loc[
        product_results_simple_df['model'] == accepted_model]
    # Raw numbers are keyed by the full model name, display strings by the
    # short model label.
    model_results = alg_results[accepted_model] = {}
    disp_model_results = {}
    disp_results[accepted_model_labels_lookup[accepted_model]] = disp_model_results

    for alg_name, alg_function in algs.items():
        model_values = model_set[["50"]]
        if len(model_values) <= 1:
            # A single row cannot be bootstrapped: keep its value, no interval.
            av_ci = [numpy.nan, model_values.values[0][0], numpy.nan]
        else:
            av_ci = boot.ci(model_values, alg_function,
                            alpha=quantiles_for_analysis, n_samples=10000)

        for quantile, value in zip(quantiles_for_analysis, av_ci):
            model_results[(alg_name, quantile)] = value

        # "mid" alone when there is no interval, otherwise "mid (low, high)".
        low, mid, high = av_ci[0], av_ci[1], av_ci[2]
        if numpy.isnan(low) or numpy.isnan(high):
            disp_model_results[alg_name] = f"{mid:0.2f}"
        else:
            disp_model_results[alg_name] = f"{mid:0.2f} ({low:0.2f}, {high:0.2f})"

alg_results_df = pandas.DataFrame(alg_results).T
disp_results_df = pandas.DataFrame(disp_results).T
display(disp_results_df)


In [None]:
# Flatten overall_metrics (model key -> simulation -> depth range -> metric ->
# value) into one record per metric value. Every 'mean_squared_error' record
# is followed by a derived 'root_mean_squared_error' record.
product_metrics_simple = []
for model_key, per_simulation in overall_metrics.items():
    param_parts = [
        ' '.join(param.keys()) if type(param) is dict else f"{param}"
        for param in model_key.depth_model_params.values()
    ]
    params_str = ", ".join(param_parts)
    if params_str:
        params_str = f" ({params_str})"
    simple_str = f"{model_key.depth_model_type.name}{params_str}"

    for simulation, per_depth_range in per_simulation.items():
        for depth_range, metrics in per_depth_range.items():
            # Only plain-dict entries hold metric values; skip anything else.
            if type(metrics) is not dict:
                continue
            for metric_name, metric_value in metrics.items():
                product_metrics_simple.append({
                    'metric': metric_name,
                    'value': metric_value,
                    'depth-range': depth_range,
                    'simulation': simulation,
                    'model': simple_str,
                })
                if metric_name == 'mean_squared_error':
                    product_metrics_simple.append({
                        'metric': 'root_mean_squared_error',
                        'value': metric_value ** 0.5,
                        'depth-range': depth_range,
                        'simulation': simulation,
                        'model': simple_str,
                    })

metric_columns = ['model', 'simulation', 'depth-range', 'metric', 'value']
product_metrics_simple_df = pandas.DataFrame(product_metrics_simple)[metric_columns]

#pandas.set_option('display.max_rows', 500)
display(product_metrics_simple_df)

# NOTE(review): the file name is misspelled ('resulst'); it is kept unchanged
# here because other tooling may already read this path.
product_metrics_simple_df.to_csv(
    output_folder + f'{os.path.sep}rmse_resulst.csv')


In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']

# For both metrics the bootstrapped statistic is the median across simulations.
algs = {
    "root_mean_squared_error": lambda x: numpy.quantile(x, 0.50),
    "mean_absolute_error": lambda x: numpy.quantile(x, 0.50)
}
# Passed to boot.ci as `alpha`; the three results are used as low / mid / high.
quantiles_for_analysis = [0.05, 0.5, 0.95]

# Model name -> DataFrame (rows: metric, columns: region) of formatted strings.
regional_results = {}
for accepted_model in accepted_models:

    model_set = product_metrics_simple_df.loc[
        (product_metrics_simple_df['model'] == accepted_model)
        & (product_metrics_simple_df['depth-range'] == 'all')]

    # Region = simulation name up to its first space.
    # sorted(): iterating a bare set of strings follows Python's per-process
    # hash randomisation, so the order of the bootstrap calls (and of the
    # region columns) would change between kernel restarts and defeat the seed
    # above. NOTE(review): presumably boot.ci draws from numpy's global RNG;
    # confirm for the installed scikits.bootstrap version.
    regions = sorted(
        set(model_set['simulation'].str.replace(" .*", "", regex=True)))

    region_analysis = {}
    for region in regions:
        disp_model_results = {}
        in_region = model_set['simulation'].str.startswith(region)
        for (alg_name, alg_function) in algs.items():
            model_values = model_set.loc[
                (model_set['metric'] == alg_name) & in_region]['value']
            if len(model_values) > 1:
                av_ci = boot.ci(model_values, alg_function,
                                alpha=quantiles_for_analysis, n_samples=10000)
            else:
                # A single simulation cannot be bootstrapped: no interval.
                av_ci = [numpy.nan, model_values.values[0], numpy.nan]

            # "mid" alone when there is no interval, else "mid (low, high)".
            if numpy.isnan(av_ci[0]) or numpy.isnan(av_ci[2]):
                disp_model_results[alg_name] = f"{av_ci[1]:0.2f}"
            else:
                disp_model_results[alg_name] = f"{av_ci[1]:0.2f} ({av_ci[0]:0.2f}, {av_ci[2]:0.2f})"
        region_analysis[region] = disp_model_results
    regional_results[accepted_model] = pandas.DataFrame(region_analysis)


In [None]:
# Stack the per-model regional tables under their model names, then transpose
# so that regions become the rows.
regional_frames = list(regional_results.values())
regional_model_names = list(regional_results.keys())
pandas.concat(regional_frames, keys=regional_model_names).T

In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']

# For both metrics the bootstrapped statistic is the 0.50 quantile.
algs = {
    metric_name: (lambda x: numpy.quantile(x, 0.50))
    for metric_name in ("root_mean_squared_error", "mean_absolute_error")
}

# Passed to boot.ci as `alpha`; the three results are used as low / mid / high.
quantiles_for_analysis = [0.05, 0.5, 0.95]
rms_results = {}
for accepted_model in accepted_models:
    is_model = product_metrics_simple_df['model'] == accepted_model
    is_all_depths = product_metrics_simple_df['depth-range'] == 'all'
    model_set = product_metrics_simple_df.loc[is_model & is_all_depths]

    # Display strings are keyed by the short model label.
    disp_model_results = {}
    rms_results[accepted_model_labels_lookup[accepted_model]] = disp_model_results

    for alg_name, alg_function in algs.items():
        model_values = model_set.loc[model_set['metric'] == alg_name]['value']
        if len(model_values) <= 1:
            # A single value cannot be bootstrapped: no interval.
            av_ci = [numpy.nan, model_values.values[0], numpy.nan]
        else:
            av_ci = boot.ci(model_values, alg_function,
                            alpha=quantiles_for_analysis, n_samples=10000)

        # "mid" alone when there is no interval, otherwise "mid (low, high)".
        low, mid, high = av_ci[0], av_ci[1], av_ci[2]
        if numpy.isnan(low) or numpy.isnan(high):
            disp_model_results[alg_name] = f"{mid:0.2f}"
        else:
            disp_model_results[alg_name] = f"{mid:0.2f} ({low:0.2f}, {high:0.2f})"

rms_results_df = pandas.DataFrame(rms_results).T
display(rms_results_df)


In [None]:
# Table 2: the RMSE / MAE summary joined with the percentile summary, one row
# per model label, shown and written to CSV.
table_2 = rms_results_df.join(disp_results_df)

display(table_2)
table_2.to_csv(
    output_folder + f'{os.path.sep}a_table_2_median_results_by_model.csv')


In [None]:


# Per-model summary: median RMSE / MAE over all simulations ('all' depth
# range) next to the median of each numeric per-simulation column.

# Rows for the accepted metrics and models, all depths.
metric_rows = product_metrics_simple_df.loc[
    product_metrics_simple_df['metric'].isin(accepted_metrics)
    & product_metrics_simple_df['model'].isin(accepted_models)
    & (product_metrics_simple_df['depth-range'] == 'all')]

# Aggregate only the numeric 'value' column: taking the median of the string
# columns ('simulation', 'depth-range') raises TypeError on pandas >= 2.0.
dt = metric_rows.groupby(by=["model", 'metric'])[['value']].median()

# Build a new frame with the interquartile range instead of adding the column
# to product_results_simple_df in place (the original assignment was an alias,
# so the earlier frame was silently modified).
product_results_simple_df_ranged = product_results_simple_df.assign(
    **{'25 to 75%': product_results_simple_df['75'] - product_results_simple_df['25']})

dt3 = []
for mod in accepted_models:
    dt_mod = metric_rows.loc[metric_rows['model'] == mod].groupby(
        by=["model", 'metric'])[['value']].median()

    model_rows = product_results_simple_df_ranged.loc[
        product_results_simple_df_ranged['model'] == mod]
    # Median of every numeric column; non-numeric columns are excluded up
    # front because quantile() raises on them in pandas >= 2.0.
    dt_2 = model_rows.select_dtypes(include='number').groupby(
        model_rows['model']).quantile([0.50])

    # Add one column per metric holding that model's median metric value.
    for (_, metric_name), metric_row in dt_mod.iterrows():
        dt_2[metric_name] = metric_row['value']

    dt3.append(dt_2)

# Replace the full model names in the index by their short labels.
dt4 = pandas.concat(dt3).rename(accepted_model_labels_lookup, axis='index')

# option_context restores the float format even if display() raises.
with pandas.option_context('display.float_format', '{:,.2f}'.format):
    display(dt4[['root_mean_squared_error', 'mean_absolute_error',
                 '25', '50', '75', '25 to 75%']])

In [None]:
import seaborn
from matplotlib.patches import PathPatch
seaborn.set(font_scale=2)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names; use whichever name this installation provides.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in pyplot.style.available
                   else 'seaborn-whitegrid')
pyplot.style.use(whitegrid_style)

# RMSE records of the accepted models, copied so relabelling the 'model'
# column does not touch product_metrics_simple_df.
sample_df = product_metrics_simple_df.loc[
    (product_metrics_simple_df['metric'] == 'root_mean_squared_error')
    & (product_metrics_simple_df['model'].isin(accepted_models))].copy()

sample_df['model'] = sample_df['model'].map(accepted_model_labels_lookup)

# Aggregate only the numeric 'value' column: taking the median of the string
# columns ('simulation', 'metric') raises TypeError on pandas >= 2.0.
display(sample_df.groupby(['model', 'depth-range'])[['value']].median())

height = (11.7-1)/2
width = 4.5
# One panel per model, one box per depth range.
sb_plot = seaborn.catplot(x="metric", y="value",
                          hue="depth-range", kind="box", col='model', col_wrap=3, col_order=['HAND', 'TVD', 'FwDET'],
                          data=sample_df, showfliers=False, hue_order=["d < 2", "2 <= d < 4", "d >= 4", 'all'], height=height, aspect=width/height)
#sb_plot.figure.set_size_inches(height, width)
sb_plot.figure.set_dpi(dpi)

# Shrink every box (and its median line) to `fac` of its drawn width so the
# neighbouring boxes are separated by a gap.
fac = 0.7
for ax in sb_plot.axes:
    # iterating through axes artists:
    for c in ax.get_children():

        # searching for PathPatches
        if isinstance(c, PathPatch):
            # getting current width of box:
            p = c.get_path()
            verts = p.vertices
            verts_sub = verts[:-1]
            xmin = numpy.min(verts_sub[:, 0])
            xmax = numpy.max(verts_sub[:, 0])
            xmid = 0.5*(xmin+xmax)
            xhalf = 0.5*(xmax - xmin)

            # setting new width of box
            xmin_new = xmid-fac*xhalf
            xmax_new = xmid+fac*xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # setting new width of median line
            for line in ax.lines:
                if numpy.all(line.get_xdata() == [xmin, xmax]):
                    line.set_xdata([xmin_new, xmax_new])


sb_plot.set_ylabels('RMSE (m)')
sb_plot.set_xlabels('')
sb_plot.set_xticklabels([''])
sb_plot.set_titles("{col_name}")
seaborn.move_legend(sb_plot, "lower center", bbox_to_anchor=(0.5, 0),
                    ncol=4, title=None)

# Save through the grid's own figure rather than pyplot's current figure.
sb_plot.figure.savefig(output_folder + f'{os.path.sep}fig_6_error_by_model_by_depth_range_RMSE'+hextent,
                       bbox_inches='tight', pad_inches=0, **fig_args)


In [None]:
import seaborn

# Box plots of every metric for the accepted models over the 'all' depth
# range, one panel per metric. The original cell issued this catplot call
# twice with identical arguments, leaving an orphan duplicate figure; it is
# created once here and saved through its own figure.
all_depth_rows = product_metrics_simple_df.loc[
    (product_metrics_simple_df['depth-range'] == 'all')
    & (product_metrics_simple_df['model'].isin(accepted_models))]
model_by_error_grid = seaborn.catplot(
    x="depth-range", y="value", col_wrap=2, sharey=False,
    kind="box", col='metric', hue='model',
    data=all_depth_rows)

model_by_error_grid.figure.savefig(
    output_folder + f'{os.path.sep}model_by_error'+hextent, **fig_args)

# Mean absolute error by depth range, one panel per accepted model.
mae_rows = product_metrics_simple_df.loc[
    (product_metrics_simple_df['metric'] == 'mean_absolute_error')
    & (product_metrics_simple_df['model'].isin(accepted_models))]
mae_grid = seaborn.catplot(
    x="metric", y="value",
    hue="depth-range", kind="box", col='model', col_wrap=3, sharey=False,
    data=mae_rows)

mae_grid.figure.savefig(
    output_folder + f'{os.path.sep}error_by_model_by_depth_range_MAE'+hextent, **fig_args)
