This notebook compares model outputs for a suite of reaches by producing statistical plots. The generate_report_data.ipynb notebook needs to be run before this will work.

In [None]:
import dotenv
from dotenv import load_dotenv
load_dotenv()
import os
import logging
import cmcrameri.cm as cmc
import numpy

# Only WARNING and above reach the root logger.
logging.getLogger().setLevel('WARNING')

# Both environment variables are required; a missing one raises KeyError here.
output_folder = os.environ['OUTPUT_FOLDER']
highres = os.environ['HIGHRES'] == 'TRUE'

# Resolution, output file suffix and savefig keyword arguments for the
# selected quality level.
dpi, hextent, fig_args = (
    (600, "_high.tif", {'format': "tiff", 'pil_kwargs': {"compression": "tiff_lzw"}})
    if highres else
    (300, ".png", {'format': "png"})
)


# Five RGB triples given as hex components and scaled by 1/256 to floats.
# Alternative palette kept for reference: cmc.batlowS.colors
colour_scheme = numpy.array([
    [0x1F, 0x77, 0xB4],
    [0xF2, 0x71, 0x07],
    [0xAA, 0xE0, 0x07],
    [0x8C, 0x56, 0x4B],
    [0xDB, 0x72, 0xBC],
]) / 256.0

In [None]:
import pickle

# Load the three pickled result sets from OUTPUT_FOLDER.
# Each file is opened in a `with` block so the handle is closed even when
# unpickling fails part-way through.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open(f'{output_folder}{os.path.sep}product_results.pkl', 'rb') as output:
    product_results = pickle.load(output)

with open(f'{output_folder}{os.path.sep}overall_results.pkl', 'rb') as output:
    overall_results = pickle.load(output)

with open(f'{output_folder}{os.path.sep}overall_metrics.pkl', 'rb') as output:
    overall_metrics = pickle.load(output)


In [None]:
import pandas
from matplotlib import pyplot
# Re-key the overall results by a readable label: the depth model type name,
# followed by its parameters in brackets when it has any.
overall_results_simple = {}
for run_key, run_stats in overall_results.items():
    shown_params = ", ".join(
        f"{val if type(val) is not dict else ' '.join(val.keys())}"
        for _, val in run_key.depth_model_params.items())
    suffix = f" ({shown_params})" if shown_params else ""
    overall_results_simple[f"{run_key.depth_model_type.name}{suffix}"] = run_stats

# One row per model, ordered by the 'std' column.
result_synth = pandas.DataFrame(overall_results_simple).T.sort_values('std')

# Spread the 'perc' list out into one named column per percentile.
percentile_labels = ['2.5', '15.9', '25', '50', '75', '84.1', '97.5']
percentiles = pandas.DataFrame(
    result_synth.perc.tolist(),
    columns=percentile_labels,
    index=result_synth.index)

exploded = result_synth.join(percentiles).drop(columns='perc')
display(exploded)
exploded.to_csv(output_folder + f'{os.path.sep}overall.csv')

# Box plot built from the precomputed percentiles: whiskers at 2.5 / 97.5,
# box from 25 to 75, median at 50, no fliers.
fig, ax = pyplot.subplots(figsize=((11.7-1)/2, (8.3-1)/2), dpi=dpi)
boxes = [
    {
        'label': model_name,
        'whislo': row['2.5'],
        'q1': row['25'],
        'med': row['50'],
        'q3': row['75'],
        'whishi': row['97.5'],
        'fliers': [],
    }
    for model_name, row in exploded.iterrows()
]

ax.bxp(boxes, showfliers=False)
pyplot.xticks(rotation=90)
ax.set_ylabel("m")

pyplot.tight_layout()
pyplot.savefig(output_folder + f'{os.path.sep}overall_box'+hextent, **fig_args)

# Release the figure once it has been written to disk.
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Percentile column names, in the same order as the entries of each 'perc' list.
cols = ['2.5', '15.9', '25', '50', '75', '84.1', '97.5']

# Flatten {model: {simulation: stats}} into one record per (model, simulation).
product_results_simple = []
for model_key, per_simulation in product_results.items():
    shown_params = ", ".join(
        f"{val if type(val) is not dict else ' '.join(val.keys())}"
        for _, val in model_key.depth_model_params.items())
    suffix = f" ({shown_params})" if shown_params else ""
    model_label = f"{model_key.depth_model_type.name}{suffix}"

    for simulation_name, record in per_simulation.items():
        # The loaded stats dict itself is annotated (not a copy).
        record['simulation'] = simulation_name
        record['model'] = model_label
        for position, col in enumerate(cols):
            record[col] = record['perc'][position]

        product_results_simple.append(record)

wanted_columns = ['model', 'simulation', 'mean', 'std', 'b'] + cols
product_results_simple_df = pandas.DataFrame(product_results_simple)[wanted_columns]

pandas.set_option('display.max_rows', 50)
display(product_results_simple_df.sort_values(['model', 'std']))

# product_results_simple_df.to_csv(output_folder + f'{os.path.sep}model_run_outputs.csv')


In [None]:
# Create a boxplot:

# reach1, reach2, reac.... | HAND
# reach1, reach2, reac.... | TVD
# reach1, reach2, reac.... | FwDET
# with boxes showing the error
from matplotlib import ticker
import seaborn
seaborn.set(font_scale=1)
pyplot.style.use('seaborn-whitegrid')

# Models shown in the figure, paired with their short display labels.
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']
accepted_models = ['HAND (best MAE)', 'TVD (ind)', 'FwDET']
accepted_model_labels = ['HAND', 'TVD', 'FwDET']
accepted_model_labels_lookup = dict(zip(accepted_models, accepted_model_labels))

# One stacked panel per model, all sharing the y axis.
fig, axes_list = pyplot.subplots(
    nrows=len(accepted_model_labels), ncols=1,
    figsize=((8.3-1), (11.7-1)*0.9), dpi=dpi, sharey='all')

# Collect the precomputed box statistics of every simulation, grouped by model.
boxes = {mod_def: [] for mod_def in accepted_models}
accepted_rows = product_results_simple_df.loc[
    product_results_simple_df['model'].isin(accepted_models)]
for _, row in accepted_rows.iterrows():
    boxes[row['model']].append(
        {
            'label': row['simulation'],
            'whislo': row['2.5'],
            'q1': row['25'],
            'med': row['50'],
            'q3': row['75'],
            'whishi': row['97.5'],
            'fliers': [],
        }
    )

# pyplot.subplots returns a bare Axes (not an array) when only one row is requested.
panel_axes = axes_list if len(accepted_model_labels) > 1 else [axes_list]
last_panel = len(accepted_model_labels) - 1
for i, ax in enumerate(panel_axes):
    ax.bxp(boxes[accepted_models[i]], showfliers=False)
    ax.grid(True)
    if i == last_panel:
        # Only the final panel keeps its (rotated) simulation labels.
        pyplot.xticks(rotation=75, horizontalalignment='right',
                      rotation_mode='anchor')
    else:
        ax.set_xticklabels([])
    ax.set_ylabel("Benchmark minus Predicted (m)")
    ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:02}"))
    # The model label is written vertically along the right edge of its panel.
    ax.set_title(accepted_model_labels[i], rotation=-90, y=0.5,
                 position=(1.01, 0), ha='left', va='center')

pyplot.tight_layout()
pyplot.savefig(output_folder +
               f'{os.path.sep}fig4_by_model_box' + hextent, **fig_args)

# Release the figure once it has been written to disk.
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Create a single-panel boxplot:
#
# for every simulation the boxes of the accepted models (HAND, TVD, FwDET)
# are drawn side by side on the same axes, one colour per model,
# with boxes showing the error
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import matplotlib.ticker as plticker
from matplotlib import ticker
from colorsys import rgb_to_hls
from matplotlib import colors, transforms
import seaborn
import numpy
# Draws every accepted model on ONE axes, using the precomputed percentiles
# in product_results_simple_df, and saves the result as alt_fig4_by_model_box.
seaborn.set(font_scale=1)
# NOTE(review): newer matplotlib releases renamed the seaborn style sheets
# (e.g. 'seaborn-v0_8-whitegrid') - confirm against the pinned version.
pyplot.style.use('seaborn-whitegrid')

# nrows=1, ncols=1: axes_list is therefore a single Axes object, not an array.
fig, axes_list = pyplot.subplots(
    nrows=1, ncols=1, figsize=((11.7-1), (8.3-1)*0.9), dpi=dpi, sharey='all')
# Box statistics per model; each entry is one simulation.
boxes = {mod_def:[] for mod_def in accepted_models}
for model_result in product_results_simple_df.loc[(product_results_simple_df['model'].isin(accepted_models))].iterrows():
    #display(model[1]['2.5'])
    boxes[model_result[1]['model']].append(
        {
            'label': model_result[1]['simulation'],
            'whislo': model_result[1]['2.5'],    # Bottom whisker position
            'q1': model_result[1]['25'],    # First quartile (25th percentile)
            'med': model_result[1]['50'],    # Median         (50th percentile)
            'q3': model_result[1]['75'],    # Third quartile (75th percentile)
            'whishi': model_result[1]['97.5'],    # Top whisker position
            'fliers': []        # Outliers
        }
    )

# colours = {j:seaborn.utils.get_color_cycle()[j] for j in range(3)}
# light_vals = [rgb_to_hls(*c)[1] for c in colours.values()]
colours = colour_scheme
# HLS lightness of every palette colour.
light_vals = [rgb_to_hls(*c)[1] for c in colours]

# Outline colour: a gray at 60% of the lower lightness among the FIRST TWO
# palette colours only.
lum = min(light_vals[0:2]) * .6
gray = colors.rgb2hex((lum, lum, lum))

# One legend patch per model label, coloured by the label's position in the list.
legend_elements = [
    Patch(facecolor=colours[accepted_model_labels.index(label)], edgecolor=gray,
          label=label) for label in accepted_model_labels]


# i and model_index advance together; both index the current model.
i=0
for model_index in range(len(accepted_model_labels)):
    boxprops = dict(edgecolor=gray, facecolor=colours[model_index])
    medianprops = dict(color=gray)
    whiskerprops = dict(color=gray)
    capprops = dict(color=gray)
    ax = axes_list
    # Each simulation gets a slot 3 units wide; inside slot r the models sit
    # at r*3 + 1, r*3 + 2 and r*3 + 3.
    ax.bxp(boxes[accepted_models[i]], showfliers=False,
           positions=[r*3 + model_index + 1 for r in range(len(boxes[accepted_models[i]]))], boxprops=boxprops, medianprops=medianprops, whiskerprops=whiskerprops, capprops=capprops, patch_artist=True)
    ax.grid(True)
    
    # Tick label styling is applied once, after the last model has been drawn.
    if i == (len(accepted_model_labels)-1):
        # , rotation_mode='anchor')
        pyplot.xticks(rotation=75, horizontalalignment='right',
                     rotation_mode='anchor')
        # Horizontal shift of 10/72 in the units of fig.dpi_scale_trans; no vertical shift.
        dx = 10/72.
        dy = 0/72.
        offset = transforms.ScaledTranslation(dx, dy, fig.dpi_scale_trans)

        # apply offset transform to all x ticklabels.
        for label in ax.xaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)

    ax.set_ylabel("Benchmark minus Predicted (m)")
    ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:02}"))
    i=i+1


# this locator puts ticks at regular intervals
#loc = plticker.MultipleLocator(base=3.0)
# NOTE(review): 26 tick positions (0.5, 3.5, 6.5, ...) are hard-coded here;
# presumably this matches the number of simulations - confirm if the data changes.
loc = plticker.FixedLocator(numpy.arange(26)*3 +0.5)
# `ax` is still the single Axes assigned inside the loop above.
ax.xaxis.set_major_locator(loc)

fig.suptitle("Model accuracy by flood scene")


# Add the legend (opaque white frame, gray edge) to the current axes.
frame = pyplot.legend(handles=legend_elements, loc='lower right',
                  facecolor='white', framealpha=1, edgecolor=gray)

# Keep ticks and grid lines behind the plotted artists.
ax.set_axisbelow(True)

frame.set_frame_on(True)

pyplot.tight_layout()
pyplot.savefig(output_folder +
               f'{os.path.sep}alt_fig4_by_model_box' + hextent, **fig_args)
# Release the figure once it has been written to disk.
pyplot.cla()
pyplot.clf()
pyplot.close('all')
pyplot.close(fig)


In [None]:
# Per accepted model: number of simulations whose median ('50') is below zero
# versus not below zero.
median_is_negative = product_results_simple_df['50'] < 0
display(
    product_results_simple_df
    .groupby(['model', median_is_negative])['model']
    .count()[accepted_models]
)

In [None]:
# Smallest 2.5th percentile reached by each accepted model ...
lowest_by_model = product_results_simple_df.groupby(
    ['model'])['2.5'].min()[accepted_models]
display(lowest_by_model)

# ... and every row whose 2.5th percentile equals one of those minima.
holds_a_minimum = product_results_simple_df['2.5'].isin(lowest_by_model)
display(product_results_simple_df[holds_a_minimum])


In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)

# Statistics evaluated on the per-simulation median errors of each model.
algs = {
    "25": lambda x: numpy.quantile(x, 0.25),
    "50": lambda x: numpy.quantile(x, 0.50),
    "75": lambda x: numpy.quantile(x, 0.75),
    "25 to 75%": lambda x: numpy.quantile(x, 0.75)-numpy.quantile(x, 0.25)
}

# Percentiles requested from the bootstrap: lower bound, centre, upper bound.
quantiles_for_analysis = [0.05, 0.5, 0.95]
disp_results = {}
alg_results = {}
for accepted_model in accepted_models:
    model_set = product_results_simple_df.loc[
        product_results_simple_df['model'] == accepted_model]
    model_results = {}
    disp_model_results = {}
    alg_results[accepted_model] = model_results
    disp_results[accepted_model_labels_lookup[accepted_model]] = disp_model_results

    # Single-column frame holding the '50' (median) value of every simulation.
    model_values = model_set[["50"]]
    for alg_name, alg_function in algs.items():
        if len(model_values) > 1:
            av_ci = boot.ci(model_values, alg_function,
                            alpha=quantiles_for_analysis, n_samples=10000)
        else:
            # A single row cannot be resampled: report its value without bounds.
            av_ci = [numpy.nan, model_values.values[0][0], numpy.nan]

        for quantile, bound in zip(quantiles_for_analysis, av_ci):
            model_results[(alg_name, quantile)] = bound

        low, centre, high = av_ci[0], av_ci[1], av_ci[2]
        if numpy.isnan(low) or numpy.isnan(high):
            disp_model_results[alg_name] = f"{centre:0.2f}"
        else:
            disp_model_results[alg_name] = f"{centre:0.2f} ({low:0.2f}, {high:0.2f})"

alg_results_df = pandas.DataFrame(alg_results).T
disp_results_df = pandas.DataFrame(disp_results).T
display(disp_results_df)


In [None]:
# For every model label, pull the 'fstat' entry out of each region's metrics.
fstat_by_model = {}
for model_key, per_region in overall_metrics.items():
    shown_params = ", ".join(
        f"{val if type(val) is not dict else ' '.join(val.keys())}"
        for _, val in model_key.depth_model_params.items())
    suffix = f" ({shown_params})" if shown_params else ""
    model_label = f"{model_key.depth_model_type.name}{suffix}"

    fstat_by_model[model_label] = {
        region: attribs['fstat'] for region, attribs in per_region.items()}

# Rows are models, columns are regions.
fstat_by_model_df = pandas.DataFrame.from_dict(fstat_by_model, orient='index')



In [None]:

from matplotlib import ticker
import matplotlib
import seaborn
seaborn.set(font_scale=1)
pyplot.style.use('seaborn-whitegrid')

# Models shown in the figure and their display labels.
# (Previously tried: accepted_models = ['ProcessedFwdet'], accepted_model_labels = ['FwDET'].)
accepted_models = ['HAND (best MAE)', 'TVD (ind)', 'FwDET']
accepted_model_labels = ['HAND', 'TVD', 'FwDET']
accepted_model_labels_lookup = {
    'HAND (best MAE)': 'HAND',
    'TVD (ind)': 'TVD',
    'FwDET': 'FwDET',
    'ProcessedFwdet': 'FwDET',
}

# Keep the accepted models, relabel them, then reshape to one row per
# (Reach, Model) with the F-Stat value.
fstat_disply_df = fstat_by_model_df.loc[accepted_models].rename(
    accepted_model_labels_lookup)
fstat_long_df = (
    fstat_disply_df
    .melt(var_name="Reach", value_name="F-Stat", ignore_index=False)
    .assign(Model=lambda frame: frame.index)
    .reset_index(level=0, drop=True)
)

# "paper" plotting context with an explicit line width.
paper_rc = {'lines.linewidth': 1.2}
seaborn.set_context("paper", rc=paper_rc)

fig, ax = pyplot.subplots(
    nrows=1, ncols=1, figsize=((8.3-1), (11.7-1)*0.5), dpi=dpi, sharey='all')
seaborn.scatterplot(
    x="Reach", y="F-Stat", style='Model', hue='Model', data=fstat_long_df,
    ax=ax, markers={'HAND': '2', 'TVD': '+', 'FwDET': 'x'},
    linewidth=1.2, s=70, palette=colour_scheme)

pyplot.xticks(rotation=75, horizontalalignment='right',
              rotation_mode='anchor')
# Legend sits outside the axes, to the right.
ax.legend(loc='center left', bbox_to_anchor=(1.125, 0.5), ncol=1)

pyplot.tight_layout()
pyplot.savefig(output_folder +
               f'{os.path.sep}fstat' + hextent, **fig_args)

# Release the figure once it has been written to disk.
pyplot.cla()
pyplot.clf()
pyplot.close('all')



In [None]:
# fstat_region is another name for fstat_long_df (no copy is made), so the
# 'Region' column is added to that frame as well.
fstat_region = fstat_long_df
# Region = the reach name with everything from the first space onwards removed.
fstat_region['Region'] = fstat_region['Reach'].str.replace(" .*", "", regex=True)

# Median and inter-quartile range of the FwDET F-Stat within each region.
fwdet_fstat_by_region = fstat_region[
    fstat_region['Model'] == 'FwDET'].groupby('Region')['F-Stat']
fwdet_fstat_by_region.apply(
    lambda d: f'{numpy.median(d):0.02f} (IQR {numpy.quantile(d, 0.75) - numpy.quantile(d, 0.25):0.02f})')


In [None]:
# Flatten {model: {simulation: {depth range: {metric: value}}}} into one
# record per metric value. Depth-range entries that are not plain dicts are
# skipped, and every mean squared error also yields a root mean squared error.
product_metrics_simple = []
for model_key, per_simulation in overall_metrics.items():
    shown_params = ", ".join(
        f"{val if type(val) is not dict else ' '.join(val.keys())}"
        for _, val in model_key.depth_model_params.items())
    suffix = f" ({shown_params})" if shown_params else ""
    model_label = f"{model_key.depth_model_type.name}{suffix}"

    for simulation_name, per_range in per_simulation.items():
        for depth_range, metrics in per_range.items():
            if type(metrics) is not dict:
                continue
            for metric_name, metric_value in metrics.items():
                product_metrics_simple.append({
                    'metric': metric_name,
                    'value': metric_value,
                    'depth-range': depth_range,
                    'simulation': simulation_name,
                    'model': model_label,
                })
                if metric_name == 'mean_squared_error':
                    product_metrics_simple.append({
                        'metric': 'root_mean_squared_error',
                        'value': metric_value ** 0.5,
                        'depth-range': depth_range,
                        'simulation': simulation_name,
                        'model': model_label,
                    })

metric_columns = ['model', 'simulation', 'depth-range', 'metric', 'value']
product_metrics_simple_df = pandas.DataFrame(product_metrics_simple)[metric_columns]

display(product_metrics_simple_df)

product_metrics_simple_df.to_csv(
    output_folder + f'{os.path.sep}rmse_resulst.csv')


In [None]:
# Column-wise maximum per model over the whole-depth-range RMSE rows.
is_overall_rmse = (
    (product_metrics_simple_df['depth-range'] == 'all')
    & (product_metrics_simple_df['metric'] == 'root_mean_squared_error')
)
product_metrics_simple_df[is_overall_rmse].groupby('model').max()

In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']

# Both metrics are summarised by their median across simulations.
algs = {
    "root_mean_squared_error": lambda x: numpy.quantile(x, 0.50),
    "mean_absolute_error": lambda x: numpy.quantile(x, 0.50)
}
# Percentiles requested from the bootstrap: lower bound, centre, upper bound.
quantiles_for_analysis = [0.05, 0.5, 0.95]

# Per model: a table (metrics x regions) of "median (low, high)" strings.
regional_results = {}
for accepted_model in accepted_models:

    model_set = product_metrics_simple_df.loc[
        (product_metrics_simple_df['model'] == accepted_model)
        & (product_metrics_simple_df['depth-range'] == 'all')]

    # Region = the simulation name with everything from the first space removed.
    simulation_region = model_set['simulation'].str.replace(
        " .*", "", regex=True)
    # sorted(): a set of strings iterates in a hash-dependent order that can
    # change between kernel sessions, which would reorder the output table and
    # the order in which the bootstrap calls are made.
    regions = sorted(set(simulation_region))

    region_analysis = {}
    for region in regions:
        disp_model_results = {}
        for (alg_name, alg_function) in algs.items():
            # Exact region match: a prefix test would also pick up simulations
            # of any other region whose name merely starts with this one.
            model_values = model_set.loc[
                (model_set['metric'] == alg_name)
                & (simulation_region == region)]['value']
            if len(model_values) > 1:
                av_ci = boot.ci(model_values, alg_function,
                                alpha=quantiles_for_analysis, n_samples=10000)
            else:
                # A single value cannot be resampled: report it without bounds.
                av_ci = [numpy.nan, model_values.values[0], numpy.nan]

            if numpy.isnan(av_ci[0]) or numpy.isnan(av_ci[2]):
                disp_model_results[alg_name] = f"{av_ci[1]:0.2f}"
            else:
                disp_model_results[alg_name] = f"{av_ci[1]:0.2f} ({av_ci[0]:0.2f}, {av_ci[2]:0.2f})"
        region_analysis[region] = disp_model_results
    regional_results[accepted_model] = pandas.DataFrame(region_analysis)


In [None]:
# Stack the per-model tables under their model key, then flip the result so
# the regions become the rows.
stacked_by_model = pandas.concat(
    regional_results.values(), keys=regional_results.keys())
by_state = stacked_by_model.transpose()

by_state_csv = output_folder + f'{os.path.sep}by_state_median_results_by_model.csv'
by_state.to_csv(by_state_csv)
by_state


In [None]:
import numpy
import seaborn
import pandas
import scikits.bootstrap as boot
import numpy
numpy.random.seed(42)
accepted_metrics = ['root_mean_squared_error', 'mean_absolute_error']

# Both metrics are summarised by their median across simulations.
algs = {
    "root_mean_squared_error": lambda x: numpy.quantile(x, 0.50),
    "mean_absolute_error": lambda x: numpy.quantile(x, 0.50)
}

# Percentiles requested from the bootstrap: lower bound, centre, upper bound.
quantiles_for_analysis = [0.05, 0.5, 0.95]
rms_results = {}
for accepted_model in accepted_models:
    is_model = product_metrics_simple_df['model'] == accepted_model
    is_whole_range = product_metrics_simple_df['depth-range'] == 'all'
    model_set = product_metrics_simple_df.loc[is_model & is_whole_range]

    disp_model_results = {}
    rms_results[accepted_model_labels_lookup[accepted_model]] = disp_model_results
    for alg_name, alg_function in algs.items():
        model_values = model_set.loc[model_set['metric'] == alg_name]['value']
        if len(model_values) > 1:
            av_ci = boot.ci(model_values, alg_function,
                            alpha=quantiles_for_analysis, n_samples=10000)
        else:
            # A single value cannot be resampled: report it without bounds.
            av_ci = [numpy.nan, model_values.values[0], numpy.nan]

        low, centre, high = av_ci[0], av_ci[1], av_ci[2]
        if numpy.isnan(low) or numpy.isnan(high):
            disp_model_results[alg_name] = f"{centre:0.2f}"
        else:
            disp_model_results[alg_name] = f"{centre:0.2f} ({low:0.2f}, {high:0.2f})"

rms_results_df = pandas.DataFrame(rms_results).T
display(rms_results_df)


In [None]:
# Table 2: the RMSE/MAE summary joined (on the model label index) with the
# summary of the median-error distribution.
table_2 = rms_results_df.join(disp_results_df)

display(table_2)
table_2.to_csv(
    output_folder + f'{os.path.sep}a_table_2_median_results_by_model.csv')


In [None]:


# Rows for the accepted metrics computed over the whole depth range.
overall_accepted = (
    product_metrics_simple_df['metric'].isin(accepted_metrics)
    & (product_metrics_simple_df['depth-range'] == 'all'))

# Only the numeric 'value' column is aggregated. The remaining columns hold
# strings, and recent pandas versions raise instead of silently dropping them.
dt = product_metrics_simple_df.loc[
    overall_accepted
    & product_metrics_simple_df['model'].isin(accepted_models)
].groupby(by=["model", 'metric'])[['value']].median()

# Same object as product_results_simple_df (no copy), so the inter-quartile
# range column is added to that frame too.
product_results_simple_df_ranged = product_results_simple_df
product_results_simple_df_ranged['25 to 75%'] = (
    product_results_simple_df_ranged['75']
    - product_results_simple_df_ranged['25'])

# Quantiles are only defined for numeric columns; select them explicitly so
# the string columns (e.g. 'simulation') never reach quantile().
numeric_columns = product_results_simple_df_ranged.select_dtypes(
    include='number').columns.tolist()

dt3 = []
for mod in accepted_models:
    # Median of each accepted metric for this model.
    dt_mod = product_metrics_simple_df.loc[
        overall_accepted & (product_metrics_simple_df['model'] == mod)
    ].groupby(by=["model", 'metric'])[['value']].median()

    # Median of every numeric per-simulation statistic for this model.
    dt_2 = product_results_simple_df_ranged.loc[
        product_results_simple_df_ranged['model'] == mod
    ].groupby(by=["model"])[numeric_columns].quantile([0.50])

    # Append one column per metric, named after the metric.
    for (_, metric_name), metric_row in dt_mod.iterrows():
        dt_2[metric_name] = metric_row['value']

    dt3.append(dt_2)

dt4 = pandas.concat(dt3)
dt4 = dt4.rename(accepted_model_labels_lookup, axis='index')

# Show two decimals for this table only; the previous setting is restored
# even if the display call fails.
default_precision = pandas.get_option('float_format')
pandas.set_option('float_format', '{:,.2f}'.format)
try:
    display(dt4[['root_mean_squared_error', 'mean_absolute_error',
                 '25', '50', '75', '25 to 75%']])
finally:
    pandas.set_option('float_format', default_precision)


In [None]:
import seaborn
from matplotlib.patches import PathPatch
seaborn.set(font_scale=2)

pyplot.style.use('seaborn-whitegrid')

# RMSE rows of the accepted models, relabelled with the short model names.
sample_df = pandas.DataFrame(product_metrics_simple_df.loc[(product_metrics_simple_df['metric'] == 'root_mean_squared_error') & (
    product_metrics_simple_df['model'].isin(accepted_models))])

sample_df['model'] = sample_df['model'].apply(lambda x: accepted_model_labels_lookup[x])

# Median RMSE per model and depth range. Only 'value' is aggregated: the other
# columns hold strings, which recent pandas versions refuse to take a median of.
display(sample_df.groupby(['model', 'depth-range'])[['value']].median())

# One panel per model, one box per depth range.
height = (11.7-1)/2
width = 4.5
sb_plot = seaborn.catplot(x="metric", y="value",
                          hue="depth-range", kind="box", col='model', col_wrap=len(accepted_model_labels), col_order=accepted_model_labels,
                          data=sample_df, showfliers=False, hue_order=["d < 2", "2 <= d < 4", "d >= 4", 'all'], height=height, aspect=width/height, palette=colour_scheme)
sb_plot.figure.set_dpi(dpi)

# Narrow every box to `fac` times its drawn width, keeping it centred.
fac = 0.7
for ax in sb_plot.axes:
    for c in ax.get_children():

        # Boxes are looked up as PathPatch artists.
        if isinstance(c, PathPatch):
            # Horizontal extent of the box, ignoring the path's last vertex.
            p = c.get_path()
            verts = p.vertices
            verts_sub = verts[:-1]
            xmin = numpy.min(verts_sub[:, 0])
            xmax = numpy.max(verts_sub[:, 0])
            xmid = 0.5*(xmin+xmax)
            xhalf = 0.5*(xmax - xmin)

            # Move the left and right edges towards the centre (edits the
            # path's vertex array in place).
            xmin_new = xmid-fac*xhalf
            xmax_new = xmid+fac*xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # Shrink any line spanning exactly the old box width (the median
            # line) to the new width.
            for line in ax.lines:
                if numpy.all(line.get_xdata() == [xmin, xmax]):
                    line.set_xdata([xmin_new, xmax_new])


sb_plot.set_ylabels('RMSE (m)')
sb_plot.set_xlabels('')
sb_plot.set_xticklabels([''])
sb_plot.set_titles("{col_name}")
seaborn.move_legend(sb_plot, "lower center", bbox_to_anchor=(0.5, 0),
                    ncol=4, title=None)

pyplot.savefig(output_folder + f'{os.path.sep}fig_6_error_by_model_by_depth_range_RMSE'+hextent,
               bbox_inches='tight', pad_inches=0, **fig_args)


In [None]:
import seaborn
from matplotlib.patches import PathPatch
seaborn.set(font_scale=2)

pyplot.style.use('seaborn-whitegrid')

# MAE rows of the accepted models, relabelled with the short model names.
sample_df = pandas.DataFrame(product_metrics_simple_df.loc[(product_metrics_simple_df['metric'] == 'mean_absolute_error') & (
    product_metrics_simple_df['model'].isin(accepted_models))])

sample_df['model'] = sample_df['model'].apply(lambda x: accepted_model_labels_lookup[x])

# Median MAE per model and depth range. Only 'value' is aggregated: the other
# columns hold strings, which recent pandas versions refuse to take a median of.
display(sample_df.groupby(['model', 'depth-range'])[['value']].median())

# One panel per model, one box per depth range.
height = (11.7-1)/2
width = 4.5
sb_plot = seaborn.catplot(x="metric", y="value",
                          hue="depth-range", kind="box", col='model', col_wrap=len(accepted_model_labels), col_order=accepted_model_labels,
                          data=sample_df, showfliers=False, hue_order=["d < 2", "2 <= d < 4", "d >= 4", 'all'], height=height, aspect=width/height, palette=colour_scheme)
sb_plot.figure.set_dpi(dpi)

# Narrow every box to `fac` times its drawn width, keeping it centred.
fac = 0.7
for ax in sb_plot.axes:
    for c in ax.get_children():

        # Boxes are looked up as PathPatch artists.
        if isinstance(c, PathPatch):
            # Horizontal extent of the box, ignoring the path's last vertex.
            p = c.get_path()
            verts = p.vertices
            verts_sub = verts[:-1]
            xmin = numpy.min(verts_sub[:, 0])
            xmax = numpy.max(verts_sub[:, 0])
            xmid = 0.5*(xmin+xmax)
            xhalf = 0.5*(xmax - xmin)

            # Move the left and right edges towards the centre (edits the
            # path's vertex array in place).
            xmin_new = xmid-fac*xhalf
            xmax_new = xmid+fac*xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # Shrink any line spanning exactly the old box width (the median
            # line) to the new width.
            for line in ax.lines:
                if numpy.all(line.get_xdata() == [xmin, xmax]):
                    line.set_xdata([xmin_new, xmax_new])


sb_plot.set_ylabels('MAE (m)')
sb_plot.set_xlabels('')
sb_plot.set_xticklabels([''])
sb_plot.set_titles("{col_name}")
seaborn.move_legend(sb_plot, "lower center", bbox_to_anchor=(0.5, 0),
                    ncol=4, title=None)

pyplot.savefig(output_folder + f'{os.path.sep}fig_6_error_by_model_by_depth_range_MAE'+hextent,
               bbox_inches='tight', pad_inches=0, **fig_args)


In [None]:
# Mean 'reference_samples' value per depth range for the FwDET rows.
# Only 'value' is averaged: the other columns hold strings, which recent
# pandas versions refuse to average instead of silently dropping.
is_fwdet_sample_count = (
    (product_metrics_simple_df['model'] == 'FwDET')
    & (product_metrics_simple_df['metric'] == 'reference_samples'))
proportion = product_metrics_simple_df[is_fwdet_sample_count].groupby(
    'depth-range')[['value']].mean()

# Express every depth range as a percentage of the 'all' row.
percentage = proportion/proportion.loc['all'].value * 100.0
percentage['area %'] = percentage['value']
del percentage['value']
percentage.sort_values(by='area %')


In [None]:
import pandas
import geopandas
from simpledbf import Dbf5
from matplotlib import pyplot
import numpy
from IPython.display import display
import geopandas as gpd
from shapely.geometry import Point, LineString
import math
import seaborn
import scipy

In [None]:
import logging
import os
from dotenv import load_dotenv
load_dotenv()

# Only WARNING and above reach the root logger.
logging.getLogger().setLevel('WARNING')

# Both environment variables are required; a missing one raises KeyError here.
output_folder = os.environ['OUTPUT_FOLDER']
highres = os.environ['HIGHRES'] == 'TRUE'

# Resolution, output file suffix and savefig keyword arguments for the
# selected quality level.
dpi, hextent, fig_args = (
    (600, "_high.tif", {'format': "tiff", 'pil_kwargs': {"compression": "tiff_lzw"}})
    if highres else
    (300, ".png", {'format': "png"})
)


In [None]:
# Reach definitions consumed by the reach-statistics loop further down.
# That loop indexes each entry with the keys "Short_Loc", "point", "transect",
# "include-lines", "start-line" and "end-line".
# The list is empty here, so the loop does nothing until entries are added.
input_files = [ ]

In [None]:
def cross(series, cross=0, direction='cross'):
    """
    Locate where the values of a Series pass through the level ``cross``.

    Neighbouring samples are compared pairwise. A pair is a rising crossing
    when the left value is not above ``cross`` and the right value is, and a
    falling crossing in the opposite case. For every selected pair the
    crossing location is linearly interpolated between the two index values.

    ``direction`` selects the pairs: 'rising', 'falling', or any other value
    (default 'cross') for both kinds.

    Returns an array of interpolated index positions, in series order.
    """
    values = series.values
    positions = series.index.values

    is_above = values > cross
    is_not_above = numpy.logical_not(is_above)

    # Pair sample k (left) with sample k + 1 (right).
    goes_up = is_not_above[:-1] & is_above[1:]
    goes_down = is_above[:-1] & is_not_above[1:]

    if direction == 'rising':
        selected = goes_up
    elif direction == 'falling':
        selected = goes_down
    else:
        selected = goes_up | goes_down

    left = selected.nonzero()[0]
    right = left + 1

    # Straight line through (x_left, y_left) and (x_right, y_right), solved
    # for the x at which y equals the requested level.
    x_left, x_right = positions[left], positions[right]
    y_left, y_right = values[left], values[right]
    return (cross - y_left) * (x_right - x_left) / (y_right - y_left) + x_left


In [None]:
def calculate_stats(line_df):
    """
    Summarise the part of one line's profile that lies between the two
    threshold crossings nearest to the centre.

    The threshold is the mean of ``min_max`` minus 0.5. Crossings of
    ``FIRST_Z`` through it are found with ``cross``, truncated to integers and
    used as row labels of ``line_df``. The nearest crossing on each side of
    the centre (``CENTER_DIST`` above and below zero) bounds the section.

    Returns a Series with "width", "cross_section_area", "wetted_perimeter"
    and "mean_depth"; all four are NaN when either side has no crossing.
    """
    threshold = line_df['min_max'].mean() - 0.5
    crossing_labels = cross(line_df['FIRST_Z'], threshold).astype(numpy.int32)
    crossing_results = line_df[
        ['FIRST_DIST', 'FIRST_Z', 'CENTER_DIST']].loc[crossing_labels]

    # Split the crossings by the side of the centre they fall on.
    centre_offset = crossing_results['CENTER_DIST']
    positive_side = crossing_results[centre_offset > 0]['CENTER_DIST']
    negative_side = crossing_results[centre_offset < 0]['CENTER_DIST']

    if positive_side.count() < 1 or negative_side.count() < 1:
        empty = {
            "width": numpy.nan,
            "cross_section_area": numpy.nan,
            "wetted_perimeter": numpy.nan,
            "mean_depth": numpy.nan,
        }
        return pandas.DataFrame.from_dict(empty, orient='index')[0]

    # Row labels of the crossing closest to the centre on each side.
    bound1 = positive_side.idxmin()
    bound2 = negative_side.idxmax()

    line_dist = (line_df.loc[bound1]['FIRST_DIST']
                 - line_df.loc[bound2]['FIRST_DIST'])
    cross_section_df = line_df.loc[bound2:bound1]
    mean_depth = (cross_section_df['min_max']
                  - cross_section_df['FIRST_Z']).mean()

    # Length of the (distance, elevation) polyline through the section.
    profile_points = [
        Point(xy) for xy in zip(cross_section_df['FIRST_DIST'],
                                cross_section_df['FIRST_Z'])]
    wetted_perimeter = LineString(profile_points).length

    # Area approximated as mean depth times width.
    cross_section_area = mean_depth * line_dist

    summary = {
        "width": line_dist,
        "cross_section_area": cross_section_area,
        "wetted_perimeter": wetted_perimeter,
        "mean_depth": mean_depth,
    }
    return pandas.DataFrame.from_dict(summary, orient='index')[0]


In [None]:
# Build one dictionary of geomorphic summary statistics per reach, keyed by
# the reach's "Short_Loc" name.
reach_stats = {}
# Each included transect line is counted as this many length units of channel
# when deriving sinuosity and bed slope (presumably the along-channel transect
# spacing in metres -- TODO confirm against how the transects were generated).
line_spacing = 1000
for input_file in input_files:
    print(input_file["Short_Loc"])
    points = geopandas.read_file(input_file["point"])
    # Lowest of the four directional values for each point.
    points['min_max'] = points[['west', 'south', 'east', 'north']].min(axis=1)
    dbf = Dbf5(input_file["transect"])
    df = dbf.to_dataframe()
    joined_line_unfiltered = pandas.merge(
        df, points, 'inner', left_on='LINE_ID', right_on='ORIG_FID')
    joined_line = joined_line_unfiltered[joined_line_unfiltered['LINE_ID'].isin(
        input_file["include-lines"])]

    # The centre of each transect is the position of its lowest bed elevation.
    lowest_rows = joined_line.groupby('LINE_ID')['FIRST_Z'].idxmin()
    center = joined_line.loc[lowest_rows, ['LINE_ID', 'FIRST_DIST']].rename(
        columns={'FIRST_DIST': 'CENTER'})
    joined = pandas.merge(joined_line, center, 'inner',
                          left_on='LINE_ID', right_on='LINE_ID')
    joined['CENTER_DIST'] = joined['FIRST_DIST'] - joined['CENTER']

    stats = joined.groupby('LINE_ID').apply(calculate_stats)
    # Transects where no channel could be bounded come back as NaN; drop them.
    stats_no_null = stats[~numpy.isnan(stats['width'])]

    # Straight-line ("crow flies") distance between the first and last line.
    # NOTE(review): points is looked up by row label here but matched on
    # ORIG_FID / LINE_ID elsewhere in this cell -- confirm the point file's
    # row labels coincide with the line ids.
    start = points.loc[input_file["start-line"]]['geometry']
    end = points.loc[input_file["end-line"]]['geometry']
    crow_line = LineString([start, end])

    upstream_min = joined[joined['LINE_ID'] ==
                          input_file["start-line"]]['FIRST_Z'].min(skipna=True)
    downstream_min = joined[joined['LINE_ID'] ==
                            input_file["end-line"]]['FIRST_Z'].min(skipna=True)
    rise = abs(upstream_min - downstream_min)

    reach_line_count = len(input_file["include-lines"])
    mean_width = stats_no_null['width'].mean()
    mean_depth = stats_no_null['mean_depth'].mean()
    mean_area = stats_no_null['cross_section_area'].mean()
    mean_perimeter = stats_no_null['wetted_perimeter'].mean()
    width_cv_perc = stats_no_null['width'].std() / mean_width * 100

    local_stats = {
        "Short_Loc": input_file["Short_Loc"],
        "lines": reach_line_count,
        "mean width (m)": mean_width,
        "width CV (%)": width_cv_perc,
        "mean depth (m)": mean_depth,
        "cross sectional area (m2)": mean_area,
        "wetted perimeter (m)": mean_perimeter,
        "width depth ratio": mean_width / mean_depth,
        "hydraulic ratio": mean_area / mean_perimeter,
        "crow-distance": crow_line.length,
        # Channel length (lines * spacing) over straight-line distance.
        "sinuosity": reach_line_count * line_spacing / crow_line.length,
        # Elevation change over channel length.
        "bed slope": rise / reach_line_count / line_spacing
    }
    reach_stats[input_file["Short_Loc"]] = local_stats




In [None]:
# Reaches whose attributes are entered directly as literal values instead of
# being computed from transect files in this notebook.  Each dictionary is
# stored in reach_stats under its own "Short_Loc" name.
for local_stats in (
    {
        "Short_Loc": "Namoi - DS Mollee",
        "lines": 0,
        "mean width (m)": 40.36601307,
        "width CV (%)": 22.15769,
        "mean depth (m)": 2.011860096,
        "cross sectional area (m2)": 77.68530278,
        "wetted perimeter (m)": 45.05597617,
        "width depth ratio": 20.06403,
        "hydraulic ratio": 1.724195,
        "crow-distance": 0,
        "sinuosity": 1.879405,
        "bed slope": 0.000314,
    },
    {
        "Short_Loc": "Namoi - DS Gunidgera",
        "lines": 0,
        "mean width (m)": 38.65882353,
        "width CV (%)": 24.61572,
        "mean depth (m)": 2.266443563,
        "cross sectional area (m2)": 85.087905,
        "wetted perimeter (m)": 42.94594679,
        "width depth ratio": 17.05704,
        "hydraulic ratio": 1.981279,
        "crow-distance": 0,
        "sinuosity": 1.718927,
        "bed slope": 0.000215,
    },
    {
        "Short_Loc": "Murray - GKPF",  # Example only
        "lines": 0,
        "mean width (m)": 38.65882353,
        "width CV (%)": 24.61572,
        "mean depth (m)": 2.266443563,
        "cross sectional area (m2)": 85.087905,
        "wetted perimeter (m)": 42.94594679,
        "width depth ratio": 17.05704,
        "hydraulic ratio": 1.981279,
        "crow-distance": 0,
        "sinuosity": 1.718927,
        "bed slope": 0.000215,
    },
):
    reach_stats[local_stats["Short_Loc"]] = local_stats

# One column per reach, one row per attribute.
reach_stats_less_count_df = pandas.DataFrame(reach_stats)


In [None]:
# Per-reach channel count and channel-type classifications.  Each record is
# [Short_Loc, channel count, ERT, Thoms and Parsons (2003), Larkin et al., 2020].
channel_count = [
    ['SA - Weir Pool 5', 1.01754386, 'Type 18 (4)', '?', 'Type 1'],
    ['SA - Weir Pool 3', 1.680851064 - 0.680851064, 'Type 19 (7)', '?', 'Type 1'],
    ['SA - Weir Pool 4', 1, 'Type 18 (4)', '?', 'Type 1'],
    ['LBS - St George', 1.452380952, 'Type 19 (7)', 'meandering', 'Type 3'],
    ['LBS - Narran River', 1.41509434, 'Type 19 (7)', 'anabranching', 'Type 3'],
    ['LBS - Culgoa FP South', 1.12345679, 'Type 19 (7)', 'anabranching', 'Type 3'],
    ['LBS - Culgoa FP North', 1.23943662, 'Type 19 (7)', 'anabranching', 'Type 3'],
    ['Namoi - DS Mollee', 1, 'Type 18 (4)', 'meandering', 'Type 3'],
    ['Namoi - DS Gunidgera', 1.021739, 'Type 18 (4)', 'meandering', 'Type 3'],
    ['Murray - GKPF', 1.021739, 'Type 18 (4)', 'meandering', 'Type 3'],  # Example only
]

channel_count_df = pandas.DataFrame.from_records(
    channel_count,
    columns=['Short_Loc', 'channel count', 'ERT',
             'Thoms and Parsons (2003)', 'Larkin et al., 2020'],
    index="Short_Loc",
)

# Join on the reach name (the index of both frames once the statistics are
# transposed to one row per reach), then transpose back to one column per reach.
reach_stats_df = reach_stats_less_count_df.T.merge(
    channel_count_df, how='inner', left_index=True, right_index=True).T


In [None]:

# Write the combined reach attribute table into the output folder, then show
# it as the cell's result.
reach_stats_df.to_csv(f'{output_folder}{os.path.sep}geomorph_attrs.csv')
reach_stats_df


In [None]:
# Load the per-simulation error metrics.
# NOTE(review): 'rmse_resulst.csv' looks like a misspelling of
# 'rmse_results.csv'; it is left unchanged because it has to match the name
# the producing notebook writes -- confirm before renaming.
rmse_results_without_ri = pandas.read_csv(
    output_folder + f'{os.path.sep}rmse_resulst.csv')

# Split the simulation name: the last 7 characters are the scene (values such
# as '2016-11' in the table below) and everything before the final 10
# characters is the reach name (presumably a 3-character separator sits
# between the two -- TODO confirm).  Vectorised string slicing also avoids
# the negative-index wrap-around that `s[0:len(s)-10]` has on short strings.
rmse_results_without_ri['Short_Loc'] = rmse_results_without_ri['simulation'].str[:-10]
rmse_results_without_ri['scene'] = rmse_results_without_ri['simulation'].str[-7:]

# Scene, return interval (years), discharge (GL/day).
ri = [
    ['1998-07', 22, 185],
    ['1996-01', 8, 150],
    ['2008-01', 2, 50],
    ['2011-01', 25, 250],
    ['2012-01', 56, 300],
    ['2013-09', 2, 25],
    ['2016-11', 8, 80],
    ['2016-07', 8, 80]
]

ri_df = pandas.DataFrame.from_records(ri, columns=[
    'scene', 'return interval (years)', 'discharge (GL/day)'])

# Inner join: simulations whose scene is not listed in `ri` are dropped.
rmse_results = pandas.merge(rmse_results_without_ri,
                            ri_df, 'inner', left_on='scene', right_on='scene')


In [None]:
# Show the error-metric table as this cell's output.
rmse_results

In [None]:

# For each selected model, plot RMSE against every geomorphic / flood-event
# attribute (one regression panel per attribute) and save the figure.

def annotate(data, **kws):
    """Write the squared Pearson correlation (r²) onto the current axes.

    Passed to ``FacetGrid.map_dataframe``, which calls it with the rows of
    one facet.  Nothing is drawn when the facet has fewer than two rows.
    """
    if len(data['value'])>1:
        r, p = scipy.stats.pearsonr(data['geomorph metric'], data['value'])
        ax = pyplot.gca()
        ax.text(.05, .8, 'r² = {:.2f}'.format(r*r),
                transform=ax.transAxes)


# The joined table does not depend on the model, so build it once.
rmse_geomorph = pandas.merge(reach_stats_df.T, rmse_results, 'inner',
                             left_on='Short_Loc', right_on='Short_Loc')

for model_selection in ['FwDET','TVD (all)','HAND (best MAE)']:
    subset = rmse_geomorph[(rmse_geomorph['depth-range'] == 'all') &
                        (rmse_geomorph['model'] == model_selection) &
                        (rmse_geomorph['metric'] == 'root_mean_squared_error')]
    # Long form: one row per (reach, attribute); 'variable' holds the
    # attribute name and 'geomorph metric' its value.
    subset_long_form_df = subset.melt(id_vars=['Short_Loc',  'value'], value_vars=['mean width (m)', 'width CV (%)',
        'mean depth (m)', 'cross sectional area (m2)', 'wetted perimeter (m)',
        'width depth ratio', 'hydraulic ratio', 'sinuosity',
        'bed slope', 'channel count', 'return interval (years)', 'discharge (GL/day)'], value_name='geomorph metric')
    subset_long_form_df['geomorph metric'] = subset_long_form_df['geomorph metric'].astype(
        numpy.float64)
    seaborn.set(font_scale=2)
    # NOTE(review): newer matplotlib releases renamed the bundled seaborn
    # styles (e.g. 'seaborn-v0_8-whitegrid') -- confirm this name exists in
    # the target environment.
    pyplot.style.use('seaborn-whitegrid')
    g = seaborn.lmplot(x="geomorph metric", y="value", col="variable",
                    data=subset_long_form_df, col_wrap=3, sharex=False)

    g.map_dataframe(annotate)
    g.set_axis_labels("")
    g.set_ylabels("RMSE (m)")
    g.set_titles(row_template = '{row_name}', col_template = '{col_name}')

    pyplot.tight_layout()
    pyplot.savefig(output_folder +
                f'{os.path.sep}geomorph_{model_selection}'+hextent, **fig_args)
    # Release the figure before the next iteration creates a new one.
    pyplot.cla()
    pyplot.clf()
    pyplot.close('all')


In [None]:
import seaborn
import scipy

# Scatter the lowest RMSE achieved for every simulation against each
# geomorphic attribute, coloured by the model that achieved it.

# Rebuilt here so the cell does not depend on a variable left over from
# another cell's loop.
rmse_geomorph = pandas.merge(reach_stats_df.T, rmse_results, 'inner',
                             left_on='Short_Loc', right_on='Short_Loc')

subset_all = rmse_geomorph[(rmse_geomorph['depth-range'] == 'all') &
                       (rmse_geomorph['metric'] == 'root_mean_squared_error')]
subset_all_long_form_df = subset_all.melt(
    id_vars=['simulation', 'model', 'value'],
    value_vars=['mean width (m)', 'width CV (%)',
                'mean depth (m)', 'cross sectional area (m2)', 'wetted perimeter (m)',
                'width depth ratio', 'hydraulic ratio', 'sinuosity',
                'bed slope', 'channel count'],
    value_name='geomorph metric')

# Row label of the smallest 'value' within each (simulation, attribute,
# attribute value) group, i.e. the best-scoring model's row.
best_model = subset_all_long_form_df.groupby(['simulation','variable', 'geomorph metric'])['value'].idxmin()
# Look those row labels up in the long-form frame.  Both sides carry a
# 'value' column, so the merged frame holds 'value_x' (presumably the RMSE
# from the left-hand frame, which is what gets plotted) next to the row label.
best_model_values = pandas.merge(subset_all_long_form_df[['model','value']],pandas.DataFrame(best_model),'inner',left_index=True, right_on='value').reset_index()

seaborn.set(font_scale=2)
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-whitegrid') -- confirm this name exists in the target
# environment.
pyplot.style.use('seaborn-whitegrid')
g = seaborn.lmplot(x="geomorph metric", y="value_x", col="variable", hue='model',
                   data=best_model_values, col_wrap=3, sharex=False, fit_reg=False)

g.set_axis_labels("")
g.set_ylabels("RMSE (m)")
g.set_titles(row_template='{row_name}', col_template='{col_name}')

pyplot.savefig(output_folder +
               f'{os.path.sep}geomorph_best_rmse'+hextent, **fig_args)
pyplot.cla()
pyplot.clf()
pyplot.close('all')


In [None]:
import scipy
import seaborn

# Figure 6: MAE of the three selected models against return interval and
# geomorphic attributes, one row of panels per model.
rmse_geomorph = pandas.merge(reach_stats_df.T, rmse_results, 'inner',
                             left_on='Short_Loc', right_on='Short_Loc')

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of rmse_geomorph (and do not raise a
# SettingWithCopyWarning).
subset = rmse_geomorph[(rmse_geomorph['depth-range'] == 'all') &
                       (rmse_geomorph['model'].isin(['FwDET', 'TVD (all)', 'HAND (best MAE)'])) &
                       (rmse_geomorph['metric'] == 'mean_absolute_error')].copy()
# Keep only the text before the first space, e.g. 'TVD (all)' -> 'TVD'.
subset['model'] = subset['model'].str.replace(' .*', '', regex=True)
# Columns named to match the join keys used with fstat_long_df.
subset['Model'] = subset['model']
subset['Reach'] = subset['simulation']
subset_m=subset.merge(fstat_long_df, how='inner', on=['Reach', 'Model'])

subset_long_form_df = subset_m.melt(id_vars=['model','Short_Loc',  'value'], value_vars=['return interval (years)','mean width (m)', 'sinuosity',
                                                                               'bed slope', 'channel count'], value_name='geomorph metric')
subset_long_form_df['geomorph metric'] = subset_long_form_df['geomorph metric'].astype(
    numpy.float64)
seaborn.set(font_scale=2)
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-whitegrid') -- confirm this name exists in the target
# environment.
pyplot.style.use('seaborn-whitegrid')
row_order = ['HAND','TVD','FwDET']
g = seaborn.lmplot(x="geomorph metric", y="value", col="variable",row="model", row_order=row_order,
                    data=subset_long_form_df, sharex=False,facet_kws={'margin_titles':True})


def annotate(data, **kws):
    """Write the squared Pearson correlation (r²) onto the current axes.

    Passed to ``FacetGrid.map_dataframe``, which calls it with the rows of
    one facet.  Nothing is drawn when the facet has fewer than two rows.
    """
    if len(data['value'])>1:
        r, p = scipy.stats.pearsonr(data['geomorph metric'], data['value'])
        ax = pyplot.gca()
        ax.text(.05, .8, 'r² = {:.2f}'.format(r*r),
                transform=ax.transAxes)

g.map_dataframe(annotate)
g.set_axis_labels("")
g.set_ylabels("MAE (m)")
g.set(ylim=(0, 3))
g.set_titles(row_template='{row_name}', col_template='{col_name}')

pyplot.savefig(output_folder +
               f'{os.path.sep}Figure_6_geomorph_mae'+hextent, **fig_args)
pyplot.cla()
pyplot.clf()
pyplot.close('all')


In [None]:
import scipy
import seaborn

# Figure 6b: MAE of the three selected models against geomorphic attributes,
# one column of panels per model and one row per attribute.
rmse_geomorph = reach_stats_df.T.merge(
    rmse_results, how='inner', left_on='Short_Loc', right_on='Short_Loc')

# Rows for the full depth range, the three selected models and the MAE metric.
subset = rmse_geomorph.loc[
    lambda frame: (frame['depth-range'] == 'all')
    & (frame['model'].isin(['FwDET', 'TVD (all)', 'HAND (best MAE)']))
    & (frame['metric'] == 'mean_absolute_error')
]

# Long form with numeric attribute values; model names are cut at the first
# space, e.g. 'TVD (all)' -> 'TVD'.
subset_long_form_df = subset.melt(
    id_vars=['model', 'Short_Loc', 'value'],
    value_vars=['mean width (m)', 'sinuosity', 'bed slope', 'channel count'],
    value_name='geomorph metric',
).assign(**{
    'geomorph metric': lambda frame: frame['geomorph metric'].astype(numpy.float64),
    'model': lambda frame: frame['model'].str.replace(' .*', '', regex=True),
})

seaborn.set(font_scale=2)
pyplot.style.use('seaborn-whitegrid')
g = seaborn.lmplot(
    data=subset_long_form_df,
    x="geomorph metric",
    y="value",
    row="variable",
    col="model",
    col_order=['HAND', 'TVD', 'FwDET'],
    sharex=False,
)


def annotate(data, **kws):
    """Label the current axes with the squared Pearson correlation (r²).

    Intended for ``FacetGrid.map_dataframe``; facets with fewer than two
    rows are left without a label.
    """
    if len(data['value']) <= 1:
        return
    r, _ = scipy.stats.pearsonr(data['geomorph metric'], data['value'])
    axes = pyplot.gca()
    axes.text(.05, .8, 'r² = {:.2f}'.format(r*r), transform=axes.transAxes)


g.map_dataframe(annotate)
g.set_axis_labels("")
g.set_ylabels("MAE (m)")
g.set(ylim=(0, 3))
g.set_titles(row_template='{row_name}', col_template='{col_name}')

pyplot.savefig(
    f'{output_folder}{os.path.sep}Figure_6b_geomorph_mae{hextent}', **fig_args)
pyplot.cla()
pyplot.clf()
pyplot.close('all')
