In [1]:
import os
import time

import dask.dataframe as dd
import matplotlib.colors as colors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Return a colormap covering only the [minval, maxval] slice of ``cmap``.

    ``cmap`` is sampled at ``n`` evenly spaced positions between ``minval``
    and ``maxval``; the sampled colours are turned into a new
    ``LinearSegmentedColormap`` named
    ``trunc(<cmap.name>,<minval>,<maxval>)`` (bounds shown with two decimals).
    """
    sample_points = np.linspace(minval, maxval, n)
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    return colors.LinearSegmentedColormap.from_list(label, cmap(sample_points))

In [3]:
# Five-entry cubehelix colormap, then keep only its 0.2-0.75 range
cmap = plt.get_cmap(name='cubehelix', lut=5)
new_cmap = truncate_colormap(cmap, minval=0.2, maxval=0.75)

In [4]:
# Hex codes of three colours taken at evenly spaced positions (0, 0.5, 1)
# along the truncated colormap
color = [colors.rgb2hex(new_cmap(position))
         for position in np.linspace(0, 1, 3)]

In [5]:
# Seaborn palettes built from the hex colours in `color`: `palette` uses all
# of them, `palette_two` leaves out the last one.
palette = sns.color_palette(color)
palette_two = sns.color_palette(color[:-1])

In [6]:
# Order of all possible bins for plotting: 10 m wide up to 100 m, then
# (100, 150], (150, 200] and (200, 500]
bin_edges = list(range(0, 101, 10)) + [150, 200, 500]
order = ['({}, {}]'.format(lower, upper)
         for lower, upper in zip(bin_edges[:-1], bin_edges[1:])]

In [7]:
# Directory that the plotting loops below prepend to every figure filename.
# It is joined by plain string concatenation, so the trailing slash is
# required, and the directory must already exist when the figures are saved.
save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/figures_for_2025_05_27/'

## Threshold

In [8]:
# Coverage thresholds 0.1, 0.2, ..., 0.9 (one figure is drawn per threshold)
thresholds = [tenths / 10 for tenths in range(1, 10)]

### HUC2_01, Northeast

In [10]:
## Setup for HUC2_01: the output directory name is assembled from the
## HUC2 code and the width statistic
bwidth = 'mean'
bbin = 'Bin'
huc2 = '01'
data_path = ''.join(['../../narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_',
                     huc2, '_2025_05_24_', bwidth])

In [11]:
# Full path of every non-hidden entry in the data directory
files = [os.path.join(data_path, entry)
         for entry in os.listdir(data_path)
         if not entry.startswith('.')]

In [12]:
# Open all listed parquet files as one Dask DataFrame (HUC2_01, Northeast)
ddf_ne = dd.read_parquet(files)

In [13]:
# For each coverage level, count per row how many of the columns at
# positions 1-10 hold a value strictly above that level, and store the
# count in a new column named after the level.
# NOTE(review): presumably column 0 is NHDPlusID and columns 1-10 are the
# per-node coverage values -- confirm against the parquet schema.
for level in ('0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'):
    node_cols = ddf_ne.iloc[:, 1:11]
    ddf_ne[level] = node_cols[node_cols > float(level)].count(axis=1)

In [None]:
# test = ddf_ne.get_partition(0).compute()

In [None]:
# test = test.drop(test.columns[1:11], axis=1)

In [None]:
# test.melt(id_vars=['NHDPlusID'],
#           value_vars=['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'],
#           var_name='threshold')

In [14]:
# Remove the ten columns at positions 1-10 (the ones the threshold counts
# were computed from). Re-running this cell would drop ten more columns.
node_columns = list(ddf_ne.columns[1:11])
ddf_ne = ddf_ne.drop(columns=node_columns)

In [15]:
# Reshape to long form: one row per (NHDPlusID, threshold) pair, with the
# former column name stored under 'threshold'
threshold_columns = ['0.1', '0.2', '0.3', '0.4', '0.5',
                     '0.6', '0.7', '0.8', '0.9']
ddf_ne_melted = ddf_ne.melt(id_vars=['NHDPlusID'],
                            value_vars=threshold_columns,
                            var_name='threshold')

In [16]:
# Remove the last nine columns from the wide frame (presumably the nine
# threshold-count columns appended above, which now live in ddf_ne_melted).
# Re-running this cell would drop nine more columns.
trailing_columns = list(ddf_ne.columns[-9:])
ddf_ne = ddf_ne.drop(columns=trailing_columns)

In [17]:
# Left-join the long (melted) threshold counts with the remaining columns of
# ddf_ne on NHDPlusID
test = ddf_ne_melted.merge(right=ddf_ne, how='left', on='NHDPlusID')

In [18]:
# Take only the first partition of the merged frame for a quick look
testy = test.get_partition(0)

In [None]:
# Materialise that partition so it is shown as the cell output
testy.compute()

In [None]:
# test = ddf_ne.compute()

In [None]:
# ddf_ne = ddf_ne.drop(columns='NHDPlusID')

In [None]:
# Multiply coverage by 100 (presumably fraction -> percent; TODO confirm).
# Not idempotent: re-running this cell multiplies by 100 again.
# NOTE(review): the cells above do not create a 'coverage' column on ddf_ne;
# confirm it exists in the parquet files.
ddf_ne['coverage'] = ddf_ne['coverage']*100

In [None]:
# unique_val = ddf_ne['Bin'].unique()

In [None]:
# unique_val.compute()

### HUC2_15, Southwest

In [None]:
## Setup for HUC2_15: the output directory name is assembled from the
## HUC2 code and the width statistic
bwidth = 'mean'
bbin = 'Bin'
huc2 = '15'
data_path = ''.join(['../../narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_',
                     huc2, '_2025_05_24_', bwidth])

In [None]:
# Full path of every non-hidden entry whose name contains 'thresh'
files = [os.path.join(data_path, entry)
         for entry in os.listdir(data_path)
         if not entry.startswith('.') and 'thresh' in entry]

In [None]:
# Open all listed parquet files as one Dask DataFrame (HUC2_15, Southwest)
ddf_sw = dd.read_parquet(files)

In [None]:
# For each level 0.1 ... 0.9, count per row how many of the columns at
# positions 1-10 hold a value strictly above that level; results go to
# columns 'thresh_01' ... 'thresh_09'.
# NOTE(review): presumably columns 1-10 are the per-node coverage values --
# confirm against the parquet schema.
for tenths in range(1, 10):
    node_cols = ddf_sw.iloc[:, 1:11]
    ddf_sw['thresh_0{}'.format(tenths)] = \
        node_cols[node_cols > tenths / 10].count(axis=1)

In [None]:
# Drop the reach identifier column from the Southwest frame.
# NOTE(review): the "All three" and "Side-by-side old" loops further down
# also drop 'NHDPlusID' from the computed Southwest frame -- confirm that
# this cell and those loops agree on who removes it.
ddf_sw = ddf_sw.drop(columns='NHDPlusID')

In [None]:
# Multiply coverage by 100 (presumably fraction -> percent; TODO confirm).
# Not idempotent: re-running this cell multiplies by 100 again.
ddf_sw['coverage'] = ddf_sw['coverage']*100

### HUC2_17, Northwest

In [None]:
## Setup for HUC2_17: the output directory name is assembled from the
## HUC2 code and the width statistic
bwidth = 'mean'
bbin = 'Bin'
huc2 = '17'
data_path = ''.join(['../../narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_',
                     huc2, '_2025_05_24_', bwidth])

In [None]:
# Full path of every non-hidden entry in the data directory
files = [os.path.join(data_path, entry)
         for entry in os.listdir(data_path)
         if not entry.startswith('.')]

In [None]:
# Open all listed parquet files as one Dask DataFrame (HUC2_17, Northwest)
ddf_nw = dd.read_parquet(files)

In [None]:
# For each level 0.1 ... 0.9, count per row how many of the columns at
# positions 1-10 hold a value strictly above that level; results go to
# columns 'thresh_01' ... 'thresh_09'.
# NOTE(review): presumably columns 1-10 are the per-node coverage values --
# confirm against the parquet schema.
for tenths in range(1, 10):
    node_cols = ddf_nw.iloc[:, 1:11]
    ddf_nw['thresh_0{}'.format(tenths)] = \
        node_cols[node_cols > tenths / 10].count(axis=1)

In [None]:
# Show the Dask DataFrame's summary as the cell output (nothing is computed)
ddf_nw

In [None]:
# test = ddf_nw.compute()

In [None]:
# ddf_nw = ddf_nw.drop(columns='NHDPlusID')

In [None]:
# Multiply coverage by 100 (presumably fraction -> percent; TODO confirm).
# Not idempotent: re-running this cell multiplies by 100 again.
ddf_nw['coverage'] = ddf_nw['coverage']*100

In [None]:
# unique_val = ddf_nw['Bin'].unique()

In [None]:
# unique_val.compute()

### Combined

In [None]:
# One box plot per coverage threshold with HUC2_01 and HUC2_15 pooled into a
# single distribution per width bin. Reaches whose coverage is exactly 0 are
# left out of the boxes; their share per basin is reported in the title.
# NOTE(review): assumes ddf_ne and ddf_sw carry 'threshold', 'coverage' and
# 'Bin' columns and that 'coverage' is already in percent -- confirm against
# the cells that build them.
max_reaches = 100000  # cap on plotted reaches per basin
sample_seed = 0       # fixed seed so the subsample (and the figure) is reproducible

for threshold in thresholds:

    zero_share = {}
    subsets = []
    for huc, basin_ddf in (('01', ddf_ne), ('15', ddf_sw)):
        # Reaches of this basin at the current threshold
        reaches = basin_ddf[basin_ddf['threshold'] == threshold]
        reaches = reaches.compute().reset_index(drop=True)

        # Percentage of this basin's reaches with exactly 0 coverage
        n_zero = (reaches.coverage == 0).sum()
        zero_share[huc] = round(((n_zero/reaches.shape[0])*100), 1)

        # Plot only non-zero reaches, subsampled if there are too many
        reaches = reaches[reaches.coverage != 0]
        if reaches.shape[0] > max_reaches:
            reaches = reaches.sample(max_reaches, random_state=sample_seed)
        subsets.append(reaches)

    # Pool both basins at this threshold
    combined = pd.concat(objs=subsets, axis=0).reset_index(drop=True)

    del subsets, reaches

    # round() rather than int(): threshold*100 can land just off an integer
    percent = str(round(threshold*100))

    fig, ax = plt.subplots(figsize=(10,6))
    sns.boxplot(data=combined, x='Bin', y='coverage',
                order=order,
                color=color[2],
                saturation=1, showmeans=True,
                fliersize=2, linewidth=2,
                boxprops=dict(alpha=0.8),
                meanprops={'marker':'o',
                           'markerfacecolor':'k',
                           'markeredgecolor':'k',
                           'markersize':'8'},
                ax=ax)

    plt.title('Reaches by proportion of nodes that meet or exceed ' + percent +
              '% coverage, excluding reaches with 0% coverage\nHUC2_01: ' + str(zero_share['01']) +
              '% of reaches have 0% coverage in all nodes\nHUC2_15: ' +
              str(zero_share['15']) + '% of reaches have 0% coverage in all nodes')

    patch = mpatches.Patch(color=color[2], label='combined')
    plt.legend(handles=[patch], title='HUC2', loc='upper right',
               title_fontproperties={'weight': 'bold'})

    plt.xticks(rotation=-45)

    plt.xlabel('Width Bin [m]')
    plt.ylabel('Proportion of nodes that exceed ' + percent + '% coverage')

    fig.savefig(save_path + 'threshold_combined_mean_' + percent + '.png')

    # Display, then release the figure so nine figures are not kept open
    plt.show()
    plt.close(fig)

### Two basins (HUC2_01 and HUC2_15, side by side)

In [None]:
# One box plot per coverage threshold with HUC2_01 and HUC2_15 drawn side by
# side (hue = 'huc2') in every width bin. Reaches whose coverage is exactly 0
# are left out of the boxes; their share per basin is reported in the title.
# NOTE(review): assumes ddf_ne and ddf_sw carry 'threshold', 'coverage' and
# 'Bin' columns and that 'coverage' is already in percent -- confirm against
# the cells that build them.
max_reaches = 100000  # cap on plotted reaches per basin
sample_seed = 0       # fixed seed so the subsample (and the figure) is reproducible

for threshold in thresholds:

    zero_share = {}
    subsets = []
    for huc, basin_ddf in (('01', ddf_ne), ('15', ddf_sw)):
        # Reaches of this basin at the current threshold
        reaches = basin_ddf[basin_ddf['threshold'] == threshold]
        reaches = reaches.compute().reset_index(drop=True)

        # Percentage of this basin's reaches with exactly 0 coverage
        n_zero = (reaches.coverage == 0).sum()
        zero_share[huc] = round(((n_zero/reaches.shape[0])*100), 1)

        # Plot only non-zero reaches, subsampled if there are too many
        reaches = reaches[reaches.coverage != 0]
        if reaches.shape[0] > max_reaches:
            reaches = reaches.sample(max_reaches, random_state=sample_seed)

        # assign() returns a new frame, so the label is never written into a
        # filtered slice (avoids SettingWithCopyWarning)
        subsets.append(reaches.assign(huc2=huc))

    combined = pd.concat(objs=subsets, axis=0).reset_index(drop=True)

    del subsets, reaches

    # round() rather than int(): threshold*100 can land just off an integer
    percent = str(round(threshold*100))

    fig, ax = plt.subplots(figsize=(12,6))
    sns.boxplot(data=combined, x='Bin', y='coverage',
                order=order,
                hue='huc2', palette=palette_two,
                legend=True,
                saturation=1, showmeans=True,
                fliersize=2, linewidth=2,
                boxprops=dict(alpha=0.8),
                meanprops={'marker':'o',
                           'markerfacecolor':'k',
                           'markeredgecolor':'k',
                           'markersize':'8'},
                ax=ax)

    plt.title('Reaches by proportion of nodes that meet or exceed ' + percent +
              '% coverage, excluding reaches with 0% coverage\nHUC2_01: ' + str(zero_share['01']) +
              '% of reaches have 0% coverage in all nodes\nHUC2_15: ' +
              str(zero_share['15']) + '% of reaches have 0% coverage in all nodes')

    plt.legend(title='HUC2', loc='upper right',
               title_fontproperties={'weight': 'bold'})

    plt.xlabel('Width Bin [m]')
    plt.ylabel('Proportion of nodes that exceed ' + percent + '% coverage')
    plt.xticks(rotation=-45)

    fig.savefig(save_path + 'threshold_just_hucs_mean_' + percent + '.png')

    # Display, then release the figure so nine figures are not kept open
    plt.show()
    plt.close(fig)

### All three (HUC2_01, HUC2_15, and combined)

In [None]:
# One box plot per coverage threshold with three boxes per width bin:
# HUC2_01, HUC2_15, and both pooled ('combined'). Reaches whose coverage is
# exactly 0 are left out of the boxes; their share per basin is reported in
# the title.
# NOTE(review): assumes ddf_ne and ddf_sw carry 'threshold', 'coverage' and
# 'Bin' columns and that 'coverage' is already in percent -- confirm against
# the cells that build them.
max_reaches = 100000  # cap on plotted reaches per basin
sample_seed = 0       # fixed seed so the subsample (and the figure) is reproducible

for threshold in thresholds:

    zero_share = {}
    subsets = []
    for huc, basin_ddf in (('01', ddf_ne), ('15', ddf_sw)):
        # Reaches of this basin at the current threshold
        reaches = basin_ddf[basin_ddf['threshold'] == threshold]
        # errors='ignore': the Southwest frame has already lost 'NHDPlusID'
        # in an earlier cell, and a plain drop would raise KeyError
        reaches = (reaches.compute()
                   .reset_index(drop=True)
                   .drop(columns=['NHDPlusID'], errors='ignore'))

        # Percentage of this basin's reaches with exactly 0 coverage
        n_zero = (reaches.coverage == 0).sum()
        zero_share[huc] = round(((n_zero/reaches.shape[0])*100), 1)

        # Plot only non-zero reaches, subsampled if there are too many
        reaches = reaches[reaches.coverage != 0]
        if reaches.shape[0] > max_reaches:
            reaches = reaches.sample(max_reaches, random_state=sample_seed)

        # assign() returns a new frame, so the label is never written into a
        # filtered slice (avoids SettingWithCopyWarning)
        subsets.append(reaches.assign(huc2=huc))

    # Per-basin rows first, then the same rows again relabelled 'combined'
    both = pd.concat(objs=subsets, axis=0)
    combined = pd.concat(objs=[both, both.assign(huc2='combined')],
                         axis=0).reset_index(drop=True)

    del subsets, reaches, both

    # round() rather than int(): threshold*100 can land just off an integer
    percent = str(round(threshold*100))

    fig, ax = plt.subplots(figsize=(12,6))
    sns.boxplot(data=combined, x='Bin', y='coverage',
                order=order,
                hue='huc2', palette=palette,
                legend=True,
                saturation=1, showmeans=True,
                fliersize=2, linewidth=2,
                boxprops=dict(alpha=0.8),
                meanprops={'marker':'o',
                           'markerfacecolor':'k',
                           'markeredgecolor':'k',
                           'markersize':'8'},
                ax=ax)

    plt.title('Reaches with at least ' + percent +
              '% coverage in each node, excluding reaches with 0% coverage\nHUC2_01: ' + str(zero_share['01']) +
              '% of reaches have 0% coverage in all nodes\nHUC2_15: ' +
              str(zero_share['15']) + '% of reaches have 0% coverage in all nodes')

    plt.legend(title='HUC2', loc='upper right',
               title_fontproperties={'weight': 'bold'})

    plt.xlabel('Width Bin [m]')
    plt.ylabel('Reaches with at least ' + percent + '% coverage')
    plt.xticks(rotation=-45)

    fig.savefig(save_path + 'threshold_all_three_mean_' + percent + '.png')

    # Display, then release the figure so nine figures are not kept open
    plt.show()
    plt.close(fig)

### Combined old

In [None]:
# Legacy version of the pooled plot: one figure per threshold, written to
# ./figures_for_2025_03_06/.
# NOTE(review): coverage is multiplied by 100 here, and the per-basin cells
# above also multiply it by 100 -- confirm it is not scaled twice.
mean_marker = {'marker':'o',
               'markerfacecolor':'k',
               'markeredgecolor':'k',
               'markersize':'8'}

for i, threshold in enumerate(thresholds):
    # Stack both basins, then keep only the rows at this threshold
    ddf_combo = dd.concat(dfs=[ddf_ne, ddf_sw], axis=0)
    ddf_combo = ddf_combo[(ddf_combo['threshold'] == threshold)]

    # Materialise and strip the identifier, old index and threshold columns
    combo = ddf_combo.compute()
    combo = combo.drop(columns=['NHDPlusID']).reset_index()
    combo = combo.drop(columns=['index', 'threshold'])
    combo['coverage'] = combo['coverage']*100

    percent = str(int(threshold*100))

    fig, ax = plt.subplots(figsize=(10,6))
    sns.boxplot(x='Bin', y='coverage',
                data=combo,
                order=order,
                color=color[2],
                saturation=1, showmeans=True,
                fliersize=2, linewidth=2,
                boxprops=dict(alpha=0.8),
                meanprops=mean_marker)

    plt.title('Reaches with at least ' + percent +'% coverage in each node, HUC2_01 and HUC2_15')
    plt.xticks(rotation=-45)

    plt.xlabel('Width Bin [m]')
    plt.ylabel('Detected [%]')

    plt.savefig('./figures_for_2025_03_06/threshold_combined_mean_' + percent + '.png')

### Side-by-side old

In [None]:
# Legacy side-by-side plot: one figure per threshold with HUC2_01 and HUC2_15
# as separate hues, written to ./figures_for_2025_03_06/ (the directory must
# already exist).
# NOTE(review): coverage is multiplied by 100 here, and the per-basin cells
# above also multiply it by 100 -- confirm it is not scaled twice.
for threshold in thresholds:

    subsets = []
    for huc, basin_ddf in (('01', ddf_ne), ('15', ddf_sw)):
        # Reaches of this basin at the current threshold
        reaches = basin_ddf[(basin_ddf['threshold'] == threshold)].compute()
        # errors='ignore': the Southwest frame has already lost 'NHDPlusID'
        # in an earlier cell, and a plain drop would raise KeyError
        reaches = (reaches.drop(columns=['NHDPlusID'], errors='ignore')
                   .reset_index(drop=True)
                   .drop(columns=['threshold']))
        reaches['coverage'] = reaches['coverage']*100
        reaches['huc2'] = huc
        subsets.append(reaches)

    # `combo` is kept as a notebook-level name; a later cell deletes it
    combo = pd.concat(objs=subsets, axis=0).reset_index(drop=True)

    del subsets, reaches

    # round() rather than int(): threshold*100 can land just off an integer
    percent = str(round(threshold*100))

    fig, ax = plt.subplots(figsize=(10,6))
    sns.boxplot(x='Bin', y='coverage', hue='huc2',
                data=combo, order=order,
                palette=palette_two,
                legend=True,
                saturation=1, showmeans=True,
                fliersize=2, linewidth=2,
                boxprops=dict(alpha=0.8),
                meanprops={'marker':'o',
                           'markerfacecolor':'k',
                           'markeredgecolor':'k',
                           'markersize':'8'},
                ax=ax)

    plt.legend(title='HUC2', loc='upper right')

    plt.title('Reaches with at least ' + percent +'% coverage in each node, HUC2_01 and HUC2_15')
    plt.xlabel('Width Bin [m]')
    plt.ylabel('Detected [%]')
    plt.xticks(rotation=-45)

    fig.savefig('./figures_for_2025_03_06/threshold_huc2_01_and_huc_15_mean_' + percent + '.png')

    # Display, then release the figure so nine figures are not kept open
    plt.show()
    plt.close(fig)

In [None]:
# Release the last plotting frame left behind by the loop above
del combo