# In [14]:
import os
import pandas as pd
import numpy as np
from pydsstools.heclib.dss import HecDss
from bokeh.palettes import Category20
from bokeh.plotting import figure, save, gridplot
from bokeh.io import output_file
from bokeh.models import ColumnDataSource, Legend, LegendItem, HoverTool

# Working folder: the .dss files and the gauge CSV are read from here, and the
# generated CSV/HTML outputs are written back to it.
folder_path = "C:/DSS FILE OUTPUTS/DSS-Sensitivity"
gauge_data_path = os.path.join(folder_path, "Gauge_Data.csv")

# Pathname filters: a DSS pathname is kept only if it contains one of the
# station strings and the search word.
river_stations = ["90128", "117188", "110659", "7458"]
search_word = "Harvey"
pathname_pattern = "/*/*/*/*/*/*/"

# Time window passed to the DSS reader.
start_date = "26AUG2017 00:00:00"
end_date = "14SEP2017 00:00:00"

def check_identical_values(df, new_series, n=100):
    """Return True if ``new_series`` duplicates a column already in ``df``.

    Two series count as duplicates when their first ``n`` non-missing values
    are equal. Missing values are dropped before comparing because a column
    of ``df`` may have been padded with NaN when it was aligned with other
    columns on a shared index; a positional comparison of the padded values
    would then fail to recognise a genuine duplicate.

    Args:
        df: DataFrame whose columns are the series collected so far.
        new_series: Candidate series to look for among the columns of ``df``.
        n: Number of leading non-missing values to compare.

    Returns:
        True if any column of ``df`` matches ``new_series``; False otherwise,
        including when ``df`` has no columns.
    """
    candidate = new_series.dropna().head(n).values
    # items() yields one Series per column, even if labels are repeated.
    return any(
        np.array_equal(existing.dropna().head(n).values, candidate)
        for _, existing in df.items()
    )

# Collect every matching time series from every .dss file in the folder into
# one wide DataFrame (one column per "<file> <pathname>").
df = pd.DataFrame()
for file in os.listdir(folder_path):
    if not file.endswith(".dss"):
        continue

    with HecDss.Open(os.path.join(folder_path, file)) as fid:
        for pathname in fid.getPathnameList(pathname_pattern, sort=1):
            # Keep only FLOW/STAGE records for the configured stations and
            # search word; cumulative flow records are excluded.
            mentions_station = any(station in pathname for station in river_stations)
            is_flow_or_stage = "FLOW" in pathname or "STAGE" in pathname
            if not (mentions_station and search_word in pathname and is_flow_or_stage):
                continue
            if "FLOW-CUM" in pathname:
                continue

            ts = fid.read_ts(pathname, window=(start_date, end_date), trim_missing=True)
            if ts is None:
                continue

            # Discard the entries the reader flagged as no-data.
            valid = ~ts.nodata
            times = np.array(ts.pytimes)
            header = f"{file} {pathname}"
            temp_df = pd.DataFrame({header: pd.Series(ts.values[valid], index=times[valid])})

            # Skip series whose leading values repeat a column already stored.
            if check_identical_values(df, temp_df[header]):
                continue
            df = pd.concat([df, temp_df], axis=1)

df.to_csv(os.path.join(folder_path, "output_raw.csv"), index_label="Date")

# Bucket the collected columns by (pathname field at index 2, series type).
# Column labels are "<file> <pathname>", so splitting on '/' exposes the
# individual pathname fields (presumably the DSS B and C parts at 2 and 3).
grouped_data = {}
for column in df.columns:
    split_label = column.split('/')
    series_type = "STAGE" if "STAGE" in split_label[3] else "FLOW"
    grouped_data.setdefault((split_label[2], series_type), []).append(df[column])

# The first three rows of the gauge CSV are read as per-column metadata; the
# first column is skipped because it is later used as the "Date" index.
gauge_data = pd.read_csv(gauge_data_path, header=None, nrows=3)
metadata = {}
for idx, key in enumerate(["River Station", "STAGE or FLOW", "USGS"]):
    metadata[key] = gauge_data.iloc[idx, 1:].tolist()

# The third row doubles as the header line of the actual observations.
data = pd.read_csv(gauge_data_path, header=2, parse_dates=["Date"]).set_index("Date")

# File each gauge column under the same (station, series type) keys used for
# the DSS series, creating the group when it does not exist yet.
for idx, column in enumerate(data.columns):
    key = (metadata["River Station"][idx], metadata["STAGE or FLOW"][idx])
    grouped_data.setdefault(key, []).append(data[column])

def _plan_label(series_name):
    """Return the legend/hover label for a series given its column name."""
    # Gauge columns are labelled with their full column name.
    if "Gauge" in series_name:
        return series_name
    # Otherwise use the last non-empty '/'-separated field of the label.
    parts = series_name.split('/')
    return parts[-2] if len(parts) >= 2 else "Unknown"


def plot_grouped_data_bokeh(grouped_data):
    """Write one time-series figure per group to ``Combined_Data_Plots.html``.

    The HTML file is saved in ``folder_path``. Each group gets its own figure
    with one line per series, a legend and a hover tool; the figures are
    stacked in a single column.

    Args:
        grouped_data: Mapping of ``(station, series type)`` to a list of
            pandas Series. Each Series needs a string ``name`` and an index
            usable on a datetime axis. A series type of ``"FLOW"`` labels the
            y-axis in CFS; anything else is labelled in FT.
    """
    output_file(os.path.join(folder_path, "Combined_Data_Plots.html"), title="Time Series Plots by River Station and Series Type")

    plots = []

    for key, series_list in grouped_data.items():
        p = figure(width=800, height=700, x_axis_type="datetime", title=f"{key[0]} - {key[1]}")

        legend_items = []

        for idx, series in enumerate(series_list):
            plan_name = _plan_label(series.name)

            # Series taken from an aligned DataFrame carry NaN wherever another
            # column had a timestamp this one lacks. Bokeh breaks a line at
            # every NaN, so drop them to keep each series drawn as one line.
            series = series.dropna()

            source = ColumnDataSource(data=dict(x=series.index, y=series.values, plan_name=[plan_name] * len(series)))
            color = Category20[20][idx % 20]  # palette repeats after 20 series
            line = p.line(x='x', y='y', source=source, line_width=2, color=color)
            legend_items.append(LegendItem(label=plan_name, renderers=[line]))

            hover = HoverTool(tooltips=[("Plan Name", "@plan_name"), ("Date", "@x{%F %T}"), ("Value", "@y{0.000}")], formatters={"@x": "datetime"}, renderers=[line])
            p.add_tools(hover)

        legend = Legend(items=legend_items, location="top_right")
        p.add_layout(legend)

        p.xaxis.axis_label = 'Date'
        unit = "CFS" if key[1] == "FLOW" else "FT"
        p.yaxis.axis_label = f'Values ({unit})'

        # One figure per row gives a single-column layout.
        plots.append([p])

    layout = gridplot(plots)
    save(layout)
# Render every group collected in grouped_data into the combined HTML file.
plot_grouped_data_bokeh(grouped_data)