In [None]:
from my_packages import *

from appgeopy import *

In [None]:
# Element-wise decoder that turns an array of UTF-8 byte strings (used below for
# "YYYYMMDD" date stamps) into Python text.
# `otypes` is given explicitly so that an empty input yields an empty array:
# without it np.vectorize cannot infer the output dtype and raises ValueError
# on size-0 input (e.g. a well for which no extremes were stored).
string_decode_func = np.vectorize(lambda x: x.decode("utf-8"), otypes=[str])


# ______________________________________________________________________________________________________
def get_info_fromHDF5(hdf5_fpath):
    """Read an HDF5 file and summarise its contents.

    Parameters
    ----------
    hdf5_fpath : path-like
        Location of the HDF5 file; it is opened read-only.

    Returns
    -------
    tuple
        ``(datasets, data_dict, stations)`` where ``datasets`` is whatever
        ``gwatertools.h5pytools.list_datasets`` returns, ``data_dict`` is the
        result of ``gwatertools.h5pytools.hdf5_to_data_dict`` and ``stations``
        is the sorted list of unique first ``/``-separated components of every
        dataset entry that does not contain the substring ``"date"``.
    """
    with h5py.File(hdf5_fpath, "r") as h5_handle:
        dataset_paths = gwatertools.h5pytools.list_datasets(h5_handle)
        contents = gwatertools.h5pytools.hdf5_to_data_dict(h5_handle)

        # Entries containing "date" are left out before taking the leading path component.
        leading_names = {path.split("/")[0] for path in dataset_paths if "date" not in path}
        station_names = sorted(leading_names)

    return dataset_paths, contents, station_names


# ______________________________________________________________________________________________________
def transform_to_dataframe(dict_byWellCode, extreme_type):
    """Build a table of one kind of extreme for a single well.

    Parameters
    ----------
    dict_byWellCode : mapping
        Data of one well; ``dict_byWellCode[extreme_type]`` must provide a
        ``"date"`` entry (byte strings formatted ``YYYYMMDD``) and a
        ``"value"`` entry.
    extreme_type : str
        Either ``"peaks"`` or ``"troughs"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``time`` (parsed dates), ``value`` and ``Type`` (the requested
        ``extreme_type`` repeated on every row).

    Raises
    ------
    ValueError
        If ``extreme_type`` is not one of the two accepted names.
    """
    accepted_types = ("peaks", "troughs")
    if extreme_type not in accepted_types:
        raise ValueError("extreme_type must be either 'peaks' or 'troughs'")

    extremes = dict_byWellCode[extreme_type]

    # Dates are stored as bytes: decode to text first, then parse as YYYYMMDD.
    decoded_dates = string_decode_func(extremes["date"])
    frame = pd.DataFrame(
        {
            "time": pd.to_datetime(decoded_dates, format="%Y%m%d"),
            "value": extremes["value"],
        }
    )
    frame["Type"] = extreme_type
    return frame


# ______________________________________________________________________________________________________
def get_combined_peakstroughs(dict_byWellCode):
    """Return the peaks and troughs of one well as a single time-ordered table.

    Parameters
    ----------
    dict_byWellCode : mapping
        Data of one well, containing both a ``"peaks"`` and a ``"troughs"``
        entry in the layout expected by ``transform_to_dataframe``.

    Returns
    -------
    pandas.DataFrame
        Rows of both extreme types (columns ``time``, ``value``, ``Type``),
        sorted by ``time`` and re-indexed from zero.
    """
    # Peaks first, then troughs, exactly as they are stacked before sorting.
    per_type_frames = [transform_to_dataframe(dict_byWellCode, kind) for kind in ("peaks", "troughs")]

    stacked = pd.concat(per_type_frames, ignore_index=True)
    chronological = stacked.sort_values(by="time")
    return chronological.reset_index(drop=True)


# ______________________________________________________________________________________________________


def model_gwl_array(dict_byWellCode, time_array):
    """Pair the stored model series of a well with its time axis.

    Parameters
    ----------
    dict_byWellCode : mapping
        Data of one well; the series is read from
        ``dict_byWellCode["measure"]["model"]``.
    time_array : array-like
        Time stamps, one per model value.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``time`` (``time_array``) and ``value`` (the model series).
    """
    modelled_levels = dict_byWellCode["measure"]["model"]
    columns = {"time": time_array, "value": modelled_levels}
    return pd.DataFrame(columns)

In [None]:
# Folder into which the comparison figures are written
fld2savefig = "compare"
# Set to False to build the figures without saving them to disk
save_Figure = True

In [None]:
# HDF5 inputs to compare: the "old" file and the "new" peaks/troughs file
old_file, new_file = "20240903_GWL_CRFP.h5", "20240919_GWL_CRFP_peakstroughs.h5"

# Load both files; the station list is taken from the new file only,
# the one derived from the old file is discarded.
old_dsets, old_ddict, _ = get_info_fromHDF5(old_file)
new_dsets, new_ddict, available_stations = get_info_fromHDF5(new_file)

In [None]:
# Time axis used for every model series, decoded from the byte-encoded
# "YYYYMMDD" strings stored under the top-level "date" entry of the new file.
gwl_datetime = pd.to_datetime(string_decode_func(new_ddict["date"]), format="%Y%m%d")

# Both kinds of extremes are needed, in both files, to draw a comparison.
extreme_types = ("peaks", "troughs")

# Old results: large faint circles drawn underneath.
old_style = dict(marker="o", linestyle="-", markersize=12, lw=2, zorder=2, alpha=0.2)
old_colors = {"peaks": "deepskyblue", "troughs": "darkorange"}

# New results: smaller opaque triangles with a dotted line, drawn on top.
new_style = dict(marker="^", linestyle=(0, (1, 2)), markersize=9, markeredgecolor="black", zorder=3, alpha=1)
new_colors = {"peaks": "lime", "troughs": "magenta"}

if save_Figure:
    # Ensure the target folder exists before the first figure is written
    # (whether visualize.save_figure creates it is not visible from here).
    os.makedirs(fld2savefig, exist_ok=True)

for select_station in available_stations:
    # The station list comes from the new file; a station missing from the
    # old file cannot be compared, so it is skipped instead of raising KeyError.
    if select_station not in old_ddict:
        continue

    old_ddict_byStation = old_ddict[select_station]
    new_ddict_byStation = new_ddict[select_station]

    # Well codes are the entries of the station that are nested dictionaries
    wellcodes = [elem for elem in new_ddict_byStation if isinstance(new_ddict_byStation[elem], dict)]

    for select_wellcode in wellcodes:
        old_ddict_byWellCode = old_ddict_byStation.get(select_wellcode)
        new_ddict_byWellCode = new_ddict_byStation.get(select_wellcode)

        # .get() yields None when the well code is absent from the old file;
        # testing `"peaks" in None` would raise TypeError.
        if not isinstance(old_ddict_byWellCode, dict):
            continue

        # Skip wells lacking peaks or troughs in either file: both are read
        # by get_combined_peakstroughs below.
        if any(
            (extreme_type not in old_ddict_byWellCode) or (extreme_type not in new_ddict_byWellCode)
            for extreme_type in extreme_types
        ):
            continue

        model_gwl_df = model_gwl_array(new_ddict_byWellCode, gwl_datetime).set_index("time")
        old_peakstroughs = get_combined_peakstroughs(old_ddict_byWellCode).set_index("time")
        new_peakstroughs = get_combined_peakstroughs(new_ddict_byWellCode).set_index("time")

        # _______________________________________________________________________
        # Roughly A4-landscape width at two thirds of its height (inches)
        fig_width, fig_height = (11.7, 8.3 * 2 / 3)
        fig = plt.figure(figsize=(fig_width, fig_height))
        ax = fig.add_subplot(111)

        # _______________________________________________________________________
        # Plot the modelled groundwater level series
        ax.plot(model_gwl_df, color="black", zorder=1, label="GWL")

        # _______________________________________________________________________
        # Old extremes (no legend entry, as before)
        for extreme_type in extreme_types:
            ax.plot(
                old_peakstroughs.query(f"Type=='{extreme_type}'")["value"],
                color=old_colors[extreme_type],
                **old_style,
            )

        # _______________________________________________________________________
        # New extremes; the label is derived from the type so that peaks are
        # listed as "Peaks" and troughs as "Troughs" in the legend.
        for extreme_type in extreme_types:
            ax.plot(
                new_peakstroughs.query(f"Type=='{extreme_type}'")["value"],
                color=new_colors[extreme_type],
                label=extreme_type.capitalize(),
                **new_style,
            )

        # _______________________________________________________________________
        # Axis labels, title, datetime ticks and legend
        visualize.configure_axis(
            ax=ax,
            xlabel="",
            ylabel="Groundwater Levels (m)",
            scaling_factor=1.2,
            title=f"{select_station} - {select_wellcode}",
        )
        visualize.configure_datetime_ticks(ax=ax, axis="x")
        visualize.configure_legend(ax=ax, scaling_factor=1, frameon=False, fontsize_base=12)

        # _______________________________________________________________________
        # Add grid and fix the displayed period
        ax.grid(axis="x", which="major", linestyle="-", linewidth=1, color="grey")
        ax.grid(axis="x", which="minor", linestyle="--", linewidth=1, color="lightgrey")
        ax.set_axisbelow(True)
        ax.set_xlim(datetime(2000, 1, 1), datetime(2025, 1, 1))

        # _______________________________________________________________________
        # Rotate the x-tick labels first, then optimise the layout so that the
        # space needed by the rotated labels is taken into account.
        plt.setp(ax.get_xticklabels(), rotation=90, ha="center")
        fig.tight_layout()

        # _______________________________________________________________________
        if save_Figure:
            output_fig_path = os.path.join(fld2savefig, f"{select_station}_{select_wellcode}.png")
            visualize.save_figure(fig, output_fig_path)

        # Close this specific figure to release its memory before the next well
        plt.close(fig)