In [None]:
from my_packages import *
from tools import *

from appgeopy import *

In [None]:
# _______________________________________________________________________
# Read the HDF5 archive: nested data dictionary, dataset listing and time axis
hdf5_fpath = r"20240903_GWL_CRFP.h5"

with h5py.File(hdf5_fpath, "r") as hdf5_file:
    # Pull the stored content into a dictionary and list the dataset paths
    existing_data_dict = h5pytools.hdf5_to_data_dict(hdf5_file)
    available_datasets = h5pytools.list_datasets(hdf5_file)

    # The 'date' entries are byte strings in YYYYMMDD form; decode them and
    # build the datetime index shared by every time series below
    date_strings = [raw_date.decode("utf-8") for raw_date in existing_data_dict["date"]]
    datetime_array = pd.to_datetime(date_strings, format="%Y%m%d")

# _______________________________________________________________________
# Station names are the first path component of each dataset path.
# Any path containing the substring "date" is left out of the listing.
stations = sorted({dataset_path.split("/")[0] for dataset_path in available_datasets if "date" not in dataset_path})

#### LOOPS TO PROCESS STATIONS

In [None]:
# Labels of the station/well runs that raised an exception during processing
error_station = []

# Switches deciding which artefacts the processing loop writes to disk
save_Excel = True
save_Figure = True

# Choose the first "AttempNNN" name (NNN = 001 ... 9999) that does not exist yet.
# When every candidate is already present, the last one tried is kept.
i = 1
savefolder = f"Attemp{i:03}"
while os.path.exists(savefolder) and i < 9999:
    i += 1
    savefolder = f"Attemp{i:03}"


# Process every station. Each well is handled inside its own try/except/finally so
# that (a) a failure in one well does not skip the remaining wells of the station,
# (b) the error label always refers to the well that actually failed, and
# (c) the figure is released even when an exception interrupts the plotting.
# The code is deliberately kept flat (no helper functions) so that the variables of
# the last processed well stay available as notebook globals.
for station in tqdm(stations):
    # _______________________________________________________________________
    # Extract station data and the list of wells to process
    try:
        station_data = existing_data_dict[station]
        # Keep only the entries that are dictionaries holding exactly three items
        wellcodes = [elem for elem, val in station_data.items() if isinstance(val, dict) and len(val) == 3]
    except Exception as e:
        # No wellcode is bound at this point, so only the station is reported
        print(station, e)
        error_station.append(station)
        continue

    for wellcode in wellcodes:
        fig = None  # tells the `finally` clause whether a figure has to be closed
        try:
            # _______________________________________________________________________
            # Extract time-series data and trim it to the first/last valid sample
            model_gwl_arr = station_data[wellcode]["measure"]["model"]
            model_gwl_series = pd.Series(data=model_gwl_arr, index=datetime_array)
            valid_gwl_series = model_gwl_series.loc[
                model_gwl_series.first_valid_index() : model_gwl_series.last_valid_index()
            ]

            # Series covering at most two distinct calendar years are skipped
            if valid_gwl_series.index.year.unique().size <= 2:
                continue

            trend, detrended_series = get_seasonal_and_trend_data(series=valid_gwl_series, detrend_degree=3)

            # _______________________________________________________________________
            # Apply smoothing on the detrended time series
            smoothed_series = smoothing.simple_moving_average(num_arr=detrended_series, window_size=15)

            # _______________________________________________________________________
            # First pass: detect peaks and troughs to obtain their properties
            peaks, properties_peaks, peak_times, troughs, properties_troughs, trough_times = detect_peaks_troughs(
                signal=smoothed_series.values, time_index=smoothed_series.index
            )

            # _______________________________________________________________________
            # Derive prominence/distance thresholds from the first-pass properties
            peak_prom, peak_dist = get_properties_threshold(
                properties_peaks, prom_proportion=0.8, dist_proportion=0.5
            )
            trough_prom, trough_dist = get_properties_threshold(
                properties_troughs, prom_proportion=0.8, dist_proportion=0.5
            )

            # scipy.signal.find_peaks requires distance >= 1
            peak_dist = 1 if peak_dist < 1 else peak_dist
            trough_dist = 1 if trough_dist < 1 else trough_dist

            # _______________________________________________________________________
            # Second pass: redetect peaks and troughs using the thresholds
            peaks, _ = scipy.signal.find_peaks(smoothed_series.values, prominence=peak_prom, distance=peak_dist)
            peak_times = smoothed_series.index[peaks]
            # Troughs are the peaks of the negated signal
            troughs, _ = scipy.signal.find_peaks(
                -smoothed_series.values, prominence=trough_prom, distance=trough_dist
            )
            trough_times = smoothed_series.index[troughs]

            # _______________________________________________________________________
            # Convert to DataFrames; the values are read from the original (unsmoothed)
            # valid series at the detected times
            signal_peaks = pd.DataFrame(data={"time": peak_times, "value": valid_gwl_series[peak_times].values})
            signal_troughs = pd.DataFrame(
                data={"time": trough_times, "value": valid_gwl_series[trough_times].values}
            )

            # _______________________________________________________________________
            # Reduce the candidates with filter_extremes_by_range (months_range=1)
            extreme_peaks = filter_extremes_by_range(
                signal_peaks, date_col="time", value_col="value", extreme_type="peak", months_range=1
            )
            extreme_troughs = filter_extremes_by_range(
                signal_troughs, date_col="time", value_col="value", extreme_type="trough", months_range=1
            )

            # _______________________________________________________________________
            # Apply filtering for consecutive peaks and troughs
            filtered_peaks, filtered_troughs = filter_consecutive(
                input_peaks=extreme_peaks,
                input_troughs=extreme_troughs,
                time_peak="time",
                value_peak="value",
                time_trough="time",
                value_trough="value",
            )

            # _______________________________________________________________________
            # Thresholds (0.5 quantile) used by the final filter
            days_threshold = get_timediff_threshold(peaks=filtered_peaks, troughs=filtered_troughs, quantile=0.5)
            absval_threshold = get_valuediff_threshold(peaks=filtered_peaks, troughs=filtered_troughs, quantile=0.5)

            # _______________________________________________________________________
            # Restrict the peaks to the expected time-of-year window.
            # NOTE(review): the previous comment mentioned November to February, but the
            # arguments passed are month_A=12 and month_B=1 — confirm the intended window.
            cutoff_filtered_peaks = select_peak_within_timeframe(
                peaks=filtered_peaks,
                time_col="time",
                value_col="value",
                cutoff_year=2016,
                X_years=3,
                month_A=12,
                month_B=1,
            )

            # _______________________________________________________________________
            # Final filter to get condition-satisfied peak-trough pairs
            final_peaks, final_troughs = finalize_peaks_and_troughs(
                peaks=cutoff_filtered_peaks,
                troughs=filtered_troughs,
                time_col="time",
                value_col="value",
                time_threshold=pd.Timedelta(days=days_threshold),
                value_threshold=absval_threshold,
            )

            # _______________________________________________________________________
            # Optionally export the final peaks/troughs to one workbook per well
            if save_Excel:
                fld2saveExcel = os.path.join(savefolder, "Peaks_Troughs")
                os.makedirs(fld2saveExcel, exist_ok=True)

                savepath = os.path.join(fld2saveExcel, f"{station}_{wellcode}.xlsx")

                if not os.path.isfile(savepath):
                    # Peaks create the workbook; troughs are then written through the
                    # project helper under the "troughs" sheet name
                    final_peaks.to_excel(savepath, index=False, sheet_name="peaks")
                    data_io.save_df_to_excel(
                        df_to_save=final_troughs, filepath=savepath, sheet_name="troughs", verbose=False
                    )
                else:
                    print("Target file has already existed!")

            # _______________________________________________________________________
            # Build the figure
            fig_width, fig_height = (11.7, 8.3 * 2 / 3)
            fig = plt.figure(figsize=(fig_width, fig_height))
            ax = fig.add_subplot(111)

            # Plot the valid groundwater level series
            ax.plot(valid_gwl_series, color="black", zorder=1)

            # _______________________________________________________________________
            # All detected peaks: hollow, black-edged, semi-transparent circles
            ax.plot(
                signal_peaks.set_index("time"),
                marker="o",
                linestyle=" ",
                markersize=16,
                zorder=2,
                color="none",
                markeredgecolor="black",
                alpha=0.3,
            )

            # Final peaks: lime triangles joined by a dotted line
            # (the intermediate `filtered_peaks` stage is intentionally not drawn)
            ax.plot(
                final_peaks.set_index("time"),
                marker="^",
                linestyle=(0, (1, 2)),
                markersize=14,
                zorder=4,
                color="lime",
                markeredgecolor="black",
                alpha=1,
                label="Peaks",
            )

            # _______________________________________________________________________
            # All detected troughs: hollow, black-edged, semi-transparent circles
            ax.plot(
                signal_troughs.set_index("time"),
                marker="o",
                linestyle=" ",
                markersize=16,
                color="none",
                markeredgecolor="black",
                zorder=2,
                alpha=0.3,
            )

            # Final troughs: magenta triangles joined by a dotted line
            # (the intermediate `filtered_troughs` stage is intentionally not drawn)
            ax.plot(
                final_troughs.set_index("time"),
                marker="^",
                linestyle=(0, (1, 2)),
                markersize=14,
                color="magenta",
                markeredgecolor="black",
                zorder=4,
                alpha=1,
                label="Troughs",
            )

            # _______________________________________________________________________
            # Axis labels, title, datetime ticks and legend
            visualize.configure_axis(
                ax=ax,
                xlabel="",
                ylabel="Groundwater Levels (m)",
                scaling_factor=1.2,
                title=f"{station} - {wellcode}",
            )
            visualize.configure_datetime_ticks(ax=ax, axis="x")
            visualize.configure_legend(ax=ax, scaling_factor=1, frameon=False, fontsize_base=12)

            # _______________________________________________________________________
            # Add vertical grid lines and fix the displayed time span
            ax.grid(axis="x", which="major", linestyle="-", linewidth=1, color="grey")
            ax.grid(axis="x", which="minor", linestyle="--", linewidth=1, color="lightgrey")
            ax.set_axisbelow(True)
            ax.set_xlim(datetime(2000, 1, 1), datetime(2025, 1, 1))

            # _______________________________________________________________________
            # Rotate the x-tick labels first, then optimise the layout, so that the
            # layout is computed for the labels as they will actually be drawn
            plt.setp(ax.get_xticklabels(), rotation=90, ha="center")
            fig.tight_layout()

            # _______________________________________________________________________
            # Optionally save the figure, one PNG per well
            if save_Figure:
                fld2savefig = os.path.join(savefolder, "Figures")
                os.makedirs(fld2savefig, exist_ok=True)
                output_fig_path = os.path.join(fld2savefig, f"{station}_{wellcode}.png")
                visualize.save_figure(fig, output_fig_path)

        except Exception as e:
            # Best-effort processing: report the failing well and move on to the next one
            print(f"{station}_{wellcode}", e)
            error_station.append(f"{station}_{wellcode}")
        finally:
            # Always release the figure, including when an exception interrupted plotting
            if fig is not None:
                plt.close(fig)

#### SINGLE STATION MODIFICATION