In [None]:
from appgeopy import *
from my_packages import *
from tqdm.notebook import tqdm  # Use tqdm for notebooks

In [None]:
# Path of the HDF5 archive read by this notebook (relative to the working directory).
hdf5_fpath = r"20240903_GWL_CRFP.h5"
# Open read-only; everything below runs while the file handle is still open.
with h5py.File(hdf5_fpath, "r") as hdf5_file:
    # Project helper: turns the file contents into a dict. Later cells index it
    # by station name and by the "date" key.
    existing_data_dict = gwatertools.h5pytools.hdf5_to_data_dict(hdf5_file)
    # Project helper: dataset paths found in the file. The plotting cell splits
    # each entry on "/" to recover the station names.
    available_datasets = gwatertools.h5pytools.list_datasets(hdf5_file)
    # Parse the "date" entry (YYYYMMDD values) into pandas datetimes; this is the
    # shared time index used when building the groundwater-level series.
    # NOTE(review): kept inside the `with` on purpose - whether the helper returns
    # in-memory arrays or lazy file-backed objects is not visible here.
    datetime_array = pd.to_datetime(existing_data_dict["date"], format="%Y%m%d")

In [None]:
# Build one A4 figure per station: one subplot per well, showing the modelled
# groundwater-level (GWL) series, and save it under figures/.
#
# Station names are the first path component of every dataset key; the shared
# "date" dataset is not a station and is filtered out.
stations = sorted({elem.split("/")[0] for elem in available_datasets if "date" not in elem})

# --- Loop-invariant plot configuration (computed once, not per station) -----

# A4 paper size in inches (landscape)
figsize = (11.7, 8.27)
fig_width, fig_height = figsize

# Scaling factor for fonts (project helper, depends only on the figure size)
scaling_factor = calculate_scaling_factor(fig_width, fig_height)

# One entry per plotted series, in the same order as `data_list` below.
line_properties = [
    {"label": "GWL", "color": "black", "linestyle": "-", "marker": None},
    {"label": "peaks", "color": "green", "linestyle": "--", "marker": "^"},
    {"label": "troughs", "color": "orangered", "linestyle": "--", "marker": "v"},
]

# Intended x-axis window. Currently NOT applied (see the commented-out
# set_xlim call inside the loop); kept so it can be re-enabled in one line.
time_leftbound = datetime(2015, 12, 1)
time_rightbound = datetime(2025, 1, 1)

for select_station in tqdm(stations, desc="Processing Stations", leave=True):
    fig = None  # lets the `finally` clause know whether a figure was created
    try:
        # Extract station data from the existing data dictionary
        station_data = existing_data_dict[select_station]

        # Keep only entries that are dicts with exactly 3 items
        # (presumably "measure", "peaks" and "troughs" - TODO confirm).
        wellcode_byStation = [
            elem for elem, val in station_data.items() if isinstance(val, dict) and len(val) == 3
        ]

        N = len(wellcode_byStation)  # Number of subplots
        if N == 0:
            # plt.subplots(nrows=0) raises an opaque ValueError; report the
            # real reason and move on to the next station instead.
            print(f"[skipped] {select_station}: no well with the expected 3-entry layout")
            continue

        # N subplots in one column sharing the x-axis. squeeze=False always
        # returns a 2-D array, so the single-well case needs no special handling.
        fig, axes = plt.subplots(nrows=N, ncols=1, figsize=figsize, sharex=True, squeeze=False)
        axes = axes[:, 0]

        # Handles and labels for the shared legend. They are collected together
        # so the legend only lists series that were actually drawn.
        lines = []
        labels = []

        # Loop over each wellcode to populate each subplot
        for idx, select_wellcode in enumerate(wellcode_byStation):
            ax = axes[idx]
            well_data = station_data[select_wellcode]

            # Modelled GWL on the shared time index
            model_gwl_arr = well_data["measure"]["model"]
            model_gwl_series = pd.Series(data=model_gwl_arr, index=datetime_array)

            # NOTE(review): unpacking .values() relies on the stored order being
            # (dates, values) - confirm against the code that writes the HDF5 file.
            peak_date, peak_value = well_data["peaks"].values()
            peak_date = pd.to_datetime(peak_date, format="%Y%m%d")
            peak_series = pd.Series(data=peak_value, index=peak_date)

            trough_date, trough_value = well_data["troughs"].values()
            trough_date = pd.to_datetime(trough_date, format="%Y%m%d")
            trough_series = pd.Series(data=trough_value, index=trough_date)

            # Series to draw, in the same order as `line_properties`. Append
            # `peak_series, trough_series` to overlay the extrema again.
            data_list = [model_gwl_series]

            # zip() stops at the shorter input, so only the first
            # len(data_list) entries of `line_properties` are used.
            for data, line_props in zip(data_list, line_properties):
                line = base_plot(
                    ax,
                    data,
                    label=line_props["label"],
                    xlabel=" ",
                    ylabel=" ",
                    title=" ",
                    scaling_factor=scaling_factor,
                    color=line_props["color"],
                    linestyle=line_props["linestyle"],
                    marker=line_props["marker"],
                )

                # Collect legend entries only once (from the first subplot)
                if idx == 0:
                    lines.append(line)
                    labels.append(line_props["label"])

            # Major ticks at the start of each year, labelled with the year
            ax.xaxis.set_major_locator(mdates.YearLocator(1))
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

            # Minor ticks on June 30th of each year (mid-year marker)
            ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=6, bymonthday=30))
            # ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b %d'))

            # Make both tick levels visible
            ax.tick_params(axis='x', which='major', length=8, color='gray')
            ax.tick_params(axis='x', which='minor', length=5, color='gray')

            # Hide the per-axes legend; a single figure-level legend is added below
            ax.legend().set_visible(False)

            ax.set_title(f"{select_wellcode}", fontsize=14 * scaling_factor, fontweight='bold')

            ax.grid(which='major', axis='x', color='grey', alpha=0.5)
            ax.grid(which='minor', axis='x', color='lightgrey', alpha=0.5)

            # ax.set_xlim(time_leftbound, time_rightbound)

        # Add a single shared legend outside the subplots
        fig.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=3, frameon=False)

        fig.suptitle(
            select_station, fontsize=18 * scaling_factor * 1.5, y=0.925, fontweight="bold"
        )

        # Add a single y-axis label for the entire figure (big frame)
        fig.text(0.005, 0.5, "Groundwater Levels (m)", va="center", rotation="vertical", fontsize=20 * scaling_factor)

        # Adjust layout to prevent overlap, leaving room for the suptitle
        fig.tight_layout(rect=[0, 0, 1, 0.95])

        # Rotate the tick labels of the bottom subplot. Addressed explicitly as
        # axes[-1] instead of relying on the leaked inner-loop variable `ax`.
        plt.setp(axes[-1].get_xticklabels(), rotation=90, ha='center')

        # Forward slash works on every platform (Windows included); the previous
        # hard-coded backslash only formed a directory path on Windows.
        savepath = f"figures/{select_station}_peakstroughs.png"
        visualize.save_figure(fig, savepath)
    except Exception as e:
        # Best-effort batch: one bad station must not abort the others, but
        # report the exception type too (a bare KeyError message is just the key).
        print(f"[failed] {select_station}: {type(e).__name__}: {e}")
    finally:
        # Always release the figure, including when plotting or saving failed;
        # otherwise open figures pile up over the station loop.
        if fig is not None:
            plt.close(fig)

In [None]:
# Export the modelled groundwater-level series of one well to CSV.
#
# `select_station` and `select_wellcode` are whatever the plotting loop left
# behind. If the last station failed part-way (or had no wells), the leftover
# `model_gwl_series` / `select_wellcode` can belong to a different station than
# `select_station`, which would write data under the wrong file name. The
# series is therefore rebuilt from the source dictionary, and a mismatch is
# reported instead of being exported silently.
station_wells = existing_data_dict[select_station]
if select_wellcode not in station_wells:
    raise KeyError(
        f"well {select_wellcode!r} does not belong to station {select_station!r}; "
        "set select_station/select_wellcode explicitly before exporting"
    )

model_gwl_series = pd.Series(
    data=station_wells[select_wellcode]["measure"]["model"],
    index=datetime_array,
)

# to_frame(name=...) labels the single column explicitly. Passing `columns=` to
# the DataFrame constructor only works while the Series is unnamed and silently
# loses the values once it carries a different name.
# NOTE: the temp2/ directory must already exist; to_csv does not create it.
model_gwl_series.to_frame(name=select_wellcode).to_csv(
    f"temp2/{select_station}_{select_wellcode}.csv"
)