In [1]:
from appgeopy import *
from my_packages import *

In [2]:
def dataset_to_series(hdf5_file, dataset, time_idx):
    """Load one HDF5 dataset as a pandas Series trimmed to its valid range.

    Parameters
    ----------
    hdf5_file
        File name/path handed to ``h5py.File``; the file is opened read-only.
    dataset : str
        Path of the dataset inside the file.
    time_idx
        Labels used as the index of the resulting Series; expected to have
        the same length as the dataset.

    Returns
    -------
    pandas.Series
        The values from the first to the last non-missing entry (both
        inclusive). Missing values inside that window are kept. When the
        dataset has no valid entry at all, both bounds are ``None`` and the
        whole series is returned.
    """
    with h5py.File(hdf5_file, "r") as f:
        # Read the dataset contents while the file is still open.
        array = f[dataset][...]
    series = pd.Series(data=array, index=time_idx)
    start = series.first_valid_index()
    end = series.last_valid_index()
    # Both bounds are index *labels*, so slice with .loc: plain series[a:b]
    # falls back to positional, end-exclusive slicing when the labels are
    # integers, which would trim the wrong window.
    return series.loc[start:end]

In [3]:
gwl_hdf5_file = "20240828_GWL_CRFP_model.h5"

# Collect everything needed from the HDF5 file in one read-only pass
with h5py.File(gwl_hdf5_file, "r") as hdf5_file:
    # Keep only the dataset paths whose name contains "model"
    available_datasets = list(
        filter(
            lambda path: "model" in path,
            gwatertools.h5pytools.list_datasets(hdf5_file),
        )
    )
    # A dataset counts as active when the group addressed by its first two
    # path segments carries the attribute Status == "Active"
    active_datasets = [
        path
        for path in available_datasets
        if hdf5_file["/".join(path.split("/")[:2])].attrs.get("Status")
        == "Active"
    ]
    # Parse the 'date' dataset (YYYYMMDD values) into a datetime index
    datetime_idx = pd.to_datetime(hdf5_file["date"][...], format="%Y%m%d")

# Station name = first path segment of each active dataset; de-duplicated and sorted
station_names = set(map(lambda path: path.split("/")[0], active_datasets))
available_stations = sorted(station_names)

# Show the first five available stations
available_stations[:5]

['ANHE', 'ANNAN', 'BEIGANG', 'BOZI', 'CAICUO']

In [4]:
# One (initially empty) per-well mapping for every active station
all_measurement_data = {name: {} for name in available_stations}

for select_dataset in tqdm(active_datasets):
    # The first two path segments are the station and the well code
    station, wellcode, *_ = select_dataset.split("/")

    # Load this dataset as a pandas Series indexed by datetime_idx
    model_gwl_series = dataset_to_series(
        hdf5_file=gwl_hdf5_file, dataset=select_dataset, time_idx=datetime_idx
    )

    # Parallel date/value lists; one entry is added per year that has both
    # at least one peak and at least one trough
    peak_dates, peak_values = [], []
    trough_dates, trough_values = [], []

    # Walk through the years present in the series index
    for year in model_gwl_series.index.year.unique():
        yearly_series = model_gwl_series.loc[str(year)]
        peaks, troughs = analysis.find_peaks_troughs(yearly_series)

        # Nothing to record for a year lacking either peaks or troughs
        if len(peaks) == 0 or len(troughs) == 0:
            continue

        peak_to_peak = analysis.find_peak_to_peak(
            data=yearly_series, peak_idx=peaks, trough_idx=troughs
        )

        # Maximum of the result: its index label (as YYYYMMDD text) and value
        peak_dates.append(peak_to_peak.idxmax().iloc[0].strftime("%Y%m%d"))
        peak_values.append(peak_to_peak.max().iloc[0])

        # Minimum of the result: its index label (as YYYYMMDD text) and value
        trough_dates.append(peak_to_peak.idxmin().iloc[0].strftime("%Y%m%d"))
        trough_values.append(peak_to_peak.min().iloc[0])

    # File the per-well result under its station
    all_measurement_data[station][wellcode] = {
        "peaks": {"date": peak_dates, "value": peak_values},
        "troughs": {"date": trough_dates, "value": trough_values},
    }

100%|████████████████████████████████████████████████████████████████████████████████| 238/238 [00:18<00:00, 12.87it/s]


In [5]:
# Copy the original HDF5 file to a secure version
shutil.copy2(src=gwl_hdf5_file, dst=gwl_hdf5_file.replace(".h5", "_secure.h5"))

# Extract existing data and metadata
with h5py.File(gwl_hdf5_file, "r") as hdf5_file:
    existing_data_dict = gwatertools.h5pytools.hdf5_to_data_dict(hdf5_file)
    existing_metadata_dict = gwatertools.h5pytools.hdf5_to_metadata_dict(
        hdf5_file
    )

# Update dictionaries with new measurement data
updated_data_dict = gwatertools.h5pytools.update_data_dict(
    existing_data_dict, all_measurement_data
)

# Write updated data and metadata back to a new HDF5 file
output_file_name = f"{datetime.now().strftime('%Y%m%d')}_GWL_CRFP.h5"
with h5py.File(output_file_name, "w") as hdf5_file:
    gwatertools.h5pytools.metadata_to_hdf5(hdf5_file, existing_metadata_dict)
    gwatertools.h5pytools.data_to_hdf5(hdf5_file, updated_data_dict)

AttributeError: module 'appgeopy.gwatertools.h5pytools' has no attribute 'h5py_to_data_dict'