In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# ------------------------------------------------------------------------------
# Define Functions for Repeated Operations
# ------------------------------------------------------------------------------
def get_seasonal_and_trend_data(
    series, trend_order=1, min_period_days=7, max_components=50
):
    """
    Split a series into a polynomial trend and its strongest seasonal terms.

    Parameters:
    - series (pd.Series): The observations to decompose. Missing values are
      dropped before the trend is fitted.
    - trend_order (int): Polynomial order passed to
      analysis.get_polynomial_trend (default 1).
    - min_period_days (float): Seasonal components are kept only when their
      "Period (days)" is strictly greater than this value (default 7).
    - max_components (int): Maximum number of seasonal components kept,
      ranked by "Amplitude" (default 50).

    Returns:
    - trend: The trend returned by analysis.get_polynomial_trend, evaluated
      at every position of `series` and relabelled with `series.index`.
    - detrended_series (pd.Series): `series - trend`.
    - seasonality_info (pd.DataFrame): The filtered and ranked output of
      analysis.find_seasonality.
    """
    numeric_time_idx = datetime_handle.numeric_time_index(series)
    finite_values = series[series.notna()].values
    # NOTE(review): x and y must have the same length for the fit; this
    # assumes numeric_time_index() skips missing values the same way
    # finite_values does - confirm against the helper.

    # Polynomial trend, estimated on the valid samples and evaluated at every
    # position of the series (including the gaps).
    trend, _ = analysis.get_polynomial_trend(
        x=numeric_time_idx,
        y=finite_values,
        order=trend_order,
        x_estimate=np.arange(len(series)),
    )
    trend.index = series.index

    # Detrend Data
    detrended_series = series - trend

    # Seasonality Analysis: drop short-period components, then keep only the
    # strongest ones.
    seasonality_info = analysis.find_seasonality(
        time_series_data=detrended_series
    )
    long_period_mask = seasonality_info["Period (days)"] > min_period_days
    seasonality_info = seasonality_info[long_period_mask].nlargest(
        n=max_components, columns="Amplitude"
    )

    return trend, detrended_series, seasonality_info

In [3]:
def process_well_data(ename, wellcode, hdf5_file):
    """
    Process data for a single well, removing outliers, extracting trends,
    fitting a sinusoidal model, and correcting phase shifts.

    Loading is mandatory: if the data cannot be read, the error is printed and
    re-raised so the caller sees the real cause. Everything after loading is
    best-effort: if a later step fails, the error is printed and the DataFrame
    is returned in whatever state it reached (possibly without a "model"
    column).

    Parameters:
    - ename (str): The name of the station or entity.
    - wellcode (str): The code identifying the specific well.
    - hdf5_file (str): The path to the HDF5 file containing the data.

    Returns:
    - wellcode (str): The well code.
    - df_fromHDF5 (pd.DataFrame): The data indexed by "datetime", with
      outliers replaced by NaN and, on success, a "model" column holding
      trend + phase-corrected sinusoidal fit (NaN outside the valid range).

    Raises:
    - Exception: Whatever the loading step raises is propagated unchanged.
    """
    # Extract data from the HDF5 file for the specific location and sensor.
    # This lives in its own try block: without a DataFrame there is nothing to
    # return, and falling through to the final `return` would replace the real
    # error with an UnboundLocalError.
    try:
        df_fromHDF5 = gwatertools.h5pytools.export_data_to_dataframe(
            file_name=hdf5_file,
            location_name=ename,
            sensor_name=wellcode,
        ).set_index("datetime")
    except Exception as e:
        print(f"Error processing data for {ename}/{wellcode}: {e}")
        raise

    try:
        # Calculate mean and standard deviation over every value in the frame
        # NOTE(review): presumably the frame only holds the "value" column at
        # this point - confirm, otherwise other columns skew the statistics.
        series_average = np.nanmean(df_fromHDF5)
        series_stdev = np.nanstd(df_fromHDF5)

        # Remove outliers beyond 3 standard deviations
        lower_bound = series_average - 3 * series_stdev
        upper_bound = series_average + 3 * series_stdev
        condition = (df_fromHDF5 >= lower_bound) & (df_fromHDF5 <= upper_bound)
        df_fromHDF5 = df_fromHDF5.where(condition, np.nan)

        # Trim the DataFrame to the first and last valid indices
        df_trimmed = df_fromHDF5.loc[
            df_fromHDF5.first_valid_index() : df_fromHDF5.last_valid_index()
        ]
        series = df_trimmed["value"]

        # Extract trend and seasonality
        trend, detrended_series, seasonality_info = get_seasonal_and_trend_data(
            series
        )

        # Prepare Sinusoidal Model Inputs (zero-frequency rows are excluded)
        (
            time_values,
            observed_values,
            amplitudes,
            periods,
            phase_shifts,
            baseline,
        ) = modeling.prepare_sinusoidal_model_inputs(
            time_series_data=detrended_series,
            seasonality_info=seasonality_info.query("Frequency != 0"),
        )

        # Fit Sinusoidal Model, predicting at every position of the trimmed
        # range so gaps in the observations are filled as well
        fitted_signal = modeling.fit_sinusoidal_model(
            time_values=time_values,
            observed_values=observed_values,
            amplitudes=amplitudes,
            periods=periods,
            phase_shifts=phase_shifts,
            baseline=baseline,
            predict_time=np.arange(len(df_trimmed)),
        )

        # Correct phase shift in the fitted signal
        corrected_signal_series = pd.Series(
            analysis.correct_phase_shift(detrended_series, fitted_signal),
            index=df_trimmed.index,
        )

        # Combine trend and corrected signal to get the modeled series. It is
        # kept as a standalone Series instead of being written onto the
        # trimmed slice, which would trigger SettingWithCopyWarning.
        model_series = trend + corrected_signal_series

        # Timestamps outside the trimmed range have no model value and map to
        # NaN.
        df_fromHDF5["model"] = df_fromHDF5.index.map(model_series)

    except Exception as e:
        # Best-effort: report the failure and return the data as it stands.
        print(f"Error processing data for {ename}/{wellcode}: {e}")

    return wellcode, df_fromHDF5

In [4]:
# ------------------------------------------------------------------------------
# Main Script
# ------------------------------------------------------------------------------
gwl_hdf5_file = "test.h5"

# List available datasets in the HDF5 file for reference, leaving out every
# path that contains "date".
# NOTE(review): the substring test also drops any station or well whose own
# name contains "date" - confirm that no such names exist.
with h5py.File(gwl_hdf5_file, "r") as hdf5_file:
    available_datasets = [
        dataset_path
        for dataset_path in gwatertools.h5pytools.list_datasets(hdf5_file)
        if "date" not in dataset_path
    ]

# Group the last path component (well code) of every dataset under its first
# path component (station). Comparing the exact first component, rather than
# using str.startswith, keeps a station such as "AB" from also collecting the
# wells of a station named "ABC".
wells_by_station = {}
for dataset_path in available_datasets:
    path_parts = dataset_path.split("/")
    wells_by_station.setdefault(path_parts[0], set()).add(path_parts[-1])

# Extract unique station names from available datasets
available_stations = sorted(wells_by_station)

# Create a dictionary mapping each station to its corresponding files to process
file_to_process_dict = {
    station: wells_by_station[station] for station in available_stations
}

In [5]:
updates_dict = {}
error_log = {}

# Layout of the entry built for each station:
# {
#     "sensor_data": {
#         "date": <sorted unique "YYYYMMDD" dates as byte strings>,
#         "<wellcode>": <df.values returned by process_well_data>,
#         ...
#     },
#     "metadata": {"Updated Date": ..., "Description": ...},
# }

# NOTE(review): the [:1] slice restricts the run to the first station only -
# confirm whether that is intentional before running on real data.
for ename in list(file_to_process_dict.keys())[:1]:
    temp_data = {}
    temp_date_idx = set()

    for wellcode in file_to_process_dict[ename]:
        try:
            _, df = process_well_data(ename, wellcode, gwl_hdf5_file)
            # Compute both pieces before storing either, so a failure cannot
            # leave a well's values recorded without its dates.
            well_values = df.values
            well_dates = df.index.strftime("%Y%m%d").tolist()
        except Exception as e:
            error_log.setdefault(ename, {})[wellcode] = str(e)
            continue

        temp_data[wellcode] = well_values
        temp_date_idx.update(well_dates)

    # If every well failed there is nothing to write. Skipping avoids queuing
    # an update that holds only an empty "date" array and a fresh timestamp.
    if not temp_data:
        continue

    temp_data["date"] = np.array(sorted(temp_date_idx), dtype="S10")
    dict_to_update = {
        ename: {
            "sensor_data": temp_data,
            "metadata": {
                "Updated Date": datetime.now().strftime("%Y/%m/%d %H:%M:%S"),
                "Description": "Model the groundwater level data to reduce noise and fill missing values",
            },
        }
    }
    updates_dict.update(dict_to_update)

In [6]:
# SECTION 5: Apply Updates to HDF5 File
# -------------------------------------
# Use the predefined function to update the HDF5 file with new data and metadata
# Keep an untouched copy of the file before it is modified in place. Only a
# trailing ".h5" is rewritten: str.replace would change every occurrence in
# the path and would yield dst == src when the suffix is missing.
h5_suffix = ".h5"
if gwl_hdf5_file.endswith(h5_suffix):
    backup_hdf5_file = gwl_hdf5_file[: -len(h5_suffix)] + "_secure.h5"
else:
    backup_hdf5_file = gwl_hdf5_file + "_secure"
shutil.copy2(src=gwl_hdf5_file, dst=backup_hdf5_file)
gwatertools.h5pytools.update_hdf5(gwl_hdf5_file, updates_dict)


# Save error log if any errors occurred
if error_log:
    error_log_path = "error_log_test.txt"
    # Explicit encoding so the log does not depend on the platform default.
    with open(error_log_path, "w", encoding="utf-8") as f:
        f.writelines(
            f"Error for {ename}/{wellcode}: {error_msg}\n"
            for ename, errors in error_log.items()
            for wellcode, error_msg in errors.items()
        )
    print(
        f"Errors occurred during processing. See error log at: {error_log_path}"
    )

print("Processing completed.")

Processing completed.
