In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# ------------------------------------------------------------------------------
# Define Functions for Repeated Operations
# ------------------------------------------------------------------------------
def get_seasonal_and_trend_data(
    series, trend_order=1, min_period_days=7, max_components=30
):
    """Split a series into a polynomial trend, a detrended series and a
    table of its strongest seasonal components.

    Parameters
    ----------
    series : pandas.Series
        Observations to decompose; NaN entries are excluded from the trend
        fit. NOTE(review): presumably datetime-indexed, since it is handed
        to ``datetime_handle.numeric_time_index`` -- confirm.
    trend_order : int, default 1
        Polynomial order forwarded to ``analysis.get_polynomial_trend``.
    min_period_days : float, default 7
        Only rows of the seasonality table whose "Period (days)" is strictly
        greater than this value are kept.
    max_components : int, default 30
        Maximum number of rows kept, ranked by largest "Amplitude".

    Returns
    -------
    tuple
        ``(trend, detrended_series, seasonality_info)`` where ``trend`` is
        re-indexed onto ``series.index``, ``detrended_series`` is
        ``series - trend`` and ``seasonality_info`` is the filtered table
        returned by ``analysis.find_seasonality``.
    """
    numeric_time_idx = datetime_handle.numeric_time_index(series)
    # Only non-null observations take part in the fit.
    # NOTE(review): x and y must have equal length, so this assumes
    # numeric_time_index() also skips NaN entries -- confirm.
    finite_values = series.dropna().values

    # Polynomial trend, evaluated at one position per sample of `series`.
    # NOTE(review): x_estimate uses 0..len(series)-1, which presumably matches
    # the scale of numeric_time_idx (regularly spaced samples) -- confirm.
    trend, _ = analysis.get_polynomial_trend(
        x=numeric_time_idx,
        y=finite_values,
        order=trend_order,
        x_estimate=np.arange(len(series)),
    )
    # Put the trend on the original index so the subtraction below aligns
    # label by label.
    trend.index = series.index

    # Detrend Data
    detrended_series = series - trend

    # Seasonality Analysis: drop short periods, then keep the strongest terms.
    seasonality_info = analysis.find_seasonality(
        time_series_data=detrended_series
    )
    long_enough = seasonality_info["Period (days)"] > min_period_days
    seasonality_info = seasonality_info[long_enough].nlargest(
        n=max_components, columns="Amplitude"
    )

    return trend, detrended_series, seasonality_info

In [3]:
# ------------------------------------------------------------------------------
# Main Script
# ------------------------------------------------------------------------------
gwl_hdf5_file = "20240826_GWL_CRFP_until20240820.h5"

# List available datasets in the HDF5 file for reference, skipping every
# dataset path that contains the substring "date".
with h5py.File(gwl_hdf5_file, "r") as hdf5_file:
    available_datasets = [
        dataset_path
        for dataset_path in gwatertools.h5pytools.list_datasets(hdf5_file)
        if "date" not in dataset_path
    ]

# Extract unique station names: the first "/"-separated component of each path
available_stations = sorted(
    {dataset_path.split("/")[0] for dataset_path in available_datasets}
)

# Create a dictionary mapping each station to the dataset names (last path
# component) stored under it. The station is matched on the exact first path
# component rather than with str.startswith, so a station whose name is a
# prefix of another station's name does not collect that station's datasets.
file_to_process_dict = {
    station: {
        dataset_path.split("/")[-1]
        for dataset_path in available_datasets
        if dataset_path.split("/")[0] == station
    }
    for station in available_stations
}

In [4]:
# Station (location) name and well (sensor) code of the record to load
ename = "DAQI"
wellcode = "10050221"

# Read that record from the HDF5 file, then index the frame by its
# "datetime" column.
df_fromHDF5 = gwatertools.h5pytools.export_data_to_dataframe(
    file_name=gwl_hdf5_file, location_name=ename, sensor_name=wellcode
)
df_fromHDF5 = df_fromHDF5.set_index("datetime")

In [6]:
# Mean and standard deviation over all non-NaN entries of the frame
series_average = np.nanmean(df_fromHDF5)
series_stdev = np.nanstd(df_fromHDF5)

# Values outside mean +/- 3 standard deviations are treated as outliers
lower_bound = series_average - 3 * series_stdev
upper_bound = series_average + 3 * series_stdev
condition = df_fromHDF5.ge(lower_bound) & df_fromHDF5.le(upper_bound)

# Blank out the outliers (entries failing the condition become NaN).
# NOTE: df_fromHDF5 is overwritten here, so re-running this cell recomputes
# the statistics from the already-filtered data.
df_fromHDF5 = df_fromHDF5.where(condition)

# Keep only the span between the first and the last valid index labels
first_valid = df_fromHDF5.first_valid_index()
last_valid = df_fromHDF5.last_valid_index()
df_trimmed = df_fromHDF5.loc[first_valid:last_valid]

series = df_trimmed["value"]

In [None]:
# `series` (outlier-filtered and trimmed to its first/last valid index) is
# already built by the outlier-removal cell above, so it is reused here
# instead of being rebuilt from df_fromHDF5 a second time.

# Extract trend and seasonality
trend, detrended_series, seasonality_info = get_seasonal_and_trend_data(series)

# Display the seasonal components that were kept
seasonality_info

In [None]:
# Prepare Sinusoidal Model Inputs
# Rows with Frequency == 0 are excluded from the seasonality table first.
(
    time_values,
    observed_values,
    amplitudes,
    periods,
    phase_shifts,
    baseline,
) = modeling.prepare_sinusoidal_model_inputs(
    time_series_data=detrended_series,
    seasonality_info=seasonality_info.query("Frequency != 0"),
)