In [1]:
from appgeopy import *
from my_packages import *
from tools import *

In [2]:
def get_seasonal_and_trend_data(series):
    """
    Split a time series into a linear trend, a detrended series and a table
    of candidate seasonal cycles.

    Steps performed:
    1. Build a full timeline from the series index and re-table the data
       onto it (via ``datetime_handle``).
    2. Estimate the linear trend of the completed series.
    3. Subtract the trend to obtain the detrended series.
    4. Ask ``analysis.find_seasonality`` for cycles, then filter, round and
       rank them.

    Args:
        series: Input time series with datetime index (possibly with gaps).

    Returns:
        trend: Trend returned by ``analysis.get_linear_trend``, re-indexed to
            the completed timeline.
        detrended_series: Completed series with the trend subtracted.
        seasonality_info: DataFrame holding at most the 50 largest-amplitude
            cycles with period > 7 days, finite period and non-zero
            frequency. "Amplitude" and "Phase" are rounded to 2 decimals and
            "Period (days)" to whole days.
    """
    # Continuous timeline spanning the data period
    # NOTE(review): presumably one entry per day - confirm in datetime_handle
    fulltime_arr = datetime_handle.get_fulltime(series.index)

    # Put the original observations onto the continuous timeline
    complete_series = datetime_handle.fulltime_table(series, fulltime_arr)

    # First returned value is the trend; the second one is not needed here
    trend, _ = analysis.get_linear_trend(complete_series)
    trend.index = complete_series.index  # Align the trend with the data timeline

    # Remove the trend so that only the cyclical part and noise remain
    detrended_series = complete_series - trend

    # Candidate cycles of the detrended data
    # NOTE(review): presumably a Fourier-based search - confirm in analysis
    seasonality_info = analysis.find_seasonality(time_series_data=detrended_series)

    # Keep only meaningful cycles:
    # 1. longer than weekly (> 7 days)
    # 2. finite period (drops non-cycles)
    # 3. non-zero frequency (drops the constant component)
    period_days = seasonality_info["Period (days)"]
    meaningful = (
        (period_days > 7)
        & (period_days != np.inf)
        & (seasonality_info["Frequency"] != 0)
    )
    # Explicit copy: the column assignments below must write to an
    # independent frame, not to a slice of the original
    # (this avoids pandas' SettingWithCopyWarning).
    seasonality_info = seasonality_info[meaningful].copy()

    # Round values for cleaner results
    seasonality_info["Amplitude"] = seasonality_info["Amplitude"].round(2)
    seasonality_info["Period (days)"] = seasonality_info["Period (days)"].round(0)
    seasonality_info["Phase"] = seasonality_info["Phase"].round(2)

    # Keep only the 50 strongest cycles (by rounded amplitude)
    seasonality_info = seasonality_info.nlargest(n=50, columns="Amplitude")

    return trend, detrended_series, seasonality_info


def model_timeseries(timeseries):
    """
    Build a trend-plus-cycles model of a time series.

    The series is decomposed with ``get_seasonal_and_trend_data``, a sum of
    sinusoids is fitted to the detrended part through the ``modeling``
    helpers, the fitted cycles are passed through
    ``analysis.correct_phase_shift`` and the result is added back onto the
    trend.

    Args:
        timeseries: Input time series; its index selects the rows returned.

    Returns:
        The modeled values at the timestamps of ``timeseries``.
    """
    # Trend, detrended residual and the table of detected cycles
    trend_part, residual, cycles = get_seasonal_and_trend_data(timeseries)

    # Inputs for the sinusoidal fit, in the order the helper returns them:
    # time points, observations, amplitudes, periods, phase shifts, baseline
    sinusoid_inputs = modeling.prepare_sinusoidal_model_inputs(
        time_series_data=residual,
        seasonality_info=cycles,
    )
    t_obs, y_obs, amps, cycle_lengths, phases, offset = sinusoid_inputs

    # Predict at every position of the detrended series
    prediction_steps = np.arange(len(residual))
    fitted_cycles = modeling.fit_sinusoidal_model(
        time_values=t_obs,
        observed_values=y_obs,
        amplitudes=amps,
        periods=cycle_lengths,
        phase_shifts=phases,
        baseline=offset,
        predict_time=prediction_steps,
    )

    # Phase-corrected cycles, labelled with the detrended series' index
    aligned_cycles = pd.Series(
        analysis.correct_phase_shift(residual, fitted_cycles),
        index=residual.index,
    )

    # Trend + cycles, restricted to the timestamps present in the input
    return (trend_part + aligned_cycles).loc[timeseries.index]


def show_elapsed_time(start, end):
    """
    Print the time elapsed between two timestamps as hours, minutes, seconds.

    Args:
        start: Start timestamp in seconds (e.g. from ``time.time()``).
        end: End timestamp in seconds.
    """
    # Round first so values such as 59.999 s are reported as a full minute
    total = round(end - start, 2)

    # Peel off whole hours, then whole minutes from what is left
    whole_hours, leftover = divmod(total, 3600)
    hours = int(whole_hours)
    minutes = int(leftover // 60)
    seconds = int(total % 60)

    print(f"Processing Time: {hours} hours {minutes} minutes {seconds} seconds")

In [3]:
# Path of the input HDF5 file holding the MLCW dataset
h5_fpath = "20250307_MLCW_CRFP_v7.h5"
# initiate MLCW class object
mlcw_obj = MLCW(h5_fpath=h5_fpath)
# Load the data and metadata containers; the modeling cell below indexes
# both of them by station
mlcw_data, mlcw_metadata = mlcw_obj.get_data()
# Stations available in the file; this list drives the modeling loop
available_stations = mlcw_obj.list_stations()
# Preview the first five stations (notebook cell output)
available_stations[:5]

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '220250307_MLCW_CRFP_v7.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

#### Modeling MLCW data

In [None]:
# Model every ring record of every station and store the result next to the
# original values in mlcw_data. The whole run is timed.
start = time.time()
# select_station = available_stations[0]
for select_station in tqdm(available_stations, desc="Modeling ring records", position=0, leave=True):

    # Extract station code from metadata using split operation
    # NOTE(review): station_wellcode is not used anywhere else in this cell
    station_wellcode = mlcw_metadata[select_station]["LandSubsidenceMonitoringWellIdentifier"].split("_")[-1]
    
    # Original readings of the station, transposed and scaled by 1000
    # NOTE(review): presumably rows become dates, columns become rings, and
    # the scaling is m -> mm - confirm against MLCW.build_dataframe
    original_mlcw = mlcw_obj.build_dataframe(station=select_station, value_type="original")
    original_mlcw = original_mlcw.T * 1000
    original_mlcw = original_mlcw.sort_index()
    
    # Keep rows up to the label "2022"
    # NOTE(review): with a datetime index this keeps everything through the
    # end of 2022; verify that the stored array is expected to be truncated
    original_mlcw = original_mlcw.loc[:"2022", :]
    
    colnames = original_mlcw.columns
    
    # Empty frame on the same index; one modeled column is added per ring
    model_original_mlcw = pd.DataFrame(data=None, index=original_mlcw.index)
    
    for col in tqdm(colnames, desc=f"Processing {select_station}", position=1, leave=False):
        original_mlcw_byRing = original_mlcw.loc[:, col]
    
        # Trim leading and trailing missing values so the model only sees the
        # span between the first and last valid observation
        start_idx = original_mlcw_byRing.first_valid_index()
        end_idx = original_mlcw_byRing.last_valid_index()
    
        original_mlcw_byRing = original_mlcw_byRing.loc[start_idx:end_idx]
    
        # Fit the trend + sinusoid model, then undo the x1000 scaling
        modeled_original_mlcw_byRing = model_timeseries(original_mlcw_byRing)
        modeled_original_mlcw_byRing.name = col
        modeled_original_mlcw_byRing = modeled_original_mlcw_byRing / 1000
        # Look up each index label in the modeled series so the column is
        # aligned to the station-wide index
        model_original_mlcw[col] = model_original_mlcw.index.map(modeled_original_mlcw_byRing)
    
    # Store the modeled values, transposed back, under the "model_original" key
    mlcw_data[select_station]["values"]["model_original"] = model_original_mlcw.T.to_numpy()

end = time.time()
show_elapsed_time(start, end)

In [None]:
# Date stamp (YYYYMMDD) used as the prefix of the output file name
today_string = datetime.now().strftime("%Y%m%d")

# Record what this processing step added to the dataset
# NOTE(review): the date inside the text is hard-coded and independent of
# today_string
mlcw_metadata["Description_3"] = (
    "2025/03/08: Model the ring readings using sinusoidal fitting function because they also show the sinusoidal patterns"
)

# Write the updated data and metadata to a new, date-stamped v8 HDF5 file
# (mode "w" creates the file and overwrites an existing one of the same name)
with h5py.File(f"{today_string}_MLCW_CRFP_v8.h5", "w") as hdf5_file:
    gwatertools.h5pytools.data_to_hdf5(hdf5_file, mlcw_data)
    gwatertools.h5pytools.metadata_to_hdf5(hdf5_file, mlcw_metadata)

#### Plot MLCW data ring by ring

#### Plot MLCW measurement differencing --> detect weird changes

#### Export original file to Excel