In [1]:
"""
Script for Analyzing GPS Data Trends and Seasonality

This script processes GPS data to identify and remove trends, analyze seasonality, and correct phase shifts.
The workflow includes:
- Loading data from an Excel file
- Removing trends using polynomial fitting
- Analyzing seasonality using Fourier Transform
- Fitting a sinusoidal model to the detrended data
- Correcting phase shifts in the reconstructed signal

Author: David
Date: 2024/07/02
"""

'\nScript for Analyzing GPS Data Trends and Seasonality\n\nThis script processes GPS data to identify and remove trends, analyze seasonality, and correct phase shifts.\nThe workflow includes:\n- Loading data from an Excel file\n- Removing trends using polynomial fitting\n- Analyzing seasonality using Fourier Transform\n- Fitting a sinusoidal model to the detrended data\n- Correcting phase shifts in the reconstructed signal\n\nAuthor: David\nDate: 2024/07/02\n'

In [2]:
from appgeopy import *
from my_packages import *

# ------------------------------------------------------------------------------
# Program Constants and Paths
# ------------------------------------------------------------------------------
# Top-level folder containing the input and output data
topfolder = r"E:\SUBSIDENCE_PROJECT_DATA\GPS_2023_YehDaGang\2023_NEW_20240515\20240527_PROCESSED_DATA\_MERGED"

# Path to the Excel file containing the GPS data.
# Raw string on purpose: "\G" is not a valid escape sequence, and a plain
# string literal containing it triggers an invalid-escape warning on newer
# Python versions. The resulting path is byte-identical to the old one.
fpath = os.path.join(topfolder, r"1__Datetime_Modified\GPS_2022_2023_merged_mod_v3.xlsx")

# Path for saving the processed data
savefolder = os.path.join(topfolder, "4__Modeled")
savename = r"GPS_2022_2023_model_v4.xlsx"
savepath = os.path.join(savefolder, savename)

# Get sheet names from the Excel file BEFORE touching any previous output, so
# that an unreadable or mistyped input path cannot wipe the last good result.
try:
    available_sheetnames = data_io.get_sheetnames(fpath)
except Exception as e:
    # Raise instead of calling exit(): exit() is an interactive-shell helper
    # and is not meant for programs. A chained exception stops the cell (or a
    # plain script, with a non-zero status) and keeps the original traceback.
    raise RuntimeError(f"Error reading sheet names from '{fpath}': {e}") from e

# Ensure the save directory exists
os.makedirs(savefolder, exist_ok=True)

# Remove existing output file if it exists.
# NOTE(review): presumably needed because data_io.save_df_to_excel adds sheets
# to an existing workbook, so a stale file would mix runs — confirm.
if os.path.isfile(savepath):
    os.remove(savepath)

# Initialize error log (one human-readable message per failure)
error_log = []

# ------------------------------------------------------------------------------
# Main Processing Loop
# ------------------------------------------------------------------------------
# For every sheet: load it, then for every column fit a polynomial trend and a
# multi-sinusoid seasonal model to the detrended data, store the sum as
# "model_<column>", and append the sheet to the output workbook.
# Failures are collected in `error_log` instead of aborting the run.

# Order of the polynomial used to model the long-term trend of each series
TREND_POLY_ORDER = 3
# Number of spectral components (largest amplitude first) kept for the model
MAX_SEASONAL_COMPONENTS = 50

# Open the workbook once and reuse the handle for every sheet; passing the
# path to read_excel inside the loop would reopen the file on every iteration.
# A failure to open the workbook is fatal here, matching the sheet-name lookup
# above, whereas per-sheet and per-column failures are only logged.
with pd.ExcelFile(fpath) as workbook:
    for select_sheetname in tqdm(available_sheetnames):
        try:
            # Load data from a specific sheet; first column becomes a datetime index
            df = pd.read_excel(workbook, sheet_name=select_sheetname, parse_dates=[0], index_col=[0])
        except Exception as e:
            error_log.append(f"Error reading sheet '{select_sheetname}' from '{fpath}': {e}")
            continue

        try:
            # Preprocessing: Trim DataFrame to First and Last Valid Index.
            # Work on a copy so `df` keeps its full, original index for the output.
            df_trimmed = df.loc[df.first_valid_index():df.last_valid_index()].copy()

            for select_col in df_trimmed.columns:
                try:
                    # Polynomial Trend Removal
                    gps_series = df_trimmed[select_col]
                    # NOTE(review): x comes from the helper on the full series while
                    # y is NaN-filtered here; presumably the helper also skips NaN
                    # rows so both have the same length — confirm.
                    numeric_time_idx = datetime_handle.numeric_time_index(gps_series)
                    finite_gps_values = gps_series[~gps_series.isnull()].values

                    # Evaluate the fitted trend at every row position of the trimmed
                    # series (gaps included).
                    # NOTE(review): assumes numeric_time_idx is expressed in the same
                    # units as these row positions — confirm.
                    gps_trend, _ = get_polynomial_trend(
                        x=numeric_time_idx,
                        y=finite_gps_values,
                        order=TREND_POLY_ORDER,
                        x_estimate=np.arange(len(gps_series)),
                    )
                    gps_trend.index = df_trimmed.index

                    # Detrend Data
                    gps_detrend = gps_series - gps_trend

                    # Seasonality Analysis: keep only the strongest components
                    seasonality_info = analysis.find_seasonality(time_series_data=gps_detrend)
                    seasonality_info = seasonality_info.nlargest(n=MAX_SEASONAL_COMPONENTS, columns="Amplitude")

                    # Prepare Sinusoidal Model Inputs (zero-frequency rows are excluded)
                    (
                        time_values,
                        observed_values,
                        amplitudes,
                        periods,
                        phase_shifts,
                        baseline,
                    ) = modeling.prepare_sinusoidal_model_inputs(
                        time_series_data=gps_detrend,
                        seasonality_info=seasonality_info.query("Frequency != 0"),
                    )

                    # Fit Sinusoidal Model and Correct Phase Shift
                    fitted_signal = modeling.fit_sinusoidal_model(
                        time_values=time_values,
                        observed_values=observed_values,
                        amplitudes=amplitudes,
                        periods=periods,
                        phase_shifts=phase_shifts,
                        baseline=baseline,
                        predict_time=np.arange(len(df_trimmed)),
                    )

                    corrected_signal = analysis.correct_phase_shift(
                        original_data=gps_detrend, reconstructed_series=fitted_signal
                    )

                    corrected_signal_series = pd.Series(corrected_signal, index=df_trimmed.index)

                    # Add the trend back to the corrected signal
                    gps_model = gps_trend + corrected_signal_series

                    # Map the modeled signal back to the original time series index;
                    # rows outside the trimmed range have no model value and become NaN.
                    # The f-string (rather than "model_" + select_col) also accepts
                    # non-string column labels.
                    df[f"model_{select_col}"] = df.index.map(gps_model)

                except Exception as e:
                    # Best effort: log and move on to the next column
                    error_log.append(f"Error processing column '{select_col}' in sheet '{select_sheetname}': {e}")

        except Exception as e:
            error_log.append(f"Error processing sheet '{select_sheetname}': {e}")
            continue  # do not save a sheet whose preprocessing failed

        # Save the processed DataFrame to Excel
        try:
            data_io.save_df_to_excel(df_to_save=df, filepath=savepath, sheet_name=select_sheetname, index=True, verbose=False)
        except Exception as e:
            error_log.append(f"Error saving sheet '{select_sheetname}' to '{savepath}': {e}")

# ------------------------------------------------------------------------------
# Error Logging
# ------------------------------------------------------------------------------
# Save error log if any errors occurred
if error_log:
    error_log_path = os.path.join(savefolder, "error_log.txt")
    # Explicit UTF-8: the messages embed sheet names, column names and
    # exception text, which the platform's default codec may be unable to
    # encode — that would raise while writing the log itself.
    with open(error_log_path, "w", encoding="utf-8") as f:
        f.writelines(f"{error}\n" for error in error_log)
    print(f"Errors occurred during processing. See error log at: {error_log_path}")


100%|██████████████████████████████████████████████████████████████████████████████████| 55/55 [06:57<00:00,  7.60s/it]
