In [1]:
# NOTE(review): wildcard imports. The names used later in this notebook
# (plt, pd, os, sys, pickle, tqdm, visualize) are never imported explicitly
# in the cells visible here, so they presumably come from these two packages.
# Confirm, and prefer explicit imports so the dependencies are visible.
from appgeopy import *
from my_packages import *

# Set matplotlib for inline plotting
%matplotlib inline
plt.style.use("default")  # Optional: clean plot style

In [2]:
# Make the local `pca_imputation` module importable.
# The membership check keeps this cell idempotent: re-running it no longer
# appends the same directory to sys.path a second (third, ...) time.
pca_imputation_dir = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\3_MGTWR\20250415_PCA_Imputation"
if pca_imputation_dir not in sys.path:
    sys.path.append(pca_imputation_dir)

# NOTE(review): wildcard import; `run_fundamental_signal_extraction`, used by
# the processing cell below, presumably comes from this module -- confirm.
from pca_imputation import *

In [3]:
# Lookup table: GPS station code -> path of that station's CSV file.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
select_GPS_fpath = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\1_PrepareDatasets\6_GPS_Vertical\GPS_station_and_filepath.pkl"
with open(select_GPS_fpath, mode="rb") as pkl_handle:
    gps_and_fpath_dict = pickle.load(pkl_handle)

# Display the mapping as the cell output
gps_and_fpath_dict

{'ALIS': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\ALIS.csv',
 'BDES': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\BDES.csv',
 'C001': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\C001.csv',
 'C002': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\C002.csv',
 'CAOT': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CAOT.csv',
 'CHIA': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CHIA.csv',
 'CHIN': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CHIN.csv',
 'CHIU': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CHIU.csv',
 'CHUA': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CHUA.csv',
 'CHUK': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_HsuYaru\\2__Processed\\A__Fulltime\\CHUK.csv',
 'CHYI': 'E:\\SUBSIDENCE_PROJECT_DATA\\GPS_2024_Hs

In [4]:
# Load the InSAR point table from a zip-compressed pickle.
insar_pickle_fpath = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\1_PrepareDatasets\3_InSAR\ASC_overlap_desc_+pointkey_TWD97.pkl"
insar_df = pd.read_pickle(insar_pickle_fpath, compression="zip")

# Peek at the first ten column labels only (the frame itself is not displayed)
insar_df.columns[:10]

Index(['CODE', 'PointKey', 'HEIGHT', 'H_STDEV', 'VEL', 'V_STDEV', 'COHERENCE',
       'EFF_AREA', 'D20160414', 'D20160508'],
      dtype='object')

In [5]:
# ================================================================
# GPS DATA PROCESSING WITH PCA SIGNAL EXTRACTION
# ================================================================
# For every station in `gps_and_fpath_dict`:
#   1. read the station CSV (first column parsed as the datetime index),
#   2. keep the rows inside the analysis window,
#   3. run `run_fundamental_signal_extraction` on every column and store the
#      result next to it as "<name>_pca(...)",
#   4. subtract the first row and multiply by 1000,
#   5. write "<station>_pca.csv" and "<station>_fig.png" into `savefolder`.
# A failure in one station is reported together with the station code and the
# loop moves on; all skipped stations are listed again at the end.

# ----------------------------------------------------------------
# SETUP: Create output directory
# ----------------------------------------------------------------
savefolder = "PCA_imputed_GPS"

if not os.path.exists(savefolder):
    os.makedirs(savefolder)
    print(f"Created output directory: {savefolder}")

# Analysis window, used as a label slice on the datetime index
# (partial-string slicing: both end points are included).
window_start, window_end = "2016-4-14", "2022-1"

# ----------------------------------------------------------------
# MAIN PROCESSING LOOP: Process each GPS station
# ----------------------------------------------------------------
all_stations = list(gps_and_fpath_dict.keys())
failed_stations = {}  # station code -> "ErrorType: message"

for select_station in tqdm(all_stations, desc="Processing GPS stations"):
    fig = None  # lets `finally` know whether a figure has to be closed
    try:
        # ----------------------------------------------------------------
        # DATA LOADING: Load GPS data for current station
        # ----------------------------------------------------------------
        fpath_byStation = gps_and_fpath_dict[select_station]

        GPS_df_byStation = pd.read_csv(
            fpath_byStation, parse_dates=[0], index_col=[0]
        )

        # ----------------------------------------------------------------
        # DATA PREPROCESSING: Time subset and column naming
        # ----------------------------------------------------------------
        # .copy() so the renaming / new columns below are written to an
        # independent frame and not to a slice of GPS_df_byStation
        # (avoids SettingWithCopyWarning).
        subset_byStation = GPS_df_byStation.loc[window_start:window_end].copy()

        if subset_byStation.empty:
            raise ValueError(
                f"no samples between {window_start} and {window_end}"
            )

        # Update column names: replace 'm' with 'mm' (values are scaled by
        # 1000 further down).
        # NOTE(review): this replaces EVERY 'm' in a column name, not only the
        # unit -- kept as-is so the saved column names do not change.
        subset_byStation.columns = [
            col.replace("m", "mm") for col in GPS_df_byStation.columns
        ]

        # Snapshot of the data columns, taken before any PCA column is added,
        # so the loop below never iterates over a frame it is extending.
        original_cols = list(subset_byStation.columns)
        pca_cols = []

        # ----------------------------------------------------------------
        # PCA SIGNAL EXTRACTION: Apply to each coordinate column
        # ----------------------------------------------------------------
        for col in original_cols:
            # Extract time series for current coordinate
            gps_series = subset_byStation.loc[:, col]

            # Column name for the PCA result, e.g. "x(mm)" -> "x_pca(mm)"
            series_newname = gps_series.name.replace("(", "_pca(")

            # Apply PCA signal extraction
            results = run_fundamental_signal_extraction(
                data=gps_series,
                signal_strength="strong",  # Extract only major trends/patterns
            )

            # Extract fundamental signal as pandas Series
            signal_results = results["signal_results"]
            fundamental_signal = pd.Series(
                signal_results["fundamental_signal"],
                index=signal_results["time_values"],
            )

            # Align the signal to the frame's index; timestamps missing from
            # the signal become NaN.
            subset_byStation[series_newname] = subset_byStation.index.map(
                fundamental_signal
            )
            pca_cols.append(series_newname)

        # ----------------------------------------------------------------
        # DATA NORMALIZATION: Remove initial offset and convert to mm
        # ----------------------------------------------------------------
        # Subtract first row values (remove initial position) and scale by 1000.
        # NOTE(review): a column whose first sample is NaN becomes all-NaN
        # here -- confirm this is acceptable for stations with a leading gap.
        subset_byStation = (
            subset_byStation.subtract(subset_byStation.iloc[0, :]) * 1000
        )

        # ----------------------------------------------------------------
        # SAVE RESULTS: Export processed data
        # ----------------------------------------------------------------
        output_filepath = os.path.join(savefolder, f"{select_station}_pca.csv")
        subset_byStation.to_csv(output_filepath, index=True)

        # ----------------------------------------------------------------
        # VISUALIZATION: Create comparison plots
        # ----------------------------------------------------------------
        fig, axes = plt.subplots(3, 1, figsize=(12, 4), sharex=True)

        # One panel per coordinate (first three data columns). Original and
        # PCA series are paired by NAME, not by "column i vs. column i + 3",
        # so extra columns in a CSV cannot shift the pairing.
        for ax, orig_name, pca_name in zip(
            axes, original_cols[:3], pca_cols[:3]
        ):
            original_col = subset_byStation[orig_name]
            ax.plot(
                original_col, label=f"Original {original_col.name}", alpha=0.7
            )

            pca_col = subset_byStation[pca_name]
            ax.plot(pca_col, label=f"PCA {pca_col.name}", linewidth=2)

            ax.legend()
            ax.grid(True, alpha=0.3)
            ax.set_ylabel("Displacement (mm)")

        # Set plot title and labels
        fig.suptitle(f"GPS Station: {select_station} - Original vs PCA Signals")
        axes[-1].set_xlabel("Time")

        # ----------------------------------------------------------------
        # SAVE PLOTS: Export visualization
        # ----------------------------------------------------------------
        plot_filepath = os.path.join(savefolder, f"{select_station}_fig.png")
        visualize.save_figure(fig=fig, savepath=plot_filepath)

        print(f"  Saved plot: {plot_filepath}")
    except Exception as e:
        # Best-effort batch: keep going, but say WHICH station failed and why.
        failed_stations[select_station] = f"{type(e).__name__}: {e}"
        print(f"  [{select_station}] skipped - {type(e).__name__}: {e}")
    finally:
        # Close this station's figure even when saving/plotting raised, so
        # figures do not pile up in memory over the whole station list.
        if fig is not None:
            plt.close(fig)

print(f"\n{'='*50}")
print("GPS processing complete!")
print(f"Results saved in: {savefolder}")
if failed_stations:
    print(
        f"Stations skipped ({len(failed_stations)}): "
        f"{', '.join(failed_stations)}"
    )
print(f"{'='*50}")

Processing GPS stations:   0%|          | 0/112 [00:00<?, ?it/s]

  Saved plot: PCA_imputed_GPS\ALIS_fig.png
  Saved plot: PCA_imputed_GPS\BDES_fig.png
  Saved plot: PCA_imputed_GPS\C001_fig.png
  Saved plot: PCA_imputed_GPS\C002_fig.png
  Saved plot: PCA_imputed_GPS\CAOT_fig.png
  Saved plot: PCA_imputed_GPS\CHIA_fig.png
  Saved plot: PCA_imputed_GPS\CHIN_fig.png
  Saved plot: PCA_imputed_GPS\CHIU_fig.png
  Saved plot: PCA_imputed_GPS\CHUA_fig.png
  Saved plot: PCA_imputed_GPS\CHUK_fig.png
  Saved plot: PCA_imputed_GPS\CHYI_fig.png
  Saved plot: PCA_imputed_GPS\CWEN_fig.png
  Saved plot: PCA_imputed_GPS\DNAN_fig.png
  Saved plot: PCA_imputed_GPS\DOSH_fig.png
  Saved plot: PCA_imputed_GPS\DPIN_fig.png
  Saved plot: PCA_imputed_GPS\ERLN_fig.png
Parameter search failed: No valid parameter combinations found, using conservative defaults
Adjusting embedding dimension to 1.
negative dimensions are not allowed
  Saved plot: PCA_imputed_GPS\FKDO_fig.png
  Saved plot: PCA_imputed_GPS\FNGU_fig.png
  Saved plot: PCA_imputed_GPS\FUNY_fig.png
  Saved plot: PCA_i