In [1]:
from appgeopy import *
from my_packages import *

#### I want to check the corresponding CSV files of the GPS stations

#### Search for GPS stations which have surrounding PS points

#### Compare GPS and InSAR

Now that we have the GPS data and know which GPS stations have surrounding PS points (within a 1-km radius), we can perform the comparison.

In [2]:
# Load the InSAR displacement table (cumulative displacement, per the file name).
dU_timeseries = pd.read_pickle(
    r"D:\1000_SCRIPTS\003_Project002\20251111_GTWR003\1_PrepareDatasets\Leveling\CORRECTED_Monthly_CUMDISP_saveqgis_Oct2025_updateNov.xz"
)

# Index the table by "PointKey" so rows can be selected by point identifier.
# An explicit column check replaces the former bare `except: pass`: the only
# case that should be skipped is "PointKey is not a column" (e.g. it is
# already the index); any other failure should surface instead of being hidden.
if "PointKey" in dU_timeseries.columns:
    dU_timeseries = dU_timeseries.set_index("PointKey")

# Drop the coordinate columns so that only the displacement columns remain.
dU_timeseries = dU_timeseries.drop(labels=["X_TWD97", "Y_TWD97"], axis=1)

# gps_file_location = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\1_PrepareDatasets\6_GPS_Vertical\GPS_station_and_filepath.pkl"
gps_file_location = r"GPS_station_and_filepath_Oct2025.pkl"

# Mapping keyed by GPS station name; each value is later passed to
# pd.read_csv as that station's file path.
# NOTE(security): pickle.load executes arbitrary code embedded in the file;
# only load pickles produced by this project.
with open(gps_file_location, "rb") as f:
    gps_and_fpath_dict = pickle.load(f)

# Table of InSAR points located around each GPS station; the "STATION" and
# "PointKey" columns are read from it further down.
points_around_gps = pd.read_pickle(r"InSAR_points_around_GPS_200m_v3.xz")

#### GPS-InSAR Comparison Module

This script processes and compares GPS and InSAR time series data for multiple stations.
It calculates displacement trends, generates comparative visualizations, and exports results.

Key operations:
1. Iterates through available stations present in both datasets
2. Extracts and processes time series data for each station
3. Calculates and compares displacement velocities
4. Generates and saves visualization figures
5. Maintains a results cache for further analysis

In [3]:
# Column-oriented cache of the velocity comparison: the three lists are
# appended to together, one entry per successfully processed station.
insar_gps_slope_cache = {
    "STATION": [],
    "InSAR_slope_mmyr": [],
    "GPS_slope_mmyr": [],
}

# Output directory for saving generated figures
fig_savefolder = r"GPS_InSAR_figs_v3/"

# exist_ok=True makes this a single idempotent call: no separate existence
# check is needed and re-running the cell never fails on an existing folder.
os.makedirs(fig_savefolder, exist_ok=True)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# DATA SELECTION AND PREPARATION
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Stations that appear both in the InSAR point table and as a key of the GPS
# filepath dictionary, sorted for a consistent processing order.
# (set.intersection accepts any iterable, so the dict is passed directly and
# contributes its keys.)
available_stations = sorted(
    set(points_around_gps["STATION"].unique()).intersection(
        gps_and_fpath_dict
    )
)

# Process every station found in both datasets, with progress shown by tqdm.
# For each station: select its surrounding InSAR points, load the GPS record,
# fit a linear trend to both series, store the two velocities in
# insar_gps_slope_cache, and save a comparison figure.
# Any exception raised for one station is printed together with the station
# name, and the loop continues with the next station.
# Previous single-station approach is commented out
# select_station = available_stations[0]
for select_station in tqdm(available_stations):
    try:

        # Filter points dataset to include only points surrounding current station
        points_byStation = points_around_gps.query("STATION==@select_station")

        # Extract point identifiers for the selected station's vicinity
        pointkey_points_byStation = points_byStation["PointKey"].tolist()

        # Retrieve displacement time series for the identified points
        dU_byStation = dU_timeseries.loc[pointkey_points_byStation, :]

        # Keep only the date columns (names starting with "D"). The datetime
        # labels below are built from exactly these columns, so the later
        # column re-labelling can never hit a length mismatch.
        is_date_column = [
            col.startswith("D") for col in dU_byStation.columns
        ]
        dU_byStation = dU_byStation.loc[:, is_date_column]

        # Convert column headers to datetime objects by dropping the leading
        # "D" and parsing the remainder as a date string
        time_arr = pd.to_datetime([col[1:] for col in dU_byStation.columns])

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # GPS DATA PROCESSING
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        # Retrieve file path for the current GPS station
        gps_fpath_byStation = gps_and_fpath_dict[select_station]

        # Load GPS data with datetime indexing (first CSV column is the date)
        gps_df_byStation = pd.read_csv(
            gps_fpath_byStation, parse_dates=[0], index_col=[0]
        )

        # Unit heuristic: if the largest absolute value in the third data
        # column is below 1, the record is assumed to be in meters and ALL
        # columns are scaled by 1000 (to millimeters).
        # NOTE(review): a millimeter record that never reaches 1 mm would also
        # be rescaled - confirm this cannot happen for these stations.
        if abs(gps_df_byStation.iloc[:, 2]).max() < 1:
            gps_df_byStation = gps_df_byStation * 1000

        # Keep GPS observations from the first InSAR epoch onward.
        # NOTE(review): no upper bound is applied, so GPS data after the last
        # InSAR epoch still enters the GPS trend - confirm this is intended.
        subset_gps_df_byStation = gps_df_byStation.loc[time_arr[0] :, :]

        # Nothing to compare when the GPS record has no data in this period
        if len(subset_gps_df_byStation) == 0:
            continue

        # Normalize displacements relative to first observation
        subset_gps_df_byStation = subset_gps_df_byStation.subtract(
            subset_gps_df_byStation.iloc[0, :], axis=1
        )

        # Extract the third data column, used here as the vertical component
        subset_gps_dU = subset_gps_df_byStation.iloc[:, 2]

        # Optional smoothing operation (preserved for future use)
        # moving_average_subset_gps_dU = pd.Series(
        #     smoothing.simple_moving_average(subset_gps_dU, window_size=15),
        #     index=subset_gps_dU.index,
        # )

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # TIME SERIES ANALYSIS
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        # Assign datetime labels to InSAR time series
        dU_byStation.columns = time_arr

        # Transpose so that time runs along the index (one column per point),
        # then pass through the project helper convert_to_fulltime
        # (presumably reindexes onto a complete time axis - TODO confirm)
        insar_arr_all = dataframe_handle.convert_to_fulltime(dU_byStation.T)

        # Average displacement across all InSAR points, re-referenced to the
        # first epoch
        insar_arr_average = insar_arr_all.mean(axis=1)
        insar_arr_average = insar_arr_average - insar_arr_average.iloc[0]

        # Calculate linear trend and velocity for InSAR data
        insar_trend, insar_slope = analysis.get_linear_trend(
            insar_arr_average
        )

        # Calculate linear trend and velocity for GPS data
        # Using raw GPS data instead of smoothed version
        gps_trend, gps_slope = analysis.get_linear_trend(subset_gps_dU)
        # Alternative using smoothed GPS data (preserved for future use)
        # gps_trend, gps_slope = analysis.get_linear_trend(moving_average_subset_gps_dU)

        # Store velocity results in cache for cross-station analysis.
        # Slopes are multiplied by 365.25 to express them per year
        # (assumes get_linear_trend returns a per-day slope - TODO confirm)
        insar_gps_slope_cache["STATION"].append(select_station)
        insar_gps_slope_cache["InSAR_slope_mmyr"].append(
            insar_slope * 365.25
        )
        insar_gps_slope_cache["GPS_slope_mmyr"].append(gps_slope * 365.25)

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # VISUALIZATION SETUP
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        # Create figure with A4 landscape width and 2/3 of the A4 height
        fig = plt.figure(figsize=(11.7, 8.3 * 2 / 3))

        # The try/finally guarantees that this specific figure is closed even
        # when plotting or saving raises, so figures do not pile up in memory
        # over the batch.
        try:
            # Configure single plot area
            ax = fig.add_subplot(111)

            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            # PLOTTING RESULTS
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            # Plot all individual InSAR points as background reference
            ax.plot(
                insar_arr_all,
                color="lightgrey",
                marker="o",
                linestyle="--",
                alpha=0.1,
                markersize=6,
                markevery=2,
            )

            # Plot raw GPS displacement time series
            ax.plot(
                subset_gps_dU,
                color="deepskyblue",
                label="GPS",
                marker="x",
                markersize=4,
                linestyle=" ",
            )
            # Alternative smoothed GPS plot (preserved for future use)
            # ax.plot(moving_average_subset_gps_dU, color="limegreen", label="GPS")

            # Plot GPS linear trend line
            ax.plot(gps_trend, color="navy", linewidth=1)

            # Plot average InSAR time series with markers
            # Using dropna() so the line connects across missing epochs
            ax.plot(
                insar_arr_average.dropna(),
                color="orangered",
                label="Average InSAR",
                marker="o",
                linestyle="--",
                markersize=6,
                markevery=2,
            )

            # Plot InSAR linear trend line
            ax.plot(insar_trend, color="darkred", linewidth=1)

            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            # GRAPH CUSTOMIZATION AND ANNOTATION
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            # Annotate InSAR velocity (converted to mm/year)
            ax.text(
                x=0.7,
                y=0.975,
                s=r"$\bar{v}_{InSAR}$" + f"={insar_slope*365.25:.2f} mm/yr",
                transform=ax.transAxes,
                fontsize=14,
                color="darkred",
            )

            # Annotate GPS velocity (converted to mm/year)
            ax.text(
                x=0.7,
                y=0.9,
                s=r"$\bar{v}_{GPS}$" + f"={gps_slope*365.25:.2f} mm/yr",
                transform=ax.transAxes,
                fontsize=14,
                color="navy",
            )

            # Annotate number of InSAR points used in analysis
            ax.text(
                x=0.025,
                y=0.2,
                s=f"{insar_arr_all.shape[1]} InSAR points",
                transform=ax.transAxes,
                fontsize=14,
            )

            # Configure axis formatting and labeling
            visualize.configure_axis(
                ax=ax,
                ylabel="Cumulative\nDisplacement (mm)",
                title=select_station,
                hide_spines=["right", "top"],
                tick_direction="out",
            )

            # Optional tick interval configuration (preserved for future use)
            # visualize.configure_ticks(ax=ax, y_major_interval=rounded_interval)

            # Configure legend, pinned to the lower-left corner
            visualize.configure_legend(
                ax=ax,
                loc="lower left",
                handletextpad=0.4,
                labelspacing=0.1,
                handlelength=1.25,
                fontsize_base=14,
            )

            # Configure date axis ticks over a fixed 2016-2026 window
            # (interval units are defined by the project helper - presumably
            # months; TODO confirm)
            visualize.configure_datetime_ticks(
                ax=ax,
                major_interval=6,  # Change from 12 to 6
                minor_interval=3,
                fontsize=14,
                grid=False,
                start_date=datetime(2016, 1, 1),
                end_date=datetime(2026, 1, 1),
            )

            # Add horizontal grid lines for readability
            ax.grid(which="major", axis="y")

            # Rotate date labels to prevent overlap
            fig.autofmt_xdate(ha="center", rotation=90)

            # Save figure to designated output directory with station-specific filename
            visualize.save_figure(
                fig=fig,
                savepath=os.path.join(
                    fig_savefolder, f"{select_station}_compare.png"
                ),
            )
        finally:
            # Close this figure explicitly (not just "the current one") to
            # free memory during batch processing
            plt.close(fig)
    except Exception as e:
        # Best-effort batch: report the failure and the station it belongs to,
        # then carry on with the remaining stations
        print(e)
        print(select_station)

  0%|          | 0/34 [00:00<?, ?it/s]

Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by RANSACRegressor.
GS27
Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by RANSACRegressor.
GS68
