In [1]:
from appgeopy import *
from my_packages import *

1. search for leveling benchmarks located within a buffer radius (in meters) around each GPS station (e.g. ~1 km; the value actually used is set by `buffer_radius` below)
2. extract the overlapping time spans (`alignment periods`) shared by the `leveling` and `GPS` data sets
3. compare **average linear velocities** between `GPS` and `leveling` data sets

In [2]:
# ----> input shapefiles (GPS stations and leveling benchmarks)
gps_shapefile = r"E:\006_InSAR_Project_2022\GPSDataWorkspace\Selected_ActiveGPS_4Calibration_20220609.shp"
leveling_shapefile = (
    r"E:\SUBSIDENCE_PROJECT_DATA\地陷資料整理\水準點\LevelingBenchmarks_DavidNCU\shapefiles\leveling_benchmark_location.shp"
)

gps_df = gpd.read_file(gps_shapefile)
leveling_df = gpd.read_file(leveling_shapefile)

# ----> search radius around each GPS station
# NOTE(review): presumably in meters (units of the layers' projected CRS) -- confirm
buffer_radius = 200

# ----> collect, station by station, the leveling benchmarks that fall inside the buffer
results = []
for station_pos in range(len(gps_df)):
    neighbors = geospatial.find_point_neighbors(
        central_point=gps_df.iloc[station_pos, :],
        target_points_gdf=leveling_df,
        central_key_column="STATION",
        buffer_radius=buffer_radius,
    )
    results.append(neighbors)

# One table of benchmark rows, each tagged with the GPS station it is paired with
result_gdf = pd.concat(results, ignore_index=True)
result_gdf.head(3)

Unnamed: 0,樁號,中英對,點名,鄉鎮市,97縱座,97橫座,備註,始有記,最終記,id,STATION,geometry
0,井BD,JBD,布袋國小,布袋鎮,2586729.0,165431.0,,1998-12-15,2019-06-15,214.0,BDES,POINT (165431.000 2586729.000)
1,CYBDWW12,CYBDWW12,布袋水位站,布袋鎮,2586679.0,165348.0,,2010-08-15,2019-06-15,227.0,BDES,POINT (165348.000 2586679.000)
2,內部031,NB031,西港國小,大城鄉,2639747.0,177579.0,104年遺失,2000-06-15,2014-06-15,455.0,CHSG,POINT (177579.000 2639747.000)


In [3]:
def prepare_dictionary(df, output_date_colname="date", output_value_colname="value"):
    """Pack a datetime-indexed frame into a two-entry dictionary of arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supports ``strftime`` (e.g. a ``DatetimeIndex``).
    output_date_colname : str, default "date"
        Key under which the formatted dates are stored.
    output_value_colname : str, default "value"
        Key under which the flattened values are stored.

    Returns
    -------
    dict
        ``{output_date_colname: <array of "YYYYMMDD" strings>,
        output_value_colname: <1-D array of all values in df>}``.
    """
    date_strings = df.index.strftime("%Y%m%d").to_numpy()
    flat_values = df.values.flatten()

    packed = {}
    packed[output_date_colname] = date_strings
    packed[output_value_colname] = flat_values
    return packed

In [4]:
# Define paths to folders containing GPS and leveling data
gps_folder = (
    r"E:\GPSData+4Calibration+PSInSAR\AVAILABLE_4CALIBRATION\GPS_nearby"  # Path to the folder containing GPS data
)
leveling_folder = r"E:\SUBSIDENCE_PROJECT_DATA\website_LandSubsidence-wra-gov-tw\Leveling_Download_20240722"  # Path to the folder containing leveling data

# `cache` collects nested dictionaries (keyed by GPS station) that are merged and
# written to HDF5 in a later cell.
cache = []

# `cache2` is a flat, column-oriented table of velocity pairs exported to Excel
# in a later cell; the three lists always grow together.
cache2 = {
    "STATION": [],
    "GPS(mm/year)": [],
    "Leveling(mm/year)": [],
}

# Extract a list of unique station names from the result_gdf DataFrame
available_stations = result_gdf["STATION"].unique().tolist()

# Iterate through each available station to process the data
for select_station in tqdm(available_stations):
    # Leveling benchmarks paired with the current GPS station
    gdf_bySTATION = result_gdf.query("STATION == @select_station")

    # Load GPS data for the selected station from CSV, using 'Datetime' and 'dU(mm)' columns
    gps_fpath = os.path.join(gps_folder, select_station + ".csv")
    if not os.path.isfile(gps_fpath):
        continue  # Skip this station if GPS data is not available

    gps_data_arr = pd.read_csv(
        gps_fpath,
        usecols=["Datetime", "dU(mm)"],
        parse_dates=["Datetime"],
        index_col=["Datetime"],
    )

    # Fill missing GPS values before any trend fitting
    gps_data_arr = gps_data_arr.interpolate(method="piecewise_polynomial")

    # First/last timestamps with a valid GPS value; invariant over the benchmark loop
    gps_first_date = gps_data_arr.first_valid_index()
    gps_last_date = gps_data_arr.last_valid_index()

    # Station/benchmark labels and their velocity pairs, kept in matching order
    station_benchmark_pairs = []
    velocity_pairs = []

    # Track if there's at least one valid leveling benchmark for this station
    has_valid_leveling_benchmark = False

    # Iterate through each leveling benchmark associated with the selected GPS station
    for _, row in gdf_bySTATION.iterrows():
        loc_code, loc_name = row[["樁號", "點名"]]
        benchmark_label = f"{loc_code}_{loc_name}"

        # Search for the corresponding leveling data file for the selected location code and name
        leveling_files = glob(os.path.join(leveling_folder, "*", f"{loc_code}*{loc_name}*.xlsx"))
        if not leveling_files:
            continue  # Skip if no matching leveling file is found

        # Assuming only one file is expected, take the first match
        leveling_fpath_byCODENAME = leveling_files[0]

        try:
            # Load the leveling data from the identified Excel file
            leveling_data_arr = pd.read_excel(leveling_fpath_byCODENAME, usecols=[1, 2], parse_dates=[0], index_col=[0])
        except Exception:
            # Deliberate best-effort: an unreadable workbook only drops this benchmark
            continue

        if len(leveling_data_arr) <= 3:
            continue  # Skip if there are insufficient data points

        # Set the flag indicating this station has valid leveling benchmarks
        has_valid_leveling_benchmark = True

        # Append the leveling data to the cache list
        cache.append(
            {select_station: {"LevelingList": {benchmark_label: prepare_dictionary(leveling_data_arr, "date", "h(m)")}}}
        )

        leveling_first_date = leveling_data_arr.first_valid_index()
        leveling_last_date = leveling_data_arr.last_valid_index()

        # `first_valid_index`/`last_valid_index` return None when a series holds no
        # valid value at all; such a series cannot be compared or fitted.
        if any(
            bound is None for bound in (gps_first_date, gps_last_date, leveling_first_date, leveling_last_date)
        ):
            continue

        # Overlapping date range shared by both data sets: the LATER of the two
        # starts and the EARLIER of the two ends. (Taking `max` for the end, as was
        # done previously, yields the union and lets each series be fitted over a
        # period where the other has no data.)
        start_idx_date = max(gps_first_date, leveling_first_date)
        end_idx_date = min(gps_last_date, leveling_last_date)
        if start_idx_date > end_idx_date:
            continue  # The two records do not overlap in time

        aligned_gps_arr = gps_data_arr.loc[start_idx_date:end_idx_date]
        aligned_leveling_arr = leveling_data_arr.loc[start_idx_date:end_idx_date]

        leveling_years = aligned_leveling_arr.index.year.unique().size
        if leveling_years < 3:
            continue  # Skip if the leveling data is insufficient

        if aligned_gps_arr.empty:
            continue  # No GPS samples fall inside the shared window

        # Normalize both series to start at zero. A new object is bound instead of
        # using `-=` so the slice taken from `gps_data_arr` is never modified in
        # place, whatever the pandas view/copy semantics are.
        aligned_gps_arr = aligned_gps_arr - aligned_gps_arr.iloc[0, 0]
        # Leveling heights are scaled by 1000 (stored under "h(m)", compared in mm)
        aligned_leveling_arr = (aligned_leveling_arr - aligned_leveling_arr.iloc[0, 0]) * 1000

        # Convert to daily intervals
        daily_leveling_arr = datetime_handle.fulltime_table(
            aligned_leveling_arr, datetime_handle.get_fulltime(aligned_leveling_arr.index)
        )

        # Calculate linear trends and velocities
        # NOTE(review): velocity is presumably a per-day slope (it is scaled by 365.25 below) -- confirm
        gps_linear_trend, gps_velocity = analysis.get_linear_trend(
            aligned_gps_arr.iloc[:, 0], force_zero_intercept=True
        )
        leveling_linear_trend, leveling_velocity = analysis.get_linear_trend(
            daily_leveling_arr.iloc[:, 0], force_zero_intercept=True
        )

        # Convert velocities to mm/year
        gps_velocity_year = gps_velocity * 365.25
        leveling_velocity_year = leveling_velocity * 365.25

        # Append the current station and benchmark pairing
        station_benchmark_pairs.append([select_station, benchmark_label])
        velocity_pairs.append([gps_velocity_year, leveling_velocity_year])

        # Update cache2 dictionary (one row per station/benchmark pair)
        cache2["STATION"].append(select_station)
        cache2["GPS(mm/year)"].append(gps_velocity_year)
        cache2["Leveling(mm/year)"].append(leveling_velocity_year)

    # Only store the GPS data and calculated velocities if there was at least one valid leveling benchmark
    if has_valid_leveling_benchmark:
        cache.append({select_station: prepare_dictionary(gps_data_arr, "date", "dU(mm)")})

        # Encode each label to Big5 bytes so the (partly Chinese) names are stored as byte strings
        encoded_pairs = np.array([[element.encode("big5") for element in pair] for pair in station_benchmark_pairs])

        # Append the velocities data to the cache for the current station
        cache.append({select_station: {"velocities": {"Pairs": encoded_pairs, "Values": np.array(velocity_pairs)}}})

  0%|          | 0/20 [00:00<?, ?it/s]

In [5]:
# Collapse the per-station fragments gathered in `cache` into one nested dictionary
dict_to_saveHDF5 = gwatertools.h5pytools.merge_dicts(*cache)

# Date stamp (YYYYMMDD) used as a prefix for the output file names
today_string = datetime.now().strftime("%Y%m%d")

# Write the merged dictionary to a new HDF5 file (mode "w" overwrites an existing file)
hdf5_output_name = f"{today_string}_GPS_Leveling_LinearVelocity_{buffer_radius}m.h5"
with h5py.File(hdf5_output_name, "w") as hdf5_file:
    gwatertools.h5pytools.data_to_hdf5(hdf5_file, dict_to_saveHDF5)

In [6]:
# Export the flat GPS-vs-leveling velocity table (one row per station/benchmark pair)
velocity_table = pd.DataFrame(cache2)
velocity_table.to_excel(f"{today_string}_GPS_Leveling_Compare_{buffer_radius}m.xlsx", index=False)

#### decode array