In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# MLCW monitoring stations: point shapefile holding each station's location and attributes.
# Later cells look stations up through its "PointKey" and "STATION" columns.
# NOTE(review): the file name suggests TWD97 coordinates - confirm the CRS stored in the shapefile.
mlcw_shapefile_path = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\mlcw_twd97.shp"
mlcw_gdf = gpd.read_file(mlcw_shapefile_path)

In [3]:
# Input files for the layer being validated.
# GTWR model output at the monitoring stations (read with pd.read_csv further down).
select_gtwr_csv = "gtwr_Layer_1_kernel-bisquare_lambda-0d006_bw-23_coefficients.csv"
# Predictions at the regression grid points (read with pd.read_feather further down).
select_regpoints_file = "3__PredictionOutput/Layer_1.feather"

In [21]:
# Load the regression-point predictions (predicted values at every grid point for every
# time step), index the rows by "PointKey", and discard the "time_nt" column.
regpoints_df = (
    pd.read_feather(select_regpoints_file)
    .set_index("PointKey")
    .drop(columns="time_nt")
)
regpoints_df.head(5)

Unnamed: 0_level_0,Intrcp_,CUMDISP,predctn,prdctn_,tim_prd,tm_nmrc,X_TWD97,Y_TWD97
PointKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
X162648119Y2595992857,3.276164,0.064431,2.259336,10.98442,1,1,162648.119363,2595993.0
X162848119Y2595992857,3.262489,0.06406,2.310098,11.197413,1,1,162848.119363,2595993.0
X163048119Y2595992857,3.248794,0.063687,2.348585,11.374948,1,1,163048.119363,2595993.0
X162648119Y2596192857,3.281034,0.064521,2.355751,11.342274,1,1,162648.119363,2596193.0
X162848119Y2596192857,3.26728,0.064147,2.3534,11.35736,1,1,162848.119363,2596193.0


In [22]:
# The spatial search only needs each grid location once, so keep the rows of a single
# time period (tim_prd == 1) and turn them into a GeoDataFrame, which is what the
# neighbour search further down operates on.
# NOTE(review): assumes every grid point has a row for tim_prd == 1 - confirm.
print("  - Preparing unique spatial locations for regression points...")
regpoints_df_byPointKey = geospatial.convert_to_geodata(
    df=regpoints_df.loc[regpoints_df["tim_prd"] == 1],
    xcoord_col="X_TWD97",
    ycoord_col="Y_TWD97",
    crs_epsg="EPSG:3826",
)

  - Preparing unique spatial locations for regression points...


In [23]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Step 2: Load and Prepare the GTWR Model Output (the monitoring stations)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# GTWR results at the monitoring-station locations; the validation loop below reads the
# "time_stamp" and "y" columns, and "yhat" is available for the optional comparison plot.
gtwr_output = pd.read_csv(select_gtwr_csv)

# Build a "PointKey" identifier per row from the station coordinates: each coordinate is
# multiplied by 1000 and truncated with int(), then formatted as "X<x>Y<y>".
# NOTE(review): int() truncates rather than rounds - confirm the keys in the other
# datasets were produced the same way, otherwise lookups by PointKey will miss.
gtwr_output["PointKey"] = list(
    map(
        lambda x, y: f"X{int(x*1000)}Y{int(y*1000)}",
        gtwr_output["X_TWD97"],
        gtwr_output["Y_TWD97"],
    )
)

# One entry per monitoring station present in the GTWR output.
unique_pointkeys = gtwr_output["PointKey"].unique()

# Attach point geometry (EPSG:3826) so the station data can be used in spatial operations.
gtwr_output = geospatial.convert_to_geodata(
    df=gtwr_output,
    xcoord_col="X_TWD97",
    ycoord_col="Y_TWD97",
    crs_epsg="EPSG:3826",
)

In [31]:
# Validation figures: for every monitoring station in the GTWR output, compare the measured
# series at the station with the predictions at the regression grid points around it, and
# save one PNG per station.
fig_savefld = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\5_GTWR_Prediction\test_figs"
# Make sure the output folder exists before the first figure is written.
os.makedirs(fig_savefld, exist_ok=True)

layer_from_name = "Layer 1"

# Radius handed to the neighbour search. Change it to include more or fewer grid points.
# NOTE(review): the data are in EPSG:3826, so this is presumably metres - confirm against
# `geospatial.find_point_neighbors`.
search_radius = 500

for select_pointkey in tqdm(unique_pointkeys, desc=f"Validating {layer_from_name}", position=1, leave=False):

    # Get the data for the single, current monitoring (MLCW) station.
    mlcw_data_byPointKey = mlcw_gdf.query("PointKey==@select_pointkey")
    if mlcw_data_byPointKey.empty:
        # A GTWR key without a counterpart in the station shapefile would otherwise raise
        # IndexError below and abort the whole batch; report it and move on instead.
        tqdm.write(f"Skipping {select_pointkey}: no matching station in mlcw_gdf")
        continue
    mlcw_station_name = mlcw_data_byPointKey.STATION.values[0]

    # Get the GTWR model's time-series output for this specific station.
    df_byPointKey = gtwr_output.query("PointKey==@select_pointkey")

    # --- Geospatial Search ---
    # Find all the regression grid points that lie within `search_radius` of the station.
    search_points_around_mlcw = geospatial.find_point_neighbors(
        central_point=mlcw_data_byPointKey.iloc[0],
        target_points_gdf=regpoints_df_byPointKey,
        central_key_column="STATION",
        buffer_radius=search_radius,
    )

    # Get the unique identifiers of these neighboring points.
    points_around_mlcw_byPointKey = search_points_around_mlcw.index

    # From the full regression point dataset, extract all time-series rows for these
    # neighboring points (the index holds one row per point per time step).
    points_around_mlcw_measurements = regpoints_df.loc[points_around_mlcw_byPointKey, :]
    points_around_mlcw_measurements = points_around_mlcw_measurements.sort_values("tim_prd")

    # --- Plotting ---
    # Create a new figure for this station's validation plot.
    fig = plt.figure(figsize=(12, 4))
    ax = fig.add_subplot(111)

    # Plot 1: every individual prediction from the neighboring grid points, drawn as light
    # grey, semi-transparent open circles to show the spread. Markers only, so a single
    # call covering all time steps draws the same picture as one call per time step.
    ax.plot(
        points_around_mlcw_measurements["tim_prd"],
        points_around_mlcw_measurements["predctn"],
        marker="o",
        linestyle=" ",
        markerfacecolor="none",
        markeredgecolor="lightgrey",
        alpha=0.6,
    )

    # Plot 2: the AVERAGE prediction of the neighboring points at each time step.
    # Only "predctn" is averaged, since it is the only column that is plotted.
    average_measurements = points_around_mlcw_measurements.groupby("tim_prd")[["predctn"]].mean()
    ax.plot(
        average_measurements["predctn"],
        marker="s",
        linestyle="--",
        color="blue",
        alpha=0.5,
        label="Average Predicted",
    )

    # Plot 3: the ORIGINAL, measured data from the monitoring station - the reference the
    # predictions are compared against.
    # NOTE(review): Plots 1-2 use "tim_prd" on the x-axis while this one uses "time_stamp";
    # confirm both columns are on the same scale.
    ax.plot(df_byPointKey["time_stamp"], df_byPointKey["y"], color="magenta", marker="o", label="Original")

    # Plot 4 (Optional): Plot the direct GTWR prediction at the station location.
    # You can uncomment this to see how the direct prediction ('yhat') compares.
    # ax.plot(df_byPointKey["time_stamp"], df_byPointKey["yhat"], color="lime", marker="o", label="y_hat", alpha=0.25)

    # --- Finalizing and Saving the Figure ---

    # Apply custom formatting to the plot axes and title.
    visualize.configure_axis(
        ax=ax, title=f"{mlcw_station_name} - {layer_from_name.lower()}", hide_spines=["right", "top"], fontsize_base=12
    )

    # Configure the plot legend.
    visualize.configure_legend(ax=ax, fontsize_base=12, labelspacing=0.1, handletextpad=0.2)

    # NOTE(review): the file name contains only the station name, so writing another layer
    # into the same folder overwrites these figures.
    fig_outpath = os.path.join(fig_savefld, f"{mlcw_station_name}.png")

    # Save the figure with specific dimensions and resolution.
    visualize.save_figure_with_exact_dimensions(fig=fig, savepath=fig_outpath, width_px=2000, height_px=680*2, dpi=300)

    # Close the figure to free up memory before starting the next loop iteration.
    plt.close(fig)