In [3]:
from appgeopy import *
from my_packages import *

In [10]:
# ==============================================================================
# USER CONFIGURATION - Modify these parameters
# ==============================================================================
# Every tunable value used by the processing cell lives here: input paths,
# column-name mappings, analysis parameters and figure output settings.

# --- File Paths ---
# NOTE(review): absolute, machine-specific path; consider making it relative
# to a configurable data directory so the notebook runs elsewhere.
MLCW_SHAPEFILE = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\mlcw_twd97.shp"

REGPOINTS_FILE = r".\3__PredictionOutput\Layer_4.feather"

# Layer identifier = file name up to its first dot (e.g. "Layer_4").
CURRENT_LAYER = os.path.basename(REGPOINTS_FILE).split(".")[0]

# Locate the GTWR result CSV for this layer in the current working directory.
# glob() returns matches in arbitrary order, so sort for a deterministic pick
# and fail with an explicit message instead of a bare IndexError.
_gtwr_csv_matches = sorted(glob(f"*{CURRENT_LAYER}*.csv"))
if not _gtwr_csv_matches:
    raise FileNotFoundError(
        f"No GTWR result CSV matching '*{CURRENT_LAYER}*.csv' "
        f"found in {os.getcwd()}"
    )
if len(_gtwr_csv_matches) > 1:
    print(
        f"WARNING: {len(_gtwr_csv_matches)} CSV files match "
        f"'*{CURRENT_LAYER}*.csv'; using '{_gtwr_csv_matches[0]}'"
    )
GTWR_CSV_FILE = _gtwr_csv_matches[0]

# --- Column Name Mappings ---
# Regression Points DataFrame columns
REGPOINTS_COLUMNS = {
    "time_col": "pred_time",  # Time period column
    "prediction_col": "gtwr_prediction",  # Predicted values column
    "x_coord": "X_TWD97",  # X coordinate column
    "y_coord": "Y_TWD97",  # Y coordinate column
}

# GTWR Output DataFrame columns
GTWR_COLUMNS = {
    "time_col": "Time_value",  # Time period column
    "observed_col": f"input_{CURRENT_LAYER}",  # Observed/measured values
    "predicted_col": "predicted_value",  # Model predicted values
    "x_coord": "X_TWD97",  # X coordinate column
    "y_coord": "Y_TWD97",  # Y coordinate column
}

# MLCW Station columns
MLCW_COLUMNS = {
    "station_name": "STATION",  # Station name/ID column
    "pointkey_col": "PointKey",  # Point key identifier
}

# --- Analysis Parameters ---
# Time period whose rows define the set of unique spatial locations
REFERENCE_TIME_PERIOD = 1
# Search radius around stations (in map units)
BUFFER_RADIUS = 500
# Coordinates are multiplied by this factor before being truncated to int
# when PointKeys are generated (1000 -> metres to millimetres, assuming the
# coordinates are in metres - TODO confirm).
COORDINATE_SCALE = 1000

# --- Output Settings ---
OUTPUT_FOLDER = "figure_validate_regpoints"
FIGURE_WIDTH_PX = 2000
FIGURE_HEIGHT_PX = 680
FIGURE_DPI = 300

In [11]:
# ==============================================================================
# MAIN PROCESSING
# ==============================================================================
# For every station present in the GTWR output, compare the observed series
# at the station with the predictions of the regression points found within
# BUFFER_RADIUS of it, and save one figure per station.

# Setup output directory
dirname = os.getcwd()
fig_savefld = os.path.join(dirname, OUTPUT_FOLDER)
os.makedirs(fig_savefld, exist_ok=True)

print("--- Starting Validation Script ---")
print(f"Output folder: {fig_savefld}")
print(f"Processing: {os.path.basename(REGPOINTS_FILE)}")
print(f"GTWR results: {os.path.basename(GTWR_CSV_FILE)}")

# Layer name for plot titles / output subfolder (single source of truth:
# the value already derived in the configuration cell).
layer_name = CURRENT_LAYER

# Short aliases for the configured column names used repeatedly below
reg_time_col = REGPOINTS_COLUMNS["time_col"]
reg_pred_col = REGPOINTS_COLUMNS["prediction_col"]
gtwr_time_col = GTWR_COLUMNS["time_col"]
gtwr_obs_col = GTWR_COLUMNS["observed_col"]
station_name_col = MLCW_COLUMNS["station_name"]
station_key_col = MLCW_COLUMNS["pointkey_col"]

# Padding (in data units) added below / above the data range on the y-axis.
# Asymmetric values are kept exactly as in the original analysis.
YLIM_PAD_BOTTOM = 10
YLIM_PAD_TOP = 5

# --- Load and Prepare Data ---

# 1. Load MLCW stations
mlcw_gdf = gpd.read_file(MLCW_SHAPEFILE)

# 2. Load regression points (indexed by PointKey so neighbours can be looked
#    up across all time periods with .loc)
regpoints_df = pd.read_feather(REGPOINTS_FILE)
regpoints_df = regpoints_df.set_index("PointKey")

# Get unique spatial locations (using reference time period)
query_str = f"{reg_time_col} == @REFERENCE_TIME_PERIOD"
regpoints_unique = regpoints_df.query(query_str)

# Convert to GeoDataFrame
regpoints_gdf = geospatial.convert_to_geodata(
    df=regpoints_unique,
    xcoord_col=REGPOINTS_COLUMNS["x_coord"],
    ycoord_col=REGPOINTS_COLUMNS["y_coord"],
    crs_epsg="EPSG:3826",
)

# 3. Load GTWR output
gtwr_output = pd.read_csv(GTWR_CSV_FILE)

# Generate PointKey column dynamically
# NOTE(review): int() truncates toward zero, so a scaled coordinate such as
# 209.99999997 becomes 209, not 210. If the PointKeys in the station file
# were built by rounding, keys will not match and the station is skipped
# below - confirm which convention the other key sources use.
gtwr_output["PointKey"] = [
    f"X{int(x*COORDINATE_SCALE)}Y{int(y*COORDINATE_SCALE)}"
    for x, y in zip(
        gtwr_output[GTWR_COLUMNS["x_coord"]],
        gtwr_output[GTWR_COLUMNS["y_coord"]],
    )
]

# Convert to GeoDataFrame
gtwr_gdf = geospatial.convert_to_geodata(
    df=gtwr_output,
    xcoord_col=GTWR_COLUMNS["x_coord"],
    ycoord_col=GTWR_COLUMNS["y_coord"],
    crs_epsg="EPSG:3826",
)

# Create output subfolder
fig_savefld_sub = os.path.join(fig_savefld, layer_name)
os.makedirs(fig_savefld_sub, exist_ok=True)

# --- Validation Loop ---
unique_pointkeys = gtwr_output["PointKey"].unique()
print(f"Validating {len(unique_pointkeys)} stations...")

# Bookkeeping so skipped stations are reported instead of silently dropped
pointkeys_without_station = []
stations_without_neighbors = []
n_figures_saved = 0

for pointkey in tqdm(unique_pointkeys, desc=f"Validating {layer_name}"):

    # Get station data
    mlcw_station = mlcw_gdf.query(f"{station_key_col} == @pointkey")
    if mlcw_station.empty:
        pointkeys_without_station.append(pointkey)
        continue

    station_name = mlcw_station[station_name_col].values[0]

    # Get GTWR data for this station
    gtwr_station = gtwr_gdf.query("PointKey == @pointkey")

    # Find neighboring regression points
    neighboring_points = geospatial.find_point_neighbors(
        central_point=mlcw_station.iloc[0],
        target_points_gdf=regpoints_gdf,
        central_key_column=station_name_col,
        buffer_radius=BUFFER_RADIUS,
    )

    # Get time-series data for neighboring points
    neighbor_pointkeys = neighboring_points.index
    neighbor_timeseries = regpoints_df.loc[neighbor_pointkeys, :].sort_values(
        reg_time_col
    )

    # Without neighbours there is nothing to compare against, and the
    # y-limits below would be NaN (which matplotlib rejects); skip and report.
    if neighbor_timeseries.empty:
        stations_without_neighbors.append(station_name)
        continue

    # Create plot
    fig, ax = plt.subplots(figsize=(12, 4))

    # Plot individual predictions from neighbors (light grey markers), one
    # artist per time step. A single groupby pass replaces re-querying the
    # frame once for every time value.
    # NOTE(review): markevery=5 is applied within each time step's group, so
    # only every 5th neighbour of that step is drawn - confirm this is
    # intended rather than "every 5th time step".
    for t, neighbor_data_t in neighbor_timeseries.groupby(reg_time_col):
        ax.plot(
            neighbor_data_t[reg_time_col],
            neighbor_data_t[reg_pred_col],
            marker="s",
            linestyle=" ",
            markerfacecolor="none",
            markeredgecolor="lightgrey",
            alpha=0.5,
            markevery=5,
        )

    # Plot average of neighboring predictions.
    # Only the prediction column is averaged: it is the only one plotted,
    # and this avoids depending on which other (possibly non-numeric)
    # columns happen to be present in the feather file.
    neighbor_avg = neighbor_timeseries.groupby(reg_time_col)[
        [reg_pred_col]
    ].mean()

    ax.plot(
        neighbor_avg.index,
        neighbor_avg[reg_pred_col],
        marker="s",
        linestyle="--",
        color="blue",
        alpha=1,
        label="Average Predicted",
        markevery=5,
    )

    # Plot original measured data
    ax.plot(
        gtwr_station[gtwr_time_col],
        gtwr_station[gtwr_obs_col],
        color="magenta",
        mec="magenta",
        mfc="none",
        marker="D",
        label="Original",
        alpha=0.75,
        markevery=5,
    )

    # Optional: Plot direct GTWR prediction at station
    # ax.plot(
    #     gtwr_station[GTWR_COLUMNS["time_col"]],
    #     gtwr_station[GTWR_COLUMNS["predicted_col"]],
    #     color="lime", marker="o", label="Direct Prediction", alpha=0.25
    # )

    # y-range spanning both the neighbour predictions and the observations.
    # Series.min()/max() skip NaN; if everything is NaN the limits are left
    # to matplotlib's autoscaling instead of raising.
    plotted_values = pd.concat(
        [neighbor_timeseries[reg_pred_col], gtwr_station[gtwr_obs_col]]
    )
    bot_ylim = plotted_values.min()
    top_ylim = plotted_values.max()
    if pd.notna(bot_ylim) and pd.notna(top_ylim):
        ax.set_ylim(
            bottom=bot_ylim - YLIM_PAD_BOTTOM, top=top_ylim + YLIM_PAD_TOP
        )

    # Configure plot appearance
    visualize.configure_axis(
        ax=ax,
        title=f"{station_name} - {layer_name.lower()}",
        hide_spines=["right", "top"],
        fontsize_base=12,
    )
    visualize.configure_legend(
        ax=ax, fontsize_base=12, labelspacing=0.2, handletextpad=0.4
    )

    # Save figure
    fig_outpath = os.path.join(fig_savefld_sub, f"{station_name}.png")
    visualize.save_figure_with_exact_dimensions(
        fig=fig,
        savepath=fig_outpath,
        width_px=FIGURE_WIDTH_PX,
        height_px=FIGURE_HEIGHT_PX,
        dpi=FIGURE_DPI,
    )
    # Release the figure; many are created in this loop
    plt.close(fig)
    n_figures_saved += 1

# --- Summary ---
print(f"Figures saved: {n_figures_saved}/{len(unique_pointkeys)}")
if pointkeys_without_station:
    print(
        f"Skipped {len(pointkeys_without_station)} PointKey(s) with no "
        f"matching MLCW station: {pointkeys_without_station}"
    )
if stations_without_neighbors:
    print(
        f"Skipped {len(stations_without_neighbors)} station(s) with no "
        f"regression points within {BUFFER_RADIUS} map units: "
        f"{stations_without_neighbors}"
    )
print("Validation complete!")

--- Starting Validation Script ---
Output folder: D:\1000_SCRIPTS\003_Project002\20250917_GTWR002\5_Prediction_4B\figure_validate_regpoints
Processing: Layer_4.feather
GTWR results: gtwr_Layer_4_calibration_kernel-bisquare_lambda-0d1_bw-18_results.csv
Validating 25 stations...


Validating Layer_4:   0%|          | 0/25 [00:00<?, ?it/s]

Validation complete!
