In [1]:
# --- 1. SETUP AND IMPORTS ---
# Import the project helper modules and the scikit-learn metrics used below.
# `appgeopy` and `my_packages` appear to be user-defined modules. Their wildcard
# imports are assumed to supply every other name this notebook uses without an
# explicit import: gpd, pd, np, plt, os, glob, datetime, relativedelta, tqdm and visualize.
from appgeopy import *
from my_packages import *
from sklearn.metrics import (
    mean_absolute_error,
    r2_score,
    root_mean_squared_error,
)

In [2]:
# --- 2. DATA LOADING AND PREPARATION ---

# Location of the monitoring-station shapefile. It is written as a raw string so
# the Windows backslashes are taken literally rather than as escape sequences.
mlcw_shp_path = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\mlcw_twd97.shp"

# Read the station points into a GeoDataFrame.
mlcw_gdf = gpd.read_file(mlcw_shp_path)

# Preview the first rows (head() defaults to 5) as a quick check that the load worked.
mlcw_gdf.head()

Unnamed: 0,STATION,GroundWate,LandSubsid,Y_WGS84,X_WGS84,POINT_X,POINT_Y,PointKey,geometry
0,ANHE,60,MW_AHES,23.52,120.31,179539.204623,2602035.0,X179539204Y2602035470,POINT (179539.205 2602035.471)
1,BEICHEN,50,MW_BCES,23.575894,120.303054,178859.958807,2608229.0,X178859958Y2608228949,POINT (178859.959 2608228.949)
2,CANLIN,50,MW_TLES,23.574983,120.246516,173088.151033,2608157.0,X173088151Y2608157276,POINT (173088.151 2608157.277)
3,DONGGUANG,50,MW_DGES,23.652743,120.272488,175783.144962,2616755.0,X175783144Y2616755313,POINT (175783.145 2616755.314)
4,ERLUN,50,MW_ELES,23.771726,120.415522,190429.148778,2629865.0,X190429148Y2629865287,POINT (190429.149 2629865.287)


In [40]:
# Define the folder path where model prediction results are stored.
model_folder = (
    r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\5_GTWR_Prediction"
)

# Define the specific kernel name used in the model to find relevant result files.
kernel_name = "bisquare"

# Find every CSV in `model_folder` whose filename contains the kernel name.
# `glob` returns matches in an unspecified, filesystem-dependent order, so the
# list is sorted to make the processing order (and `output_files[0]`, which a
# later cell relies on) reproducible across machines and runs.
# NOTE: the sort is lexicographic, so a "Layer_10" file would come before "Layer_2".
output_files = sorted(glob(os.path.join(model_folder, f"*{kernel_name}*.csv")))

# Display the list of found files.
output_files

['D:\\1000_SCRIPTS\\003_Project002\\20250222_GTWR001\\5_GTWR_Prediction\\gtwr_Layer_1_kernel-bisquare_lambda-0d006_bw-23_coefficients.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250222_GTWR001\\5_GTWR_Prediction\\gtwr_Layer_2_kernel-bisquare_lambda-0d001_bw-23_coefficients.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250222_GTWR001\\5_GTWR_Prediction\\gtwr_Layer_3_kernel-bisquare_lambda-0d004_bw-17_coefficients.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250222_GTWR001\\5_GTWR_Prediction\\gtwr_Layer_4_kernel-bisquare_lambda-0d005_bw-17_coefficients.csv']

In [42]:
# ==============================================================================
# --- 1. SETUP FILE PATHS AND MAIN LOOP ---
# ==============================================================================
# Fail early with a clear message when no result files were found; indexing an
# empty list below would otherwise raise a bare IndexError.
if not output_files:
    raise FileNotFoundError(
        "`output_files` is empty: no GTWR result CSV matched the search pattern."
    )

# Get the directory from the first file in the list to determine the model folder.
model_folder = os.path.dirname(output_files[0])

# Folder that receives the figures and the summary table.
savefig_folder = os.path.join(model_folder, "y_yhat_figs")

# `exist_ok=True` makes this a no-op when the folder is already there, so no
# separate existence check is needed.
os.makedirs(savefig_folder, exist_ok=True)

# Calendar date corresponding to time_stamp == 0; the loop below adds
# `time_stamp` months to this date to build the plotting axis.
initial_timepoint = datetime(2016, 5, 1)

# Column-wise accumulator for the per-station, per-layer evaluation metrics.
cache = {
    "STATION": [],
    "Layer": [],
    "r_sq": [],
    "RMSE": [],
    "MAE": [],
    "PBIAS": [],
}

# Figures are always rendered, but only written to disk when this flag is True.
# Saving was disabled in the original cell, so the default keeps that behaviour.
SAVE_FIGURES = False

# Build the PointKey -> station-name lookup once instead of querying the
# GeoDataFrame for every point of every layer. `setdefault` keeps the first
# occurrence of a key, matching the previous `.values[0]` lookup.
station_by_pointkey = {}
for _key, _station in zip(mlcw_gdf["PointKey"], mlcw_gdf["STATION"]):
    station_by_pointkey.setdefault(_key, _station)

# Loop through each model output file provided in the 'output_files' list.
for select_file in tqdm(output_files, desc="Layer"):

    # Extract the layer number from the filename for use in titles and filenames.
    # This takes the third "_"-separated token, e.g. "gtwr_Layer_1_kernel-..." -> "1".
    layer_number = os.path.basename(select_file).split("_")[2]

    # --- 2. DATA PROCESSING FOR THE CURRENT FILE ---

    # Load the selected CSV file into a pandas DataFrame.
    df = pd.read_csv(select_file)

    # Create a location identifier ("PointKey") from the X and Y coordinates
    # expressed in integer millimetres (truncated, not rounded).
    # NOTE(review): the keys must match `mlcw_gdf["PointKey"]` exactly; presumably
    # both were generated with this same truncation - confirm.
    pointkey_arr = [
        f"X{int(x*1000)}Y{int(y*1000)}"
        for x, y in zip(df["X_TWD97"], df["Y_TWD97"])
    ]
    df.insert(loc=0, column="PointKey", value=pointkey_arr)
    df = df.set_index("PointKey")

    # Get a list of all unique locations in the current file.
    unique_pointkey = df.index.unique()

    # Loop through each unique location in the file to generate a plot.
    for select_pointkey in tqdm(unique_pointkey, desc="Pointkey", leave=False):

        # Find the station name corresponding to the current PointKey and fail
        # with an explicit message (instead of an opaque IndexError) if absent.
        if select_pointkey not in station_by_pointkey:
            raise KeyError(
                f"PointKey {select_pointkey!r} from {os.path.basename(select_file)} "
                "has no matching station in mlcw_gdf"
            )
        mlcw_station = station_by_pointkey[select_pointkey]

        # Select all rows for the current location. Indexing with a list always
        # yields a DataFrame, even when the location has a single row (a scalar
        # label would return a Series in that case and break the code below).
        df_byPointKey = df.loc[[select_pointkey]].copy()

        # Convert the integer month offsets to datetime objects for plotting.
        df_byPointKey["time_stamp"] = pd.to_datetime(
            [
                initial_timepoint + relativedelta(months=time_step)
                for time_step in df_byPointKey["time_stamp"]
            ]
        )

        # --- 3. PREPARE DATA FOR PLOTTING ---

        # Extract data columns into variables for clarity.
        time_arr = df_byPointKey["time_stamp"]
        obs_val = df_byPointKey["y"]
        predict_val = df_byPointKey["yhat"]

        # Calculate y-axis limits, adding 20% padding for better visualization.
        y_range = obs_val.max() - obs_val.min()
        obs_toplim = obs_val.max() + y_range * 0.2
        obs_botlim = obs_val.min() - y_range * 0.2

        # --- 4. CALCULATE EVALUATION METRICS ---
        r2 = r2_score(obs_val, predict_val)
        rmse = root_mean_squared_error(obs_val, predict_val)
        mae = mean_absolute_error(obs_val, predict_val)

        # Percent bias, using the (prediction - observation) sign convention of
        # the original code. It is undefined when the observations sum to zero,
        # so NaN is reported in that case instead of dividing by zero.
        obs_sum = np.sum(obs_val)
        if obs_sum != 0:
            pbias = 100.0 * np.sum(predict_val - obs_val) / obs_sum
        else:
            pbias = np.nan

        # ==========================================================================
        # --- 5. VISUALIZATION ---
        # ==========================================================================

        # Create a figure with 3 subplots stacked vertically (A4 landscape, in inches).
        fig, axes = plt.subplots(3, 1, figsize=(11.69, 8.27), sharex=True)
        ax1, ax2, ax3 = axes

        # Set the main title, incorporating the station and layer number.
        fig.suptitle(
            f"{mlcw_station} - Layer {layer_number}",
            y=0.975,
            fontweight="bold",
            fontsize=20,
        )

        # --- Plot 1: Observations vs. Predictions ---
        ax1.plot(
            time_arr,
            obs_val,
            color="darkgrey",
            linewidth=2,
            marker="o",
            ms=8,
            label="Observations",
            markevery=3,
        )
        ax1.plot(
            time_arr,
            predict_val,
            color="blue",
            linestyle=(0, (1, 1)),
            marker="o",
            ms=8,
            linewidth=2,
            label="Predictions",
            markevery=3,
        )
        # Only pin the limits when the observed range is a positive number;
        # a zero or NaN range would give identical or invalid limits, so
        # matplotlib's autoscaling is left in charge instead.
        if y_range > 0:
            ax1.set_ylim(bottom=obs_botlim, top=obs_toplim)

        # Add a text box with performance metrics.
        stats_text = (
            f"R²   : {r2:.3f}\n"
            f"RMSE : {rmse:.3f}\n"
            f"MAE  : {mae:.3f}\n"
            f"PBIAS: {pbias:.2f}%"
        )
        ax1.text(
            0.98,
            0.97,
            stats_text,
            transform=ax1.transAxes,
            ha="right",
            va="top",
            fontsize=10,
            fontfamily="monospace",
            bbox=dict(boxstyle="round,pad=0.5", fc="#EAEAF2", ec="black", lw=1),
        )

        # --- Plot 2: GTWR Coefficients ---
        ax2.plot(
            time_arr,
            df_byPointKey["CUMDISP"],  # Assumed to be a model coefficient
            color="blueviolet",
            marker="s",
            markerfacecolor="none",
            ms=8,
            linewidth=2,
            markevery=3,
        )

        # --- Plot 3: Model Intercept ---
        ax3.plot(
            time_arr,
            df_byPointKey["Intercept"],
            color="black",
            marker="D",
            markerfacecolor="none",
            ms=8,
            linewidth=2,
            markevery=3,
        )

        # --- 6. FINAL STYLING AND OUTPUT ---

        for ax in axes:
            # `visualize.configure_axis` is a custom function to style the axis ticks and borders.
            visualize.configure_axis(
                ax=ax,
                tick_direction="out",
                hide_spines=["top", "right"],
                major_tick_length=10,
                minor_tick_length=7,
            )
            # `visualize.configure_legend` is another custom function for legend styling.
            visualize.configure_legend(
                ax=ax,
                columnspacing=0.5,
                labelspacing=0.1,
                handletextpad=0.2,
                ncols=4,
                fontsize_base=14,
            )
            # Custom helper for the date axis; every panel gets the same fixed window.
            visualize.configure_datetime_ticks(
                ax=ax,
                major_interval=12,
                minor_interval=3,
                fontsize=14,
                grid=False,
                start_date=datetime(2016, 1, 1),
                end_date=datetime(2022, 1, 1),
            )

        # Axis labels are set once, after the styling helpers have run.
        ax1.set_ylabel(
            "Cumulative\nCompaction (mm)",
            fontsize=14,
            loc="center",
            fontweight="bold",
        )
        ax2.set_ylabel(
            "GTWR\nCoefficient", fontsize=14, loc="center", fontweight="bold"
        )
        ax3.set_ylabel(
            "GTWR\nIntercept", fontsize=14, loc="center", fontweight="bold"
        )

        # Adjust layout to prevent titles and labels from overlapping.
        fig.tight_layout(rect=[0, 0.05, 1, 0.96])
        fig.autofmt_xdate(ha="center", rotation=90)

        # Record the metrics by explicit key so each value cannot end up in the
        # wrong column if the accumulator's key order ever changes.
        metrics_row = {
            "STATION": mlcw_station,
            "Layer": layer_number,
            "r_sq": r2,
            "RMSE": rmse,
            "MAE": mae,
            "PBIAS": pbias,
        }
        for key, value in metrics_row.items():
            cache[key].append(value)

        # Optionally save the figure as "<station>_layer_<n>.png".
        if SAVE_FIGURES:
            save_filename = "_".join([mlcw_station, "layer", layer_number]) + ".png"
            visualize.save_figure(
                fig=fig,
                savepath=os.path.join(savefig_folder, save_filename),
            )

        # Close the figure to free up memory.
        plt.close(fig)

# Turn the accumulated metric lists into one table (one row per station and
# layer) and write it as an Excel file into the figure folder, without the index.
summary_table = pd.DataFrame(data=cache)
summary_table.to_excel(
    os.path.join(savefig_folder, "summary_table.xlsx"),
    index=False,
)

Layer:   0%|          | 0/4 [00:00<?, ?it/s]

Pointkey:   0%|          | 0/29 [00:00<?, ?it/s]

Pointkey:   0%|          | 0/29 [00:00<?, ?it/s]

Pointkey:   0%|          | 0/28 [00:00<?, ?it/s]

Pointkey:   0%|          | 0/25 [00:00<?, ?it/s]