In [2]:
# --- 1. SETUP AND IMPORTS ---
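# Import the project's helper modules via star imports.
# `appgeopy` and `my_packages` are user-defined modules. Between them they are
# expected to provide every name the cells below use without importing it:
# `gpd`, `pd`, `np`, `os`, `plt`, `datetime`, `relativedelta`, `tqdm`, `visualize`,
# `r2_score`, `root_mean_squared_error`, and `mean_absolute_error`.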
from appgeopy import *
from my_packages import *

In [4]:
# --- 2. DATA LOADING AND PREPARATION ---

# Load the shapefile of monitoring-station points into a GeoDataFrame.
# The raw-string prefix keeps the Windows backslashes from being read as escapes.
mlcw_gdf = gpd.read_file(
    r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\mlcw_twd97.shp"
)

# Build a "PointKey" of the form "X<int x>Y<int y>" from the POINT_X / POINT_Y
# columns. The plotting loop later looks stations up by this key, so it uses the
# same f-string pattern as the key built from the coefficient CSV.
mlcw_gdf["PointKey"] = [
    f"X{int(x)}Y{int(y)}"
    for x, y in zip(mlcw_gdf["POINT_X"], mlcw_gdf["POINT_Y"])
]

# Preview the first 5 rows. Jupyter only renders the LAST expression of a cell,
# so the preview sits at the end, where it also shows the new PointKey column.
mlcw_gdf.head(5)

#### $Layer_{n} = \text{CUMDISP} + \text{CUMDISP}^{2}$

In [35]:
# ==============================================================================
# --- 1. SCRIPT SETUP AND CONFIGURATION ---
# File paths and initial parameters for this cell. Edit these values to point
# the analysis at a different coefficient file or a different start date.
# ==============================================================================

# Full path to the input CSV file containing the model's coefficient data.
select_file = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\4_GTWR\13_TestRun_113\gtwr_Layer_1_kernel-tricube_lambda-0d008_bw-23_coefficients.csv"

# --- Prepare Output Directory ---
# Folder that holds the CSV, and the CSV's filename without its extension.
# `splitext` strips only the final extension, so a filename that contains other
# dots is not truncated at the first one.
model_folder = os.path.dirname(select_file)
file_basename = os.path.splitext(os.path.basename(select_file))[0]

# Subfolder (next to the CSV) where all output plot images will be saved.
savefig_folder = os.path.join(model_folder, r"y_yhat_figs")

# Create the output folder; `exist_ok=True` makes this a no-op when the folder
# is already there, without a separate existence check.
os.makedirs(savefig_folder, exist_ok=True)

# Starting date for the time series. The loop below adds each row's
# 'time_stamp' value to this date as a number of months.
initial_timepoint = datetime(2016, 5, 1)


# ==============================================================================
# --- 2. DATA LOADING AND PRE-PROCESSING ---
# Read the coefficient CSV and index it by a per-location key so that every
# row belonging to one point can be pulled out with a single label lookup.
# ==============================================================================

# Read the selected CSV file into a pandas DataFrame.
df = pd.read_csv(select_file)

# The third underscore-separated token of the filename is used as the layer
# label in plot titles and saved filenames (e.g. 'gtwr_Layer_1_...' -> '1').
layer_number = os.path.basename(select_file).split("_")[2]

# --- Per-location identifier ---
# One "X<int x>Y<int y>" string per row, built from the X_TWD97 / Y_TWD97
# coordinate columns.
pointkey_arr = [
    f"X{int(easting)}Y{int(northing)}"
    for easting, northing in zip(df["X_TWD97"], df["Y_TWD97"])
]

# Add the key as the first column, then promote it to the index so that
# `df.loc[...]` selects all rows of a given location.
df.insert(0, "PointKey", pointkey_arr)
df = df.set_index("PointKey")

# Distinct location keys present in the data.
unique_pointkey = df.index.unique()


# ==============================================================================
# --- 3. MAIN PROCESSING LOOP ---
# For every unique point (location) in `df`, build a four-panel time-series
# figure (observed vs. predicted, two coefficient panels, intercept) and save
# it as a PNG in `savefig_folder`.
# A failure on one point is reported and skipped so the remaining points are
# still processed; the figure is closed in `finally`, so a failure part-way
# through plotting cannot leave an open figure behind.
# ==============================================================================

# `tqdm` provides a progress bar. All points are processed; for a quick test
# run, slice the iterable instead (e.g. `unique_pointkey[:1]`).
for select_pointkey in tqdm(unique_pointkey, desc="Pointkey", leave=False):
    fig = None  # Lets the `finally` clause know whether a figure was created.
    try:
        # --- 3.1. Filter and Prepare Data for the Current Point ---

        # Look up the station name for the current PointKey in `mlcw_gdf`
        # (loaded in an earlier cell). Fail with an explicit message when no
        # station matches, instead of an opaque index error.
        station_matches = mlcw_gdf.loc[
            mlcw_gdf["PointKey"] == select_pointkey, "STATION"
        ]
        if station_matches.empty:
            raise LookupError(
                f"no station in mlcw_gdf has PointKey {select_pointkey!r}"
            )
        mlcw_station = station_matches.values[0]

        # Rows of the current point only. The list-style label guarantees a
        # DataFrame even when the point has a single row, and `.copy()` makes
        # the column assignment below act on an independent frame rather than
        # on a slice of `df`.
        df_byPointKey = df.loc[[select_pointkey]].copy()

        # Turn the 'time_stamp' month offsets into datetimes by adding them to
        # `initial_timepoint`, so the x-axis can be drawn as dates.
        df_byPointKey["time_stamp"] = pd.to_datetime(
            [
                initial_timepoint + relativedelta(months=time_step)
                for time_step in df_byPointKey["time_stamp"]
            ]
        )

        # --- 3.2. Extract Data Series for Plotting ---
        time_arr = df_byPointKey["time_stamp"]  # Time stamps for the x-axis
        obs_val = df_byPointKey["y"]  # Observed values
        predict_val = df_byPointKey["yhat"]  # Model predictions
        # The 'residual' column; extracted here but not drawn in any panel.
        relative_err = df_byPointKey["residual"]

        # Y-axis limits for the first panel: observed range padded by 20% of
        # the magnitude of its max / min.
        obs_toplim = obs_val.max() + abs(obs_val.max() * 0.2)
        obs_botlim = obs_val.min() - abs(obs_val.min() * 0.2)

        # --- 3.3. Calculate Model Performance Metrics ---
        r2 = r2_score(obs_val, predict_val)
        rmse = root_mean_squared_error(obs_val, predict_val)
        mae = mean_absolute_error(obs_val, predict_val)
        # Percent bias is undefined when the observations sum to zero; report
        # NaN in that case rather than dividing by zero.
        obs_total = np.sum(obs_val)
        pbias = (
            100.0 * np.sum(predict_val - obs_val) / obs_total
            if obs_total != 0
            else float("nan")
        )

        # ======================================================================
        # --- 3.4. VISUALIZATION ---
        # ======================================================================

        # Four vertically stacked panels sharing one x-axis.
        fig, axes = plt.subplots(4, 1, figsize=(11.69, 8.27), sharex=True)

        # Main title for the whole figure.
        fig.suptitle(
            f"{mlcw_station} - Layer {layer_number}",
            y=0.975,
            fontweight="bold",
            fontsize=20,
        )

        # --- Plot 1: Observations vs. Predictions ---
        ax1 = axes[0]
        # Observed values: solid dark-grey line.
        ax1.plot(
            time_arr,
            obs_val,
            color="darkgrey",
            linewidth=2,
            marker="o",
            ms=8,
            label="Observations",
            markevery=3,  # Draw a marker on every 3rd point only.
        )
        # Predicted values: blue dotted line.
        ax1.plot(
            time_arr,
            predict_val,
            color="blue",
            linestyle=(0, (1, 1)),  # Dotted line style.
            marker="o",
            ms=8,
            linewidth=2,
            label="Predictions",
            markevery=3,
        )
        ax1.set_ylim(bottom=obs_botlim, top=obs_toplim)

        # Text box with the performance metrics, anchored to the top-right
        # corner of the first panel.
        stats_text = (
            f"R²   : {r2:.3f}\n"
            f"RMSE : {rmse:.3f}\n"
            f"MAE  : {mae:.3f}\n"
            f"PBIAS: {pbias:.2f}%"
        )
        ax1.text(
            0.99,
            0.99,
            stats_text,
            transform=ax1.transAxes,  # Coordinates are axes fractions.
            ha="right",
            va="top",
            fontsize=8,
            fontfamily="monospace",  # Keeps the metric columns aligned.
            bbox=dict(boxstyle="round,pad=0.5", fc="#EAEAF2", ec="black", lw=1),
        )

        # --- Plot 2 & 3: Model Coefficients ---
        # One panel per coefficient column.
        ax2 = axes[1]
        ax3 = axes[2]
        for ax, colname, color in zip(
            [ax2, ax3],
            ["CUMDISP", "sq_CUMDISP"],
            ["blueviolet", "mediumorchid"],
        ):
            ax.plot(
                time_arr,
                df_byPointKey[colname],
                color=color,
                marker="s",
                markerfacecolor="none",  # Hollow markers.
                ms=8,
                linewidth=2,
                markevery=3,
                label=colname,
            )

        # --- Plot 4: Model Intercept ---
        ax4 = axes[3]
        ax4.plot(
            time_arr,
            df_byPointKey["Intercept"],
            color="black",
            marker="D",
            markerfacecolor="none",
            ms=8,
            linewidth=2,
            markevery=3,
            label="Intercept",
        )

        # ======================================================================
        # --- 3.5. FINAL STYLING AND OUTPUT ---
        # ======================================================================

        # Apply the same axis, legend and date-tick configuration to every
        # panel through the project's `visualize` helpers.
        for ax in axes:
            visualize.configure_axis(
                ax=ax, tick_direction="out", hide_spines=["top", "right"]
            )
            visualize.configure_legend(
                ax=ax,
                columnspacing=0.5,
                labelspacing=0.1,
                handletextpad=0.2,
                ncols=4,
                fontsize_base=14,
            )
            visualize.configure_datetime_ticks(
                ax=ax,
                major_interval=12,
                minor_interval=3,
                fontsize=14,
                grid=False,
                start_date=datetime(2016, 1, 1),
                end_date=datetime(2022, 1, 1),
            )

        # --- Set Y-Axis Labels for Each Subplot ---
        ax1.set_ylabel(
            "Cumulative\nCompaction (mm)",
            loc="center",
            fontweight="bold",
            fontsize=14,
        )
        ax2.set_ylabel(
            r"$\text{CUMDISP}$" + "\nCoefficients",
            loc="center",
            fontweight="bold",
            fontsize=14,
        )
        ax3.set_ylabel(
            r"$\text{CUMDISP}^{2}$" + "\nCoefficients",
            loc="center",
            fontweight="bold",
            fontsize=14,
        )
        ax4.set_ylabel(
            "Intercept", loc="center", fontweight="bold", fontsize=14
        )

        # --- Final Adjustments and Saving ---
        # Fit the panels inside the given rectangle, leaving room for the
        # main title, then rotate the shared x-axis date labels.
        fig.tight_layout(rect=[0, 0.05, 1, 0.96])
        fig.autofmt_xdate(ha="center", rotation=90)

        # Save as "<station>_layer_<layer_number>.png" in the output folder.
        visualize.save_figure(
            fig=fig,
            savepath=os.path.join(
                savefig_folder,
                "_".join([mlcw_station, "layer", layer_number]) + ".png",
            ),
        )

    # Report the failing point and carry on with the next one.
    except Exception as e:
        print(f"Failed to process point {select_pointkey}. Error: {e}")

    # Close the figure whether or not the point succeeded, so figures do not
    # pile up in memory over the course of the loop.
    finally:
        if fig is not None:
            plt.close(fig)

Pointkey:   0%|          | 0/29 [00:00<?, ?it/s]


KeyboardInterrupt



Error in callback <function flush_figures at 0x0000026321013BA0> (for post_execute), with arguments args (),kwargs {}:


KeyboardInterrupt: 

#### CUMDISP ~ Layer_1 + Layer_2 + Layer_3 + Layer_4

In [None]:
# Debug aid: display the value currently bound to `select_pointkey`, the loop
# variable of the per-point plotting loop in the cell above.
select_pointkey