In [1]:
from appgeopy import *
from my_packages import *

# ==============================================================================
# CONFIGURATION
# ==============================================================================

# Calendar date that a Time_value of 0 maps to; time steps are counted in
# months from this date (see convert_time_values).
initial_timepoint = datetime(year=2016, month=5, day=1)

# Rows whose Time_value exceeds this step count are dropped when a result
# file is loaded (see prepare_dataframe).
TIME_FILTER_THRESHOLD = 68

# Date range handed to the datetime tick configuration of every panel.
DISPLAY_START_DATE = datetime(year=2016, month=1, day=1)
DISPLAY_END_DATE = datetime(year=2022, month=1, day=1)

# Vertical guide lines: first date, last admissible date, and spacing in months.
VLINE_START_DATE = datetime(year=2016, month=1, day=1)
VLINE_END_DATE = datetime(year=2022, month=4, day=1)
VLINE_INTERVAL_MONTHS = 6

In [2]:
# ==============================================================================
# HELPER FUNCTIONS
# ==============================================================================


def create_pointkey(x, y):
    """Build a point identifier of the form ``X<int>Y<int>`` from coordinates.

    Each coordinate is multiplied by 1000 and converted with ``int``, which
    truncates toward zero, before being embedded in the key.
    """
    x_scaled = int(x * 1000)
    y_scaled = int(y * 1000)
    return "X{}Y{}".format(x_scaled, y_scaled)


def prepare_dataframe(filepath):
    """Read a result CSV and return it indexed by PointKey.

    A ``PointKey`` is derived for every row from the ``X_TWD97`` and
    ``Y_TWD97`` columns, inserted as the first column and then promoted to
    the index. Only rows whose ``Time_value`` does not exceed
    ``TIME_FILTER_THRESHOLD`` are returned.
    """
    table = pd.read_csv(filepath)

    # One key per row, built from the paired coordinate columns.
    point_keys = list(map(create_pointkey, table["X_TWD97"], table["Y_TWD97"]))
    table.insert(loc=0, column="PointKey", value=point_keys)

    indexed = table.set_index("PointKey")
    time_filter = f"Time_value<={TIME_FILTER_THRESHOLD}"
    return indexed.query(time_filter)


def convert_time_values(time_values, initial_timepoint):
    """Translate month-step counts into datetimes.

    Every entry of ``time_values`` is cast to ``int`` and added, as a number
    of months, to ``initial_timepoint``. The shifted dates are returned via
    ``pd.to_datetime``.
    """
    shifted_dates = []
    for step in time_values:
        month_offset = relativedelta(months=int(step))
        shifted_dates.append(initial_timepoint + month_offset)
    return pd.to_datetime(shifted_dates)


def calculate_metrics(obs_val, predict_val):
    """Score predictions against observations.

    Returns a 4-tuple ``(r2, rmse, mae, pbias)``. The first three come from
    ``r2_score``, ``root_mean_squared_error`` and ``mean_absolute_error``
    (presumably the scikit-learn metrics; they are imported elsewhere).
    ``pbias`` is ``100 * sum(predicted - observed) / sum(observed)``.
    """
    scorers = (r2_score, root_mean_squared_error, mean_absolute_error)
    scores = [scorer(obs_val, predict_val) for scorer in scorers]

    # Percent bias: total signed error relative to the observed total.
    signed_error_total = np.sum(predict_val - obs_val)
    percent_bias = 100.0 * signed_error_total / np.sum(obs_val)

    return (*scores, percent_bias)


def calculate_ylim(obs_val, predict_val, padding=0.2):
    """Return ``(bottom, top)`` y-limits enclosing both series plus a margin.

    The margin on each side is ``padding`` times the distance between the
    overall minimum and the overall maximum of the two inputs. Both inputs
    must expose ``.max()`` and ``.min()``.
    """
    upper = max((obs_val.max(), predict_val.max()))
    lower = min((obs_val.min(), predict_val.min()))
    span = abs(upper - lower)

    bottom_limit = lower - span * padding
    top_limit = upper + span * padding
    return bottom_limit, top_limit


def generate_vline_dates(start, end, interval_months):
    """Generate list of dates for vertical grid lines.

    Parameters
    ----------
    start : datetime
        First date of the sequence (included when ``start <= end``).
    end : datetime
        Last admissible date; generated dates never exceed it.
    interval_months : int
        Spacing between consecutive dates, in months. Must be positive.

    Returns
    -------
    list
        Dates ``start + k * interval_months`` months for ``k = 0, 1, ...``
        that are not later than ``end``. Empty when ``start > end``.

    Raises
    ------
    ValueError
        If ``interval_months`` is zero or negative, because the sequence
        would never move past ``end``.
    """
    if interval_months <= 0:
        raise ValueError(
            f"interval_months must be a positive number of months, got {interval_months!r}"
        )

    vline_dates = []
    step = 0
    while True:
        # Offset from `start` every time instead of accumulating with `+=`:
        # repeated addition permanently shifts the day once a short month
        # clips it (Jan 31 -> Feb 28 -> Mar 28 instead of Mar 31).
        current = start + relativedelta(months=step * interval_months)
        if current > end:
            break
        vline_dates.append(current)
        step += 1
    return vline_dates


def style_axes(axes, vline_dates):
    """Apply consistent styling to all axes.

    For every axis this configures spines and tick lengths, sets up the
    datetime ticks over ``DISPLAY_START_DATE``..``DISPLAY_END_DATE``, makes
    sure a major tick exists at ``DISPLAY_START_DATE`` and draws one dashed
    vertical guide line per entry of ``vline_dates``.

    Parameters
    ----------
    axes : iterable of matplotlib axes
        Axes to style.
    vline_dates : iterable of datetime
        X positions of the vertical guide lines.
    """
    # Same value for every axis, so convert it once.
    first_tick = mdates.date2num(DISPLAY_START_DATE)

    for ax in axes:
        visualize.configure_axis(
            ax=ax,
            tick_direction="out",
            hide_spines=["top", "right"],
            major_tick_length=10,
            minor_tick_length=7,
        )

        visualize.configure_datetime_ticks(
            ax=ax,
            major_interval=12,
            minor_interval=3,
            fontsize=14,
            grid=False,
            start_date=DISPLAY_START_DATE,
            end_date=DISPLAY_END_DATE,
        )

        # Force first tick at display start. Only add it when it is missing:
        # prepending unconditionally duplicates the tick whenever the locator
        # already emits it, and on axes that share their x-axis the duplicate
        # would be added again for every axis.
        current_ticks = list(ax.get_xticks())
        already_present = any(
            abs(tick - first_tick) < 1e-9 for tick in current_ticks
        )
        if not already_present:
            current_ticks.insert(0, first_tick)
        ax.set_xticks(current_ticks)

        # Add vertical grid lines
        for vline_date in vline_dates:
            ax.axvline(
                x=vline_date,
                color="gray",
                linestyle="--",
                linewidth=1,
                alpha=0.5,
                zorder=0,
            )


def _plot_gtwr_term_panel(
    ax,
    time_arr,
    values,
    mean_value,
    line_color,
    marker,
    mean_color,
    ylabel,
    annotation,
):
    """Draw one GTWR term series with its mean line, y-label and annotation."""
    ax.plot(
        time_arr,
        values,
        color=line_color,
        marker=marker,
        markerfacecolor="none",
        ms=8,
        linewidth=2,
        markevery=3,
    )
    ax.axhline(mean_value, lw=1, color=mean_color, ls="--")
    ax.set_ylabel(ylabel, fontsize=14, fontweight="bold")
    ax.text(
        0.025,
        0.1,
        annotation,
        transform=ax.transAxes,
        fontsize=14,
    )


def create_visualization(
    df_byPointKey, layer_number, mlcw_station, r2, rmse, mae, pbias
):
    """Create three-panel visualization figure.

    Panel 1 plots observed (``input_Layer_<layer_number>``) against predicted
    (``predicted_value``) values with a metrics box, panel 2 plots the
    ``CUMDISP`` column (labelled as the GTWR coefficient) and panel 3 the
    ``Intercept`` column; panels 2 and 3 also show their mean as a dashed
    line.

    Parameters
    ----------
    df_byPointKey : pandas.DataFrame
        Rows of a single point. Must provide the columns ``Time_value``,
        ``input_Layer_<layer_number>``, ``predicted_value``, ``CUMDISP`` and
        ``Intercept``.
    layer_number : str or int
        Layer identifier used for the observed column name and the title.
    mlcw_station : str
        Station name shown in the title.
    r2, rmse, mae, pbias : float
        Precomputed metrics, printed in the text box of panel 1.

    Returns
    -------
    tuple
        ``(fig, coeff_mean, coeff_stdev, intercept_mean, intercept_stdev)``.
        The caller owns ``fig`` and is responsible for closing it.

    Raises
    ------
    Exception
        Any error raised while drawing is re-raised after the partially
        built figure has been closed.
    """
    # Extract data
    time_arr = df_byPointKey["Time_value"]
    obs_val = df_byPointKey[f"input_Layer_{layer_number}"]
    predict_val = df_byPointKey["predicted_value"]
    coeff_val = df_byPointKey["CUMDISP"]
    intercept_val = df_byPointKey["Intercept"]

    # Calculate limits and statistics
    obs_botlim, obs_toplim = calculate_ylim(obs_val, predict_val)
    coeff_mean = np.mean(coeff_val)
    coeff_stdev = np.std(coeff_val)
    intercept_mean = np.mean(intercept_val)
    intercept_stdev = np.std(intercept_val)

    # Create figure (A4 landscape, in inches)
    fig, axes = plt.subplots(3, 1, figsize=(11.69, 8.27), sharex=True)

    # pyplot keeps every figure alive until it is closed explicitly. The
    # caller only receives the figure on success, so it has to be closed here
    # when drawing fails; otherwise each failing point in a batch run leaves
    # an open figure behind.
    try:
        fig.suptitle(
            f"{mlcw_station} - Layer {layer_number}",
            y=0.975,
            fontweight="bold",
            fontsize=20,
        )

        # Panel 1: Observations vs Predictions
        ax1 = axes[0]
        ax1.plot(
            time_arr,
            obs_val,
            color="darkgrey",
            ls="--",
            linewidth=2,
            marker="o",
            ms=8,
            label="Obs",
            markevery=3,
        )
        ax1.plot(
            time_arr,
            predict_val,
            color="dodgerblue",
            linestyle=(0, (1, 1)),
            marker="s",
            ms=8,
            linewidth=2,
            label="Pred",
            markevery=3,
        )
        ax1.set_ylim(bottom=obs_botlim, top=obs_toplim)
        ax1.set_ylabel(
            "Cumulative\nCompaction (mm)", fontsize=14, fontweight="bold"
        )

        stats_text = f"R²   : {r2:.3f}\nRMSE : {rmse:.3f}\nMAE  : {mae:.3f}\nPBIAS: {pbias:.2f}%"
        ax1.text(
            0.98,
            0.97,
            stats_text,
            transform=ax1.transAxes,
            ha="right",
            va="top",
            fontsize=10,
            fontfamily="monospace",
            bbox=dict(
                boxstyle="round,pad=0.5", fc="#EAEAF2", ec="black", lw=1
            ),
        )

        visualize.configure_ticks(ax=ax1, y_minor_interval=5)
        visualize.configure_legend(
            ax=ax1,
            columnspacing=0.5,
            labelspacing=0.1,
            handletextpad=0.2,
            ncol=2,
            fontsize_base=14,
            loc="lower left",
        )

        # Panel 2: GTWR Coefficients
        _plot_gtwr_term_panel(
            ax=axes[1],
            time_arr=time_arr,
            values=coeff_val,
            mean_value=coeff_mean,
            line_color="darkviolet",
            marker="s",
            mean_color="darkmagenta",
            ylabel="GTWR\nCoefficient",
            annotation=rf"$\overline{{\beta_1}}$={coeff_mean:.2f}±{coeff_stdev:.2f}",
        )

        # Panel 3: Model Intercept
        _plot_gtwr_term_panel(
            ax=axes[2],
            time_arr=time_arr,
            values=intercept_val,
            mean_value=intercept_mean,
            line_color="darkgreen",
            marker="D",
            mean_color="green",
            ylabel="GTWR\nIntercept",
            annotation=rf"$\overline{{\beta_0}}$={intercept_mean:.2f}±{intercept_stdev:.2f}",
        )

        # Apply styling
        vline_dates = generate_vline_dates(
            VLINE_START_DATE, VLINE_END_DATE, VLINE_INTERVAL_MONTHS
        )
        style_axes(axes, vline_dates)

        # Finalize layout
        fig.tight_layout(rect=[0, 0.05, 1, 0.96])
        fig.autofmt_xdate(ha="center", rotation=90)
    except Exception:
        plt.close(fig)
        raise

    return fig, coeff_mean, coeff_stdev, intercept_mean, intercept_stdev


# ==============================================================================
# MAIN PROCESSING LOOP
# ==============================================================================


def process_model_outputs(output_files, model_folder, mlcw_gdf):
    """Process all model output files and generate visualizations.

    For every result file, one figure per PointKey is written to
    ``<model_folder>/y_yhat_figs_2/<name of the file's parent folder>/`` and
    a summary workbook with that file's metrics is written next to the
    figures.

    Parameters
    ----------
    output_files : iterable of str
        Paths of result CSV files. The layer identifier is taken from the
        third ``_``-separated token of the file name.
    model_folder : str
        Root folder under which the figure folders are created.
    mlcw_gdf : DataFrame-like
        Table with ``PointKey`` and ``STATION`` columns used to look up the
        station name of each point.

    Returns
    -------
    dict
        Column name -> list of values, one entry per successfully processed
        point, accumulated over all files.

    Notes
    -----
    A failure for one point is reported with ``print`` and does not stop the
    run; that point is skipped.
    """
    cache = {
        "STATION": [],
        "Layer": [],
        "r_sq": [],
        "RMSE": [],
        "MAE": [],
        "PBIAS": [],
        "Coeff_Mean": [],
        "Coeff_Stdev": [],
        "Intercept_Mean": [],
        "Intercept_Stdev": [],
    }

    for select_file in tqdm(output_files, desc="Layer"):
        # Setup output directory. The parent folder name is taken with
        # os.path so it works with any path separator; splitting on "\\"
        # returns the whole directory on other platforms, and os.path.join
        # then discards model_folder.
        run_folder_name = os.path.basename(os.path.dirname(select_file))
        savefig_folder = os.path.join(
            model_folder, "y_yhat_figs_2", run_folder_name
        )
        os.makedirs(savefig_folder, exist_ok=True)

        layer_number = os.path.basename(select_file).split("_")[2]

        # Load and prepare data
        df = prepare_dataframe(select_file)
        unique_pointkey = df.index.unique()

        # Rows appended from here on belong to this file only.
        layer_start = len(cache["STATION"])

        # Process each location
        for select_pointkey in tqdm(
            unique_pointkey, desc="Pointkey", leave=False
        ):
            fig = None
            try:
                station_matches = mlcw_gdf.loc[
                    mlcw_gdf["PointKey"] == select_pointkey, "STATION"
                ]
                if len(station_matches) == 0:
                    # Report the real cause instead of an opaque index error.
                    raise LookupError("no matching STATION in mlcw_gdf")
                mlcw_station = station_matches.values[0]

                # Prepare time series data. The list selector always yields
                # a DataFrame, even when the key has a single row (a scalar
                # selector would return a Series in that case).
                df_byPointKey = df.loc[[select_pointkey]].copy()
                df_byPointKey["Time_value"] = convert_time_values(
                    df_byPointKey["Time_value"], initial_timepoint
                )

                # Calculate metrics
                obs_val = df_byPointKey[f"input_Layer_{layer_number}"]
                predict_val = df_byPointKey["predicted_value"]
                r2, rmse, mae, pbias = calculate_metrics(obs_val, predict_val)

                # Create visualization
                (
                    fig,
                    coeff_mean,
                    coeff_stdev,
                    intercept_mean,
                    intercept_stdev,
                ) = create_visualization(
                    df_byPointKey,
                    layer_number,
                    mlcw_station,
                    r2,
                    rmse,
                    mae,
                    pbias,
                )

                # Update cache with explicit keys so a value can never end
                # up in the wrong column.
                record = {
                    "STATION": mlcw_station,
                    "Layer": layer_number,
                    "r_sq": r2,
                    "RMSE": rmse,
                    "MAE": mae,
                    "PBIAS": pbias,
                    "Coeff_Mean": coeff_mean,
                    "Coeff_Stdev": coeff_stdev,
                    "Intercept_Mean": intercept_mean,
                    "Intercept_Stdev": intercept_stdev,
                }
                for key, value in record.items():
                    cache[key].append(value)

                # Save figure
                save_filename = f"{mlcw_station}_layer_{layer_number}.png"
                visualize.save_figure(
                    fig, os.path.join(savefig_folder, save_filename)
                )

            except Exception as e:
                print(f"Error processing {select_pointkey}: {e}")

            finally:
                # Close the figure even when saving failed, so failures do
                # not pile up open figures.
                if fig is not None:
                    plt.close(fig)

        # Save summary table for this layer: only the rows added while
        # processing this file, not those accumulated from earlier files.
        summary_table = pd.DataFrame(
            {key: values[layer_start:] for key, values in cache.items()}
        )
        summary_table.to_excel(
            os.path.join(
                savefig_folder, f"summary_table_Layer_{layer_number}.xlsx"
            ),
            index=False,
        )

    return cache

In [3]:
# ==============================================================================
# USAGE EXAMPLE
# ==============================================================================
# Result folders are searched directly below the current working directory.
model_folder = os.getcwd()

# Point layer read with geopandas and passed on as the station lookup table.
_mlcw_shapefile = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\mlcw_twd97.shp"
mlcw_gdf = gpd.read_file(_mlcw_shapefile)

# Only result files whose name contains this kernel name are processed.
kernel_name = "tricube"

# Match CSV files one folder level below `model_folder`.
_result_pattern = os.path.join(model_folder, "*", f"*{kernel_name}*.csv")
output_files = glob(_result_pattern)

cache = process_model_outputs(output_files, model_folder, mlcw_gdf)

['D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\3D_TestRun_4\\gtwr_run_output_Layer_1\\gtwr_Layer_1_regression_kernel-tricube_lambda-0d03_bw-25_results.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\3D_TestRun_4\\gtwr_run_output_Layer_2\\gtwr_Layer_2_regression_kernel-tricube_lambda-0d002_bw-18_results.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\3D_TestRun_4\\gtwr_run_output_Layer_3\\gtwr_Layer_3_regression_kernel-tricube_lambda-0d07_bw-17_results.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\3D_TestRun_4\\gtwr_run_output_Layer_4\\gtwr_Layer_4_regression_kernel-tricube_lambda-0d1_bw-18_results.csv',
 'D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\3D_TestRun_4\\gtwr_run_output_Layer_All\\gtwr_Layer_All_regression_kernel-tricube_lambda-0d002_bw-17_results.csv']