In [1]:
#!/usr/bin/env python
# coding: utf-8

from appgeopy import *
from my_packages import *

In [2]:
# ==============================================================================
# HELPER FUNCTIONS
# ==============================================================================


def calculate_time_series_stats(
    regpoints_data, time_col, coeff_col, intercept_col, start_date
):
    """Summarise coefficients and intercepts for every time step.

    Rows are grouped by ``time_col``. For each group the mean and the
    standard deviation (pandas default, ``ddof=1``) of ``coeff_col`` and
    ``intercept_col`` are computed, together with mean +/- one standard
    deviation.

    Parameters
    ----------
    regpoints_data : pandas.DataFrame
        Table containing ``time_col``, ``coeff_col`` and ``intercept_col``.
    time_col : str
        Column holding the time step; each value is used as a number of
        months added to ``start_date``. Any column label is accepted, it
        does not have to be a valid Python identifier.
    coeff_col : str
        Column holding the coefficient values.
    intercept_col : str
        Column holding the intercept values.
    start_date : datetime-like
        Origin to which the month offsets are added.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``"time"`` (``start_date`` plus the month offset), one row
        per distinct time step in order of first appearance, with columns
        ``avg_coeffs``, ``coeffs_upper_std``, ``coeffs_lower_std``,
        ``avg_intercepts``, ``intercepts_upper_std`` and
        ``intercepts_lower_std``.

    Notes
    -----
    Rows whose time value is missing are skipped (``groupby`` drops NaN keys
    by default). A group with a single row has an undefined standard
    deviation, so its upper/lower columns are NaN.
    """
    stats_data = {
        "time": [],
        "avg_coeffs": [],
        "coeffs_upper_std": [],
        "coeffs_lower_std": [],
        "avg_intercepts": [],
        "intercepts_upper_std": [],
        "intercepts_lower_std": [],
    }

    # A single groupby pass replaces one full-frame query per time step and
    # avoids interpolating the column name into a query expression.
    # sort=False keeps the groups in order of first appearance.
    for t, time_data in regpoints_data.groupby(time_col, sort=False):
        # Calculate current datetime
        current_time = start_date + relativedelta(months=t)

        # Calculate coefficient stats
        coeffs = time_data[coeff_col]
        avg_coeff = coeffs.mean()
        std_coeff = coeffs.std()

        # Calculate intercept stats
        intercepts = time_data[intercept_col]
        avg_intercept = intercepts.mean()
        std_intercept = intercepts.std()

        # Store results
        stats_data["time"].append(current_time)
        stats_data["avg_coeffs"].append(avg_coeff)
        stats_data["coeffs_upper_std"].append(avg_coeff + std_coeff)
        stats_data["coeffs_lower_std"].append(avg_coeff - std_coeff)
        stats_data["avg_intercepts"].append(avg_intercept)
        stats_data["intercepts_upper_std"].append(avg_intercept + std_intercept)
        stats_data["intercepts_lower_std"].append(avg_intercept - std_intercept)

    return pd.DataFrame(stats_data).set_index("time")


def plot_individual_points(
    ax, regpoints_data, time_col, value_col, start_date, config
):
    """Plot every individual value as an unconnected marker cloud.

    One ``ax.plot`` call is issued per distinct value of ``time_col``; all
    values of that time step share the same x position.

    Parameters
    ----------
    ax : matplotlib Axes-like
        Object whose ``plot`` method receives the points.
    regpoints_data : pandas.DataFrame
        Table containing ``time_col`` and ``value_col``.
    time_col : str
        Column holding the time step; each value is used as a number of
        months added to ``start_date``. Any column label is accepted, it
        does not have to be a valid Python identifier.
    value_col : str
        Column holding the values drawn on the y axis.
    start_date : datetime-like
        Origin to which the month offsets are added.
    config : dict
        Marker style with the keys ``"marker"``, ``"color"`` (used as the
        marker edge colour), ``"alpha"`` and ``"markersize"``.

    Notes
    -----
    Rows whose time value is missing are skipped (``groupby`` drops NaN keys
    by default).
    """
    # A single groupby pass replaces one full-frame query per time step and
    # avoids interpolating the column name into a query expression.
    # sort=False keeps the groups in order of first appearance.
    for t, time_data in regpoints_data.groupby(time_col, sort=False):
        current_time = start_date + relativedelta(months=t)
        values = time_data[value_col].values

        ax.plot(
            [current_time] * len(values),
            values,
            marker=config["marker"],
            linestyle=" ",  # markers only, no connecting line
            markeredgecolor=config["color"],
            markerfacecolor="none",  # hollow markers
            markeredgewidth=0.1,
            alpha=config["alpha"],
            markersize=config["markersize"],
        )


def create_district_plot(
    regpoints_data,
    district_name,
    layer_name,
    output_folder,
    data_cols,
    plot_config,
    output_config,
    start_date,
):
    """Create and save the two-panel time series figure for one district.

    The upper panel shows the coefficient column and the lower panel the
    intercept column. Each panel contains the individual point values, the
    per-time-step mean, a band of mean +/- one standard deviation, a dotted
    zero line and a text label with the number of distinct index values in
    ``regpoints_data``.

    Parameters
    ----------
    regpoints_data : pandas.DataFrame
        Rows belonging to the district.
    district_name : str
        Used in the figure title and in the output file names.
    layer_name : str
        Used in the figure title.
    output_folder : str
        Folder receiving ``<district_name>.png`` and, optionally,
        ``<district_name>_summary_data.xlsx``. It is not created here.
    data_cols : dict
        Needs the keys ``"time_col"``, ``"coefficient_col"`` and
        ``"intercept_col"``.
    plot_config : dict
        Needs the keys ``"figure_size"``, ``"individual_points"``,
        ``"average_line"``, ``"coefficient_fill"`` and ``"intercept_fill"``.
    output_config : dict
        Needs the keys ``"width_px"``, ``"height_px"``, ``"dpi"`` and
        ``"save_excel"``.
    start_date : datetime-like
        Passed on to the statistics and scatter helpers as the time origin.

    Returns
    -------
    None
        The figure is written to disk and always closed, including when
        drawing or saving raises.
    """
    time_col = data_cols["time_col"]
    coeff_col = data_cols["coefficient_col"]
    intercept_col = data_cols["intercept_col"]

    # Calculate statistics
    summary_df = calculate_time_series_stats(
        regpoints_data, time_col, coeff_col, intercept_col, start_date
    )

    # Create figure
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=plot_config["figure_size"], sharex=True
    )

    # From here on the figure exists; try/finally guarantees it is closed
    # even if drawing or saving fails, so a failed district does not leave
    # an open figure behind in the running session.
    try:
        # (axes, value column, mean column, lower band, upper band, fill style)
        panels = [
            (
                ax1,
                coeff_col,
                "avg_coeffs",
                "coeffs_lower_std",
                "coeffs_upper_std",
                plot_config["coefficient_fill"],
            ),
            (
                ax2,
                intercept_col,
                "avg_intercepts",
                "intercepts_lower_std",
                "intercepts_upper_std",
                plot_config["intercept_fill"],
            ),
        ]

        # Plot individual points on both panels first
        for ax, value_col, *_ in panels:
            plot_individual_points(
                ax,
                regpoints_data,
                time_col,
                value_col,
                start_date,
                plot_config["individual_points"],
            )

        # Plot averages and the +/- one standard deviation band
        line_style = plot_config["average_line"]
        for ax, value_col, avg_key, lower_key, upper_key, fill_style in panels:
            ax.plot(
                summary_df.index,
                summary_df[avg_key],
                color=line_style["color"],
                linewidth=line_style["linewidth"],
                label=f"Average {value_col}",
            )
            ax.fill_between(
                summary_df.index,
                summary_df[lower_key],
                summary_df[upper_key],
                color=fill_style["color"],
                alpha=fill_style["alpha"],
                label="Std. Dev. Range",
            )
            ax.set_ylabel(value_col)

        # Configure axes only after all data has been drawn on both panels
        point_count_label = f"{len(regpoints_data.index.unique())} grid points"
        for ax, *_ in panels:
            visualize.configure_axis(ax=ax, hide_spines=["top", "right"])
            visualize.configure_legend(ax=ax, fontsize_base=10)
            visualize.configure_datetime_ticks(ax=ax)
            ax.axhline(0, color="red", linestyle=":", linewidth=1)
            ax.text(
                0.02,
                0.05,
                point_count_label,
                transform=ax.transAxes,
                fontsize=10,
                fontweight="bold",
            )

        fig.suptitle(
            f"{district_name} - {layer_name}", fontsize=14, fontweight="bold"
        )
        # Leave the top 4% of the figure free for the suptitle
        fig.tight_layout(rect=[0, 0, 1, 0.96])

        # Save outputs
        fig_outpath = os.path.join(output_folder, f"{district_name}.png")
        visualize.save_figure_with_exact_dimensions(
            fig=fig,
            savepath=fig_outpath,
            width_px=output_config["width_px"],
            height_px=output_config["height_px"],
            dpi=output_config["dpi"],
        )

        if output_config["save_excel"]:
            excel_outpath = os.path.join(
                output_folder, f"{district_name}_summary_data.xlsx"
            )
            summary_df.to_excel(excel_outpath)
    finally:
        plt.close(fig)

In [7]:
# ==============================================================================
# USER CONFIGURATION - Modify these parameters
# ==============================================================================

# --- File Paths ---
REGPOINTS_FILE = (
    "3__PredictionOutput/Layer_3.feather"  # Single input feather file
)
# Layer label = input file name without its extension. splitext strips only
# the final extension, so a file stem that itself contains dots stays intact.
GTWR_LAYER = os.path.splitext(os.path.basename(REGPOINTS_FILE))[0]

# NOTE(review): machine-specific absolute path - adjust before running on
# another computer.
SUBSIDENCE_AREAS_SHP = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\points_fld\support\sub_area_polygons.shp"
# Outputs are grouped in one sub-folder per layer
OUTPUT_FOLDER = os.path.join("figure_timeseries_coeffs", GTWR_LAYER)

# --- Column Names Configuration ---
DATA_COLUMNS = {
    "time_col": "pred_time",  # Time period column name
    "coefficient_col": "CUMDISP_coef",  # Coefficient column to analyze
    "intercept_col": "X_Intercept__coef",  # Intercept column to analyze
    "x_coord": "X_TWD97",  # X coordinate column
    "y_coord": "Y_TWD97",  # Y coordinate column
}

# District polygon columns
DISTRICT_COLUMNS = {
    "district_name": "E_NAME",  # District name column in shapefile
}

# --- Analysis Parameters ---
REFERENCE_TIME_PERIOD = 1  # Time period for unique spatial locations
START_DATE = pd.Timestamp(year=2016, month=5, day=1)  # Analysis start date

# --- Plotting Configuration ---
PLOT_CONFIG = {
    "figure_size": (12, 9),
    "individual_points": {
        "marker": "o",
        "color": "lightgrey",
        "alpha": 0.3,
        "markersize": 2,
    },
    "average_line": {"color": "black", "linewidth": 1},
    "coefficient_fill": {"color": "green", "alpha": 0.2},
    "intercept_fill": {"color": "blueviolet", "alpha": 0.2},
}

# --- Output Settings ---
OUTPUT_CONFIG = {
    "width_px": 2000,
    "height_px": 1120,
    "dpi": 300,
    "save_excel": True,  # Save summary data as Excel files
}

# --- Processing Options ---
DISTRICTS_TO_PROCESS = (
    None  # List of districts, or None for all (e.g., ["TUKU", "XIZHOU"])
)

In [8]:
# ==============================================================================
# MAIN PROCESSING
# ==============================================================================

print("--- Starting Spatial-Temporal Coefficient Analysis ---")

# Check the input file before loading anything. exit() is meant for
# interactive shells; raising stops this cell with a clear error instead.
if not os.path.exists(REGPOINTS_FILE):
    raise FileNotFoundError(f"Error: Input file '{REGPOINTS_FILE}' not found")

print(f"Processing file: {REGPOINTS_FILE}")

# Load subsidence areas and reproject them to the CRS used for the points
subsidence_areas = gpd.read_file(SUBSIDENCE_AREAS_SHP).to_crs(crs="EPSG:3826")

# Setup output folder
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Load data; PointKey repeats once per time period and is used further down
# to pull every time step of the points that fall inside a district
regpoints_df = pd.read_feather(REGPOINTS_FILE).set_index("PointKey")

# Create unique points template: the rows of one reference time period,
# converted to point geometries
unique_regpoints = regpoints_df.query(
    f"{DATA_COLUMNS['time_col']} == @REFERENCE_TIME_PERIOD"
).copy()
unique_regpoints = geospatial.convert_to_geodata(
    df=unique_regpoints,
    xcoord_col=DATA_COLUMNS["x_coord"],
    ycoord_col=DATA_COLUMNS["y_coord"],
    crs_epsg="EPSG:3826",
)

# Assign points to districts
print("Assigning points to districts...")
points_by_district = {}
for _, polygon in subsidence_areas.iterrows():
    district_name = polygon[DISTRICT_COLUMNS["district_name"]]
    points_by_district[district_name] = geospatial.find_points_within_polygon(
        points_gdf=unique_regpoints, polygon_geom=polygon.geometry
    )

# Determine which districts to process
if DISTRICTS_TO_PROCESS:
    # Report requested names that do not exist instead of dropping them silently
    unknown_districts = [
        d for d in DISTRICTS_TO_PROCESS if d not in points_by_district
    ]
    if unknown_districts:
        print(
            f"Warning: requested districts not found and skipped: {unknown_districts}"
        )
    districts_to_analyze = [
        d for d in DISTRICTS_TO_PROCESS if d in points_by_district
    ]
else:
    districts_to_analyze = list(points_by_district.keys())

print(f"Processing {len(districts_to_analyze)} districts...")

# Process each district
for district in tqdm(districts_to_analyze, desc=f"Analyzing {GTWR_LAYER}"):

    # Get points in district; districts without any point produce no output
    district_pointkeys = points_by_district[district].index
    if len(district_pointkeys) == 0:
        continue

    # Extract district time series data (all time periods of the selected keys)
    district_data = regpoints_df.loc[district_pointkeys, :].sort_values(
        DATA_COLUMNS["time_col"]
    )

    # Create plot
    create_district_plot(
        district_data,
        district,
        GTWR_LAYER,
        OUTPUT_FOLDER,
        DATA_COLUMNS,
        PLOT_CONFIG,
        OUTPUT_CONFIG,
        START_DATE,
    )

print("\n--- Analysis Complete ---")
print(f"Results saved to: {OUTPUT_FOLDER}")

--- Starting Spatial-Temporal Coefficient Analysis ---
Processing file: 3__PredictionOutput/Layer_3.feather
Assigning points to districts...
Processing 7 districts...


Analyzing Layer_3:   0%|          | 0/7 [00:00<?, ?it/s]


--- Analysis Complete ---
Results saved to: figure_timeseries_coeffs\Layer_3
