In [1]:
from appgeopy import *
from my_packages import *
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler  # or MinMaxScaler

In [2]:
# NOTE(review): absolute, machine-specific project folder. None of the visible
# cells reference it (the glob below is relative to the working directory) --
# confirm whether it is still needed or should become a configurable path.
mainfld = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\5_GTWR_Prediction"

# glob() returns matches in arbitrary, filesystem-dependent order. Later cells
# pick files by position (e.g. files[-1:]), so sort the list to make that
# selection deterministic across machines and runs.
files = sorted(glob(os.path.join("calib_input/", "*Layer_*.csv")))
files

['calib_input\\20251014_GTWR_InputData_MLCW_InSAR_Layer_1.csv',
 'calib_input\\20251014_GTWR_InputData_MLCW_InSAR_Layer_2.csv',
 'calib_input\\20251014_GTWR_InputData_MLCW_InSAR_Layer_3.csv',
 'calib_input\\20251014_GTWR_InputData_MLCW_InSAR_Layer_4.csv',
 'calib_input\\20251014_GTWR_InputData_MLCW_InSAR_Layer_All.csv']

# correlation plot

In [3]:
# For every input file: compute the per-station Pearson correlation between
# the file's layer column and CUMDISP, then save a sorted horizontal bar chart.
MIN_CORR_POINTS = 3  # Need at least 3 points for correlation

for select_file in files:

    # File names end in "..._Layer_<id>.csv"; the data column is "Layer_<id>".
    current_layer = (
        "Layer_" + os.path.basename(select_file).split("_")[-1].split(".")[0]
    )

    # Load data
    df = pd.read_csv(select_file)

    # Calculate correlations for each station (groups keep first-seen order)
    results = []
    for station, station_data in df.groupby("STATION", sort=False):
        # Drop incomplete pairs first: a single NaN would otherwise turn the
        # whole station's correlation into NaN.
        pairs = station_data[[current_layer, "CUMDISP"]].dropna()
        if len(pairs) < MIN_CORR_POINTS:
            continue
        r, p = pearsonr(pairs[current_layer], pairs["CUMDISP"])
        results.append(
            {
                "STATION": station,
                "r": r,
                "p_value": p,
                "n": len(pairs),
            }
        )

    # Without this guard an empty result list yields a column-less DataFrame
    # and sort_values("r") raises KeyError.
    if not results:
        print(f"{select_file}: no station has enough valid points; skipped.")
        continue

    # Sort by correlation value for better visualization
    results_df = pd.DataFrame(results).sort_values("r", ascending=False)

    # Create correlation bar chart
    fig, ax = plt.subplots(figsize=(12, 8))

    # Explicit bar positions keep bars and significance markers aligned even
    # when STATION holds numeric ids (which would otherwise be used as
    # y coordinates instead of categories).
    y_positions = list(range(len(results_df)))
    colors = ["red" if value < 0 else "blue" for value in results_df["r"]]
    ax.barh(y_positions, results_df["r"], color=colors, alpha=0.7)
    ax.set_yticks(y_positions)
    ax.set_yticklabels(results_df["STATION"])

    # Add reference line at r=0
    ax.axvline(x=0, color="black", linewidth=1, linestyle="-")

    # Add significance markers (p < 0.05) at the tip of each bar
    for y_pos, r_value, p_value in zip(
        y_positions, results_df["r"], results_df["p_value"]
    ):
        if p_value < 0.05:
            ax.text(
                r_value,
                y_pos,
                " *",
                fontsize=16,
                va="center",
                ha="left" if r_value > 0 else "right",
            )

    # Styling
    ax.set_xlabel("Pearson Correlation (r)", fontsize=12, fontweight="bold")
    ax.set_ylabel("Station", fontsize=12, fontweight="bold")
    ax.set_title(
        f"Correlation between {current_layer} and CUMDISP by Station\n(* indicates p < 0.05)",
        fontsize=14,
        fontweight="bold",
    )
    ax.set_xlim(-1, 1)
    ax.grid(axis="x", alpha=0.3, linestyle="--")

    fig.tight_layout()
    visualize.save_figure(fig=fig, savepath=f"correlation_{current_layer}.png")
    # Close this specific figure so figures do not accumulate across the loop.
    plt.close(fig)

# scatter plot

In [4]:
FIGURE_SIZE = (16.5, 11.7)  # A3 landscape
OUTPUT_DPI = 300

# Draws one scatter panel per station (layer column vs. CUMDISP) with a
# least-squares line and the Pearson r, and saves the grid as a single figure.
# Only the last entry of `files` is processed here.
for select_file in files[-1:]:

    # File names end in "..._Layer_<id>.csv"; the data column is "Layer_<id>".
    current_layer = (
        "Layer_" + os.path.basename(select_file).split("_")[-1].split(".")[0]
    )
    # ==============================================================================
    # LOAD AND PREPARE DATA
    # ==============================================================================
    df = pd.read_csv(select_file)

    stations = sorted(df["STATION"].unique())
    n_stations = len(stations)

    # An empty file would give n_cols == 0 and a division by zero below.
    if n_stations == 0:
        print(f"{select_file}: no stations found; skipped.")
        continue

    # Calculate grid dimensions (aim for roughly square layout)
    n_cols = int(np.ceil(np.sqrt(n_stations)))
    n_rows = int(np.ceil(n_stations / n_cols))

    # ==============================================================================
    # CREATE SUBPLOTS
    # ==============================================================================
    # squeeze=False always returns a 2D array, so flatten() also works for a
    # 1x1 grid (single station), where plt.subplots would return a bare Axes.
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=FIGURE_SIZE,
        sharex=False,
        sharey=False,
        squeeze=False,
    )
    axes = axes.flatten()  # Convert to 1D array for easy indexing

    # Main title
    fig.suptitle(
        f"{current_layer}",
        fontsize=20,
        fontweight="bold",
        y=0.95,
    )

    # ==============================================================================
    # PLOT EACH STATION
    # ==============================================================================
    for ax, station in zip(axes, stations):
        # Keep only complete (x, y) pairs: NaN makes pearsonr return NaN and
        # makes np.polyfit fail.
        pairs = df.loc[
            df["STATION"] == station, [current_layer, "CUMDISP"]
        ].dropna()
        x = pairs[current_layer].values
        y = pairs["CUMDISP"].values

        # Calculate correlation (stations with fewer than 3 points keep an
        # empty, styled panel)
        if len(x) >= 3:
            r, p = pearsonr(x, y)

            # Scatter plot
            ax.scatter(x, y, alpha=0.6, s=30, edgecolors="black", linewidth=0.5)

            # Best fit line
            coeffs = np.polyfit(x, y, 1)
            fit_line = np.poly1d(coeffs)
            x_line = np.linspace(x.min(), x.max(), 100)
            ax.plot(x_line, fit_line(x_line), "r-", linewidth=2, alpha=0.7)

            # Statistics text ("*" marks p < 0.05)
            sig_marker = "*" if p < 0.05 else ""
            stats_text = f"r = {r:.3f}{sig_marker}\nn = {len(x)}"
            ax.text(
                0.05,
                0.95,
                stats_text,
                transform=ax.transAxes,
                va="top",
                fontsize=9,
                family="monospace",
                bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
            )

        # Styling
        ax.set_title(station, fontsize=11, fontweight="bold")
        ax.set_xlabel(current_layer, fontsize=9)
        ax.set_ylabel("CUMDISP", fontsize=9)
        ax.grid(True, alpha=0.3, linestyle="--")
        ax.tick_params(labelsize=8)

    # Hide unused subplots
    for unused_ax in axes[n_stations:]:
        unused_ax.axis("off")

    # ==============================================================================
    # SAVE AND DISPLAY
    # ==============================================================================
    # A single tight_layout call: a second, rect-less call would re-run the
    # layout over the full figure and discard the band reserved for the title.
    # NOTE(review): lower the 0.96 if top-row titles still touch the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    visualize.save_figure(fig=fig, savepath=f"scatter_plot_{current_layer}.png")
    # Close this specific figure so figures do not accumulate across the loop.
    plt.close(fig)

# timeseries plot

In [5]:
FIGURE_SIZE = (16.5, 11.7)  # A3 landscape
OUTPUT_DPI = 300
SCALE_DATA = True
# These two values are what fig.autofmt_xdate() (defaults rotation=30,
# bottom=0.2) used to enforce at the end of the cell, silently overriding the
# rotation and bottom margin set earlier. They are now explicit and tunable.
XTICK_ROTATION = 30
BOTTOM_MARGIN = 0.2

# For every input file: draw one time-series panel per station with the layer
# column ("MLCW") and CUMDISP ("InSAR"), optionally standardized per station,
# and save the grid as a single figure.
for select_file in files:

    # File names end in "..._Layer_<id>.csv"; the data column is "Layer_<id>".
    current_layer = (
        "Layer_" + os.path.basename(select_file).split("_")[-1].split(".")[0]
    )

    df = pd.read_csv(select_file)
    df["time"] = pd.to_datetime(df["time"])

    stations = sorted(df["STATION"].unique())
    n_stations = len(stations)

    # An empty file would give n_cols == 0 and a division by zero below.
    if n_stations == 0:
        print(f"{select_file}: no stations found; skipped.")
        continue

    n_cols = int(np.ceil(np.sqrt(n_stations)))
    n_rows = int(np.ceil(n_stations / n_cols))

    # squeeze=False always returns a 2D array, so flatten() also works for a
    # 1x1 grid (single station), where plt.subplots would return a bare Axes.
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=FIGURE_SIZE,
        sharex=True,
        sharey=False,
        squeeze=False,
    )
    axes = axes.flatten()

    fig.suptitle(f"{current_layer}", fontsize=20, fontweight="bold", y=0.96)

    # Standardized series are dimensionless, so the scaled label must not
    # claim millimetres.
    ylabel = (
        "Scaled Cumulative\nDisplacement (z-score)"
        if SCALE_DATA
        else "Cumulative\nDisplacement (mm)"
    )

    for idx, station in enumerate(stations):
        ax = axes[idx]
        col_idx = idx % n_cols

        station_data = df[df["STATION"] == station]
        time_arr = station_data["time"]
        x = station_data[current_layer].values
        y = station_data["CUMDISP"].values

        if SCALE_DATA:
            # Each series gets its own scaler: zero mean, unit variance.
            x = StandardScaler().fit_transform(x.reshape(-1, 1)).flatten()
            y = StandardScaler().fit_transform(y.reshape(-1, 1)).flatten()

        ax.plot(time_arr, x, label="MLCW", linewidth=2)
        ax.plot(time_arr, y, label="InSAR", linewidth=2)

        ax.set_title(station, fontsize=11, fontweight="bold")

        # Only leftmost column gets y-label
        if col_idx == 0:
            ax.set_ylabel(ylabel, fontsize=9, fontweight="bold")

        # Label the bottom-most *visible* panel of every column. When the last
        # grid row is only partly filled, that panel sits in the row above, so
        # testing for the last row alone would leave those columns unlabeled.
        if idx + n_cols >= n_stations:
            ax.tick_params(
                axis="x",
                labelbottom=True,
                labelsize=8,
                labelrotation=XTICK_ROTATION,
            )
        else:
            ax.tick_params(axis="x", labelbottom=False)

        ax.grid(True, alpha=0.3, linestyle="--")
        ax.tick_params(axis="y", labelsize=8)

    # Hide unused subplots
    for unused_ax in axes[n_stations:]:
        unused_ax.axis("off")

    # Single legend (every panel carries the same two series)
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(
        handles,
        labels,
        loc="upper center",
        bbox_to_anchor=(0.5, 0.94),
        ncol=2,
        fontsize=12,
        frameon=True,
        fancybox=True,
    )

    # Minimize spacing between subplots. fig.autofmt_xdate() is intentionally
    # not called: it would reset the bottom margin and rotation and hide the
    # date labels of every panel outside the last grid row. The x tick labels
    # stay center-aligned, which is matplotlib's default.
    fig.subplots_adjust(
        left=0.05,
        right=0.98,
        top=0.92,
        bottom=BOTTOM_MARGIN,
        hspace=0.15,  # Vertical spacing
        wspace=0.15,  # Horizontal spacing
    )

    savename = (
        f"scaled_timeseries_{current_layer}.png"
        if SCALE_DATA
        else f"timeseries_{current_layer}.png"
    )
    visualize.save_figure(fig=fig, savepath=savename)
    # Close this specific figure so figures do not accumulate across the loop.
    plt.close(fig)