In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# Import the PySAL libraries for spatial analysis
from esda import Moran, Moran_Local
from libpysal.weights import KNN

# Show every column when printing DataFrames (no truncation of wide tables)
pd.options.display.max_columns = None


class SpatioTemporalMoranAnalyzer:
    """
    Load spatio-temporal point data from a CSV file and run Global and
    Local Moran's I for each time step.

    If the file cannot be found, or a required column is missing, an error
    message is printed and ``gdf`` is left as ``None``; every other attribute
    is still initialised so the result getters remain safe to call.

    Parameters:
    ----------
    csv_path : str
        The file path to the input CSV file.

    x_col : str
        The column name for X coordinates.

    y_col : str
        The column name for Y coordinates.

    time_col : str
        The column name for the time step.

    crs_epsg : int
        The EPSG code for the coordinate system (e.g., 3826 for TWD97).

    Attributes:
    ----------
    gdf : GeoDataFrame or None
        The loaded data with a point ``geometry`` column, or None on failure.

    time_col : str
        The time column name given at construction.

    unique_times : numpy.ndarray
        Sorted unique values of ``time_col`` (empty if loading failed).

    global_results : list of dict
        One record per analysed time step (filled by ``run_analysis``).

    local_results : dict
        Maps a time step to its GeoDataFrame with LISA columns
        (filled by ``run_analysis``).
    """

    def __init__(self, csv_path, x_col, y_col, time_col, crs_epsg):
        # Initialise every attribute up front so the object is in a
        # consistent state even when loading fails below.
        self.gdf = None
        self.time_col = time_col
        self.unique_times = np.array([])
        self.global_results = []
        self.local_results = {}

        print(f"Loading data from {csv_path}...")
        try:
            df = pd.read_csv(csv_path)

            # Build a 'geometry' column of points from the X/Y columns
            gdf = gpd.GeoDataFrame(
                df,
                geometry=gpd.points_from_xy(df[x_col], df[y_col]),
                crs=f"EPSG:{crs_epsg}",
            )
            unique_times = np.sort(gdf[time_col].unique())
        except FileNotFoundError:
            print(f"Error: File not found at {csv_path}")
            return
        except KeyError as e:
            print(f"Error: Column not found. Make sure {e} is in your CSV.")
            return

        # Only publish the data once every step above has succeeded
        self.gdf = gdf
        self.unique_times = unique_times
        print(
            f"Data loaded successfully. Found {len(self.gdf)} rows and {len(self.unique_times)} unique time steps."
        )

    def _build_weights(self, current_gdf, k):
        """
        Build a row-standardized K-Nearest-Neighbors spatial weights
        object from the point geometries of ``current_gdf``.

        Parameters:
        ----------
        current_gdf : GeoDataFrame
            Point data for a single time step.

        k : int
            The number of nearest neighbors per point.

        Returns:
        -------
        The KNN weights object with its transform set to "R".
        """
        # Coordinates as an (n, 2) array of x/y pairs
        coords = np.column_stack(
            [current_gdf.geometry.x, current_gdf.geometry.y]
        )

        w = KNN.from_array(coords, k=k)

        # Row-standardize the weights (common practice)
        w.transform = "R"
        return w

    def run_analysis(self, var_col, k=5, permutations=999, alpha=0.05):
        """
        Run the Global and Local Moran's I analysis for every time step.

        Results from any previous call are discarded first, so calling this
        method again (e.g. with a different ``k``) never produces duplicate
        rows. Rows whose ``var_col`` value is missing are excluded, and a
        time step is skipped (with a message) when it has no more than ``k``
        usable observations, because KNN needs at least ``k`` other points.

        Parameters:
        ----------
        var_col : str
            The column name of the variable you want to test (e.g., 'InSAR_diffdisp').

        k : int
            The number of neighbors to use for KNN (default is 5).

        permutations : int
            The number of permutations for the p-value simulation (default is 999).

        alpha : float
            Significance threshold applied to the local simulated p-values
            when filling the ``lisa_significant`` column (default is 0.05).

        Returns:
        -------
        None. Results are stored in ``global_results`` and ``local_results``.
        """
        if self.gdf is None:
            print("Cannot run analysis. Data was not loaded correctly.")
            return

        if var_col not in self.gdf.columns:
            print(f"Error: Variable column '{var_col}' not found in the data.")
            return

        print(f"Running analysis for '{var_col}' with k={k}...")

        # Start clean so repeated runs do not accumulate stale results
        self.global_results = []
        self.local_results = {}

        # Loop-invariant: which rows carry a usable value for the variable
        has_value = self.gdf[var_col].notna()

        for t in self.unique_times:
            # 1. Isolate the usable observations for this one time step
            g_t = self.gdf[(self.gdf[self.time_col] == t) & has_value].copy()

            # KNN needs k neighbors besides the point itself
            if len(g_t) <= k:
                print(
                    f"Skipping time step {t}: only {len(g_t)} valid "
                    f"observations, need more than k={k}."
                )
                continue

            # 2. Build the spatial weights (w) for these stations
            w = self._build_weights(g_t, k=k)

            # 3. Get the variable values as a float array
            y = g_t[var_col].values.astype(float)

            # 4. Global Moran's I for this time step
            moran_global = Moran(y, w, permutations=permutations)
            self.global_results.append(
                {
                    "time": t,
                    "n": len(g_t),
                    "I": moran_global.I,
                    "p_value": moran_global.p_sim,  # p-value from simulation
                    "z_score": moran_global.z_sim,  # z-score from simulation
                }
            )

            # 5. Local Moran's I (LISA), attached to the slice for mapping
            moran_local = Moran_Local(y, w, permutations=permutations)
            g_t["lisa_I"] = moran_local.Is  # Local I value
            g_t["lisa_q"] = moran_local.q  # Quadrant (1=HH, 2=LH, 3=LL, 4=HL)
            g_t["lisa_p_value"] = moran_local.p_sim  # Local p-value
            g_t["lisa_significant"] = moran_local.p_sim < alpha

            self.local_results[t] = g_t

        print("Analysis complete.")

    def get_global_results_df(self):
        """
        Return the global Moran's I results as a DataFrame indexed by time.

        Returns an empty DataFrame (after printing a hint) when there are
        no results yet.
        """
        if not self.global_results:
            print("No results. Did you run .run_analysis() first?")
            return pd.DataFrame()

        return pd.DataFrame(self.global_results).set_index("time")

    def get_local_results_for_time(self, time_step):
        """
        Return the GeoDataFrame with local (LISA) results for one time step.

        Returns an empty GeoDataFrame (after printing a message) when there
        are no results yet or ``time_step`` has no stored result. The
        returned object is the stored one, not a copy.
        """
        if not self.local_results:
            print("No results. Did you run .run_analysis() first?")
            return gpd.GeoDataFrame()

        if time_step not in self.local_results:
            print(f"Error: Time step {time_step} not found or has no results.")
            return gpd.GeoDataFrame()

        return self.local_results[time_step]

In [3]:
# Collect this run's calibration CSVs. glob() returns matches in arbitrary,
# OS-dependent order, so sort them to make positional access such as
# files[0] deterministic.
files = sorted(glob(os.path.join("calib_diffdisp/", "20251016*.csv")))
files

['calib_diffdisp\\20251016_GTWR_InputData_DiffDisp_Layer_1.csv',
 'calib_diffdisp\\20251016_GTWR_InputData_DiffDisp_Layer_2.csv',
 'calib_diffdisp\\20251016_GTWR_InputData_DiffDisp_Layer_3.csv',
 'calib_diffdisp\\20251016_GTWR_InputData_DiffDisp_Layer_4.csv',
 'calib_diffdisp\\20251016_GTWR_InputData_DiffDisp_Layer_All.csv']

In [18]:
# ---- Example: driving SpatioTemporalMoranAnalyzer from a notebook cell ----

# Input file and the column names it uses (adjust these to your own data).
CSV_FILE = files[0]
EPSG = 3826  # EPSG code of the coordinate system
X_COL, Y_COL = "X_TWD97", "Y_TWD97"  # X / Y coordinate columns
TIME_COL = "monthly"  # time-step column
VAR_TO_TEST = "Layer_1"  # variable to analyze

# Constructing the analyzer reads the CSV and builds the point GeoDataFrame.
analyzer = SpatioTemporalMoranAnalyzer(CSV_FILE, X_COL, Y_COL, TIME_COL, EPSG)

# Compute Global and Local Moran's I for every time step, with 8 neighbors.
analyzer.run_analysis(VAR_TO_TEST, k=8)



Loading data from calib_diffdisp\20251016_GTWR_InputData_DiffDisp_Layer_1.csv...
Data loaded successfully. Found 1943 rows and 67 unique time steps.
Running analysis for 'Layer_1' with k=8...
Analysis complete.


In [19]:
# 4. Collect the per-time-step Global Moran's I values into one table and print it
global_results_table = analyzer.get_global_results_df()
print(
    "\n--- Global Moran's I Results (all times) ---",
    global_results_table,
    sep="\n",
)


--- Global Moran's I Results (all times) ---
       n         I  p_value   z_score
time                                 
1     29  0.180689    0.005  3.425285
2     29 -0.070103    0.332 -0.530574
3     29 -0.014032    0.337  0.278091
4     29  0.046799    0.118  1.132914
5     29 -0.095069    0.198 -0.863774
...   ..       ...      ...       ...
63    29 -0.047164    0.492 -0.159886
64    29 -0.014627    0.353  0.257568
65    29 -0.096403    0.140 -1.082847
66    29  0.224483    0.005  3.757075
67    29 -0.021921    0.366  0.219213

[67 rows x 4 columns]


In [20]:
# 5. Pull the Local (LISA) results for a single time step,
# here the first entry of the sorted time steps
first_time_step = analyzer.unique_times[0]
local_results_map = analyzer.get_local_results_for_time(first_time_step)

print(f"\n--- Local Moran's I Results (for time = {first_time_step}) ---")
# The result is a GeoDataFrame, so it can be mapped; print only the LISA columns
lisa_columns = ['geometry', 'lisa_I', 'lisa_q', 'lisa_significant']
print(local_results_map[lisa_columns])


--- Local Moran's I Results (for time = 1) ---
                            geometry    lisa_I  lisa_q  lisa_significant
0     POINT (178859.959 2608228.949) -0.385170       4              True
67    POINT (173088.151 2608157.277)  0.769696       3              True
134   POINT (175783.145 2616755.314)  1.771461       3              True
201   POINT (190429.149 2629865.287)  0.180894       1              True
268   POINT (171859.184 2631894.183) -0.003736       4             False
335   POINT (179785.176 2632016.259)  0.086892       1             False
402   POINT (189084.098 2626508.298)  0.050670       1             False
469   POINT (171150.146 2629140.251) -0.010994       4              True
536   POINT (183487.976 2620454.902)  0.237646       3              True
603   POINT (197073.457 2649583.283)  0.361638       1              True
670   POINT (192041.068 2623606.307)  0.034392       1             False
737   POINT (163506.218 2614756.247) -0.563350       4              True
804

In [21]:
# Display the LISA results for the selected time step as a table.
# NOTE(review): `show` presumably comes from ITables via the star imports
# (the cell output reports ITables loading) -- confirm.
show(local_results_map)

0
Loading ITables v2.4.5 from the internet...  (need help?)


In [22]:
# --- Step 1: Prepare the data for mapping (Same as before) ---
def get_cluster_name(row):
    """
    Translate one row's LISA result into a human-readable cluster label.

    Rows whose 'lisa_significant' flag is falsy are labelled
    "Not Significant" regardless of quadrant. Otherwise the label is
    chosen from the 'lisa_q' quadrant code (1-4); any other code
    yields "Other".
    """
    if not row['lisa_significant']:
        return "Not Significant"

    # Quadrant code -> label
    quadrant_labels = {
        1: "High-High (Hot Spot)",
        2: "Low-High (Outlier)",
        3: "Low-Low (Cold Spot)",
        4: "High-Low (Outlier)",
    }
    return quadrant_labels.get(row['lisa_q'], "Other")

# --- Step 2: Colors per cluster type ---
# The insertion order of this dict is also the category order used below.
color_mapping = {
    "High-High (Hot Spot)": "red",
    "Low-Low (Cold Spot)": "blue",
    "Low-High (Outlier)": "lightgreen",
    "High-Low (Outlier)": "purple",
    "Not Significant": "lightgray",
}

# Category labels and their colors as two parallel lists, so the color at
# position i belongs to the category with code i.
categories_ordered = list(color_mapping)
color_list = list(color_mapping.values())

# Label every row with its cluster type and store it as an ordered categorical.
local_results_map["cluster_type"] = local_results_map.apply(
    get_cluster_name, axis=1
).astype(pd.CategoricalDtype(categories=categories_ordered, ordered=True))

# --- Step 3: Map the results using explore() ---
# NOTE(review): "time" is a hard-coded column name here, while the analyzer
# was configured with TIME_COL -- confirm the data really has a "time" column.
m = local_results_map.explore(
    column="cluster_type",
    cmap=color_list,  # list of colors, one per category, in category order
    categorical=True,
    legend=True,
    legend_kwds=dict(caption="LISA Clusters", loc="lower right"),
    tooltip=["time", "cluster_type", "lisa_I", VAR_TO_TEST],
    popup=True,
    style_kwds=dict(fillOpacity=0.8, radius=10),
    name=f"LISA Clusters (Time {local_results_map['time'].iloc[0]})",
)

# Display the map
m