In [1]:
"""
InSAR Time Series Analysis with Predictive Model Integration - Class-based Implementation
"""

from appgeopy import *
from my_packages import *


class InSARProcessor:
    """
    Class for processing InSAR measurements with predictive model integration.
    Maintains state variables like current_layer for easy access in Jupyter notebooks.

    Attributes:
        insar_df: Observations read from the Excel file, indexed by the parsed
            "time" column.
        unique_pointkeys: Sorted unique values of the "PointKey" column.
        coeff_files: Sorted HDF5 paths matching "<top_folder>/*coeff*/L*/*h5".
        intercept_files: Sorted HDF5 paths matching "<top_folder>/*intercept*/L*/*h5".
        file_pairs: List of (coeff_file, intercept_file) tuples; a layer index
            passed to load_layer() is an index into this list.
        current_layer: Name of the directory holding the loaded coefficient
            file, or None before the first load_layer() call.
        current_coeff_data / current_intercept_data: Contents of the currently
            loaded HDF5 files, or None before the first load_layer() call.
    """

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self, excel_file, top_folder):
        """
        Initialize processor with data sources.

        Args:
            excel_file: Path to InSAR measurement Excel file. It must provide
                at least the "time" and "PointKey" columns.
            top_folder: Directory containing HDF5 model outputs
        """
        # Load and prepare InSAR data
        self.insar_df = pd.read_excel(excel_file)
        self.insar_df["time"] = pd.to_datetime(self.insar_df["time"])
        self.insar_df = self.insar_df.set_index("time")

        # Get unique identifiers
        self.unique_pointkeys = sorted(self.insar_df["PointKey"].unique())

        # Discover model files. glob() gives no ordering guarantee, so both
        # lists are sorted to make the positional pairing deterministic.
        self.coeff_files = sorted(
            glob(os.path.join(top_folder, "*coeff*", "L*", "*h5"))
        )
        self.intercept_files = sorted(
            glob(os.path.join(top_folder, "*intercept*", "L*", "*h5"))
        )
        self.file_pairs = self._pair_model_files(
            self.coeff_files, self.intercept_files
        )

        # State variables
        self.current_layer = None
        self.current_coeff_data = None
        self.current_intercept_data = None

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _layer_name(file_path):
        """Return the name of the directory that directly contains file_path."""
        # os.path handles the platform's separator(s); splitting on a literal
        # backslash only works for Windows-style paths.
        return os.path.basename(os.path.dirname(file_path))

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _pair_model_files(coeff_files, intercept_files):
        """
        Pair coefficient and intercept files positionally after sorting.

        Args:
            coeff_files: Iterable of coefficient HDF5 paths
            intercept_files: Iterable of intercept HDF5 paths

        Returns:
            list[tuple[str, str]]: (coeff_file, intercept_file) pairs. When the
            inputs differ in length the surplus files are left unpaired and a
            warning is issued; a warning is also issued for any pair whose
            parent directory names differ.
        """
        import warnings  # local: the notebook's star imports may not expose it

        coeff_sorted = sorted(coeff_files)
        intercept_sorted = sorted(intercept_files)

        if len(coeff_sorted) != len(intercept_sorted):
            warnings.warn(
                f"Found {len(coeff_sorted)} coefficient file(s) but "
                f"{len(intercept_sorted)} intercept file(s); "
                "unpaired files are ignored.",
                stacklevel=3,
            )

        pairs = list(zip(coeff_sorted, intercept_sorted))
        for coeff_file, intercept_file in pairs:
            coeff_layer = InSARProcessor._layer_name(coeff_file)
            intercept_layer = InSARProcessor._layer_name(intercept_file)
            if coeff_layer != intercept_layer:
                warnings.warn(
                    f"Layer mismatch in file pair: {coeff_file!r} "
                    f"({coeff_layer}) vs {intercept_file!r} ({intercept_layer})",
                    stacklevel=3,
                )
        return pairs

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def dict_to_table(data_dict, pointkey, quantity="future_predictions"):
        """
        Convert HDF5 dictionary data to DataFrame.

        Args:
            data_dict: Nested mapping laid out as
                data_dict[pointkey][quantity]["index" | "values"]
            pointkey: Point identifier to extract
            quantity: Name of the quantity to extract; also used as the
                column name of the result

        Returns:
            pd.DataFrame: Single-column frame (column named after `quantity`)
            indexed by the parsed timestamps.

        Raises:
            KeyError: If pointkey, quantity, "index" or "values" is missing.
        """
        entry = data_dict[pointkey][quantity]
        # Timestamps may be stored as bytes or as text; decode only the former
        # so both representations are accepted.
        labels = [
            x.decode("utf-8") if isinstance(x, bytes) else x
            for x in entry["index"]
        ]
        time_arr = pd.to_datetime(labels)
        values_arr = entry["values"]
        return pd.DataFrame(data=values_arr, index=time_arr, columns=[quantity])

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def load_layer(self, layer_idx):
        """
        Load HDF5 data for specific layer and update state.

        Updates current_layer, current_coeff_data and current_intercept_data,
        then prints the loaded layer name.

        Args:
            layer_idx: Index of layer to load (position in self.file_pairs)

        Raises:
            IndexError: If layer_idx is outside the range of self.file_pairs.
        """
        coeff_file, intercept_file = self.file_pairs[layer_idx]
        self.current_layer = self._layer_name(coeff_file)

        # open_HDF5 returns a 2-tuple; only its first element is used here.
        self.current_coeff_data, _ = gwatertools.h5pytools.open_HDF5(coeff_file)
        self.current_intercept_data, _ = gwatertools.h5pytools.open_HDF5(
            intercept_file
        )

        print(f"Loaded layer: {self.current_layer}")

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def process_pointkey(self, pointkey, quantity="future_predictions"):
        """
        Process single pointkey with currently loaded layer.

        Args:
            pointkey: Point identifier to process
            quantity: Data quantity to extract

        Returns:
            pd.DataFrame: Merged observational and predicted data. Contains the
            observation columns plus "pred_coeffs", "pred_intercept" and
            "layer"; the time index is restored as a regular column. Rows with
            a missing value in any column are dropped.

        Raises:
            ValueError: If no layer has been loaded yet.
            KeyError: If pointkey or quantity is absent from the loaded data.
        """
        if self.current_coeff_data is None or self.current_intercept_data is None:
            raise ValueError("No layer loaded. Call load_layer() first.")

        # Get observational data (copy so the source frame is never modified)
        insar_data = self.insar_df[self.insar_df["PointKey"] == pointkey].copy()

        # Extract predictions
        pred_coeffs = self.dict_to_table(
            self.current_coeff_data, pointkey, quantity
        )
        pred_intercept = self.dict_to_table(
            self.current_intercept_data, pointkey, quantity
        )

        # Merge with observations: look up each observation timestamp in the
        # prediction series; timestamps without a prediction become NaN.
        insar_data["pred_coeffs"] = insar_data.index.map(pred_coeffs[quantity])
        insar_data["pred_intercept"] = insar_data.index.map(
            pred_intercept[quantity]
        )

        # Keep complete rows only. This also drops rows whose observation
        # columns contain NaN, not just rows lacking a prediction.
        insar_data = insar_data.dropna(how="any")

        # Add metadata
        insar_data["layer"] = self.current_layer
        insar_data = insar_data.reset_index(drop=False)

        return insar_data

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def process_layer_all_points(self, layer_idx):
        """
        Process all pointkeys for specific layer.

        Args:
            layer_idx: Index of layer to load and process

        Returns:
            pd.DataFrame: Concatenated results for every pointkey present in
            the layer's model data; an empty DataFrame if none matched.
        """
        self.load_layer(layer_idx)
        results = []

        for pointkey in self.unique_pointkeys:
            try:
                results.append(self.process_pointkey(pointkey))
            except KeyError:
                # Best effort: points missing from the model output are skipped
                continue

        return (
            pd.concat(results, ignore_index=True) if results else pd.DataFrame()
        )

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def process_point_all_layers(self, pointkey):
        """
        Process all layers for specific pointkey.

        Each layer is loaded in turn, so the processor is left pointing at the
        last layer in self.file_pairs afterwards.

        Args:
            pointkey: Point identifier to process

        Returns:
            pd.DataFrame: Concatenated results from every layer containing the
            pointkey; an empty DataFrame if none matched.
        """
        results = []

        for layer_idx in range(len(self.file_pairs)):
            self.load_layer(layer_idx)
            try:
                results.append(self.process_pointkey(pointkey))
            except KeyError:
                # Best effort: layers without this point are skipped
                continue

        return (
            pd.concat(results, ignore_index=True) if results else pd.DataFrame()
        )

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def get_info(self):
        """Display current processor state."""
        print(f"Loaded pointkeys: {len(self.unique_pointkeys)}")
        print(f"Available layers: {len(self.file_pairs)}")
        print(f"Current layer: {self.current_layer}")

In [2]:
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# USAGE IN JUPYTER
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

# Run configuration: adjust these two paths for the local environment
INSAR_EXCEL_FILE = "20250714_GTWR_InSAR_2016_to_2024.xlsx"
MODEL_TOP_FOLDER = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\7_CurveFitting\curvefit_scripts\test003"

# Initialize processor
processor = InSARProcessor(
    excel_file=INSAR_EXCEL_FILE,
    top_folder=MODEL_TOP_FOLDER,
)

# One date stamp for the whole batch so every exported file shares it
today_string = datetime.now().strftime("%Y%m%d")

# Batch processing: export one CSV per discovered layer
for layer_index in range(len(processor.file_pairs)):
    output_table = processor.process_layer_all_points(layer_index)
    current_layer = processor.current_layer

    # process_layer_all_points returns a column-less DataFrame when no point
    # matched; sorting it by name would raise KeyError, so skip the export.
    if output_table.empty:
        print(f"No matching points for layer: {current_layer}; export skipped")
        continue

    output_table = output_table.sort_values(by=["STATION", "monthly"])

    # Linear model: prediction = observed displacement * coefficient + intercept
    output_table["pred_MLCW"] = (
        output_table["InSAR_CUMDISP"] * output_table["pred_coeffs"]
        + output_table["pred_intercept"]
    )

    output_table.to_csv(
        f"{today_string}_Future_InSAR_and_Predicted_Coeffs_{current_layer}.csv",
        index=False,
    )

Loaded layer: Layer_1
Loaded layer: Layer_2
Loaded layer: Layer_3
Loaded layer: Layer_4
