In [1]:
"""
GTWR Model Output Integration with Predicted Coefficients and MLCW Data
Streamlined workflow for processing coefficient predictions with GTWR outputs and MLCW time series.
"""

from appgeopy import *
from my_packages import *

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# CONFIGURATION
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

# File paths
# Directory that discover_file_pairs() searches for "*gtwr_Layer*.csv" files.
GTWR_DIR = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\5_GTWR_Prediction"
# HDF5 file that load_mlcw_data() opens to read per-station monthly MLCW values.
MLCW_H5_PATH = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\1_PrepareDatasets\1_MLCWs\20250415_MLCW_CRFP_monthly_v2.h5"

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# UTILITY FUNCTIONS
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =


def discover_file_pairs():
    """Discover and pair coefficient and GTWR model files.

    Coefficient files (``*Predicted_Coeffs*.csv``) are searched in the current
    working directory; GTWR model files (``*gtwr_Layer*.csv``) are searched in
    ``GTWR_DIR``. The two lists are paired by position.

    Returns:
        list[tuple[str, str]]: ``(coeffs_file, gtwr_file)`` pairs.
    """
    import warnings

    # glob() returns names in directory-listing order, which is not
    # guaranteed to be stable. Sorting makes the positional pairing (and the
    # pair index that callers later use as a layer index) deterministic.
    # NOTE: the sort is lexicographic, so "Layer_10" would sort before
    # "Layer_2"; zero-pad layer numbers in file names if that ever applies.
    predict_coeffs_files = sorted(glob("*Predicted_Coeffs*.csv"))
    gtwr_model_files = sorted(
        glob(os.path.join(GTWR_DIR, "*gtwr_Layer*.csv"))
    )

    # zip() stops at the shorter list; make the dropped files visible instead
    # of silently ignoring them.
    if len(predict_coeffs_files) != len(gtwr_model_files):
        warnings.warn(
            f"Found {len(predict_coeffs_files)} coefficient files but "
            f"{len(gtwr_model_files)} GTWR model files; "
            "unmatched files are ignored.",
            stacklevel=2,
        )

    return list(zip(predict_coeffs_files, gtwr_model_files))


def load_and_prepare_data(coeffs_file, gtwr_file):
    """Read one coefficient/GTWR file pair and tag GTWR rows with a PointKey.

    The GTWR table gains a ``PointKey`` column of the form ``X<int>Y<int>``,
    built from ``X_TWD97`` and ``Y_TWD97`` multiplied by 1000 and truncated
    to integers.

    Returns:
        tuple: ``(coeffs_df, gtwr_df, unique_pointkeys)`` where
        ``unique_pointkeys`` holds the distinct ``PointKey`` values of the
        coefficient table.
    """
    coeffs_table = pd.read_csv(coeffs_file)
    gtwr_table = pd.read_csv(gtwr_file)

    def _make_key(easting, northing):
        # Same scale-and-truncate rule for both coordinates.
        return f"X{int(easting*1000)}Y{int(northing*1000)}"

    # Build one key per GTWR row from its coordinate pair.
    gtwr_table["PointKey"] = list(
        map(_make_key, gtwr_table["X_TWD97"], gtwr_table["Y_TWD97"])
    )

    return coeffs_table, gtwr_table, coeffs_table["PointKey"].unique()


def process_pointkey(pointkey, predicted_coeffs_df, gtwr_model_output_df):
    """Attach GTWR ``y`` / ``yhat`` values to the coefficient rows of one point.

    Rows of both tables are filtered to ``pointkey``. The coefficient rows are
    keyed by their ``monthly`` column and the GTWR rows by ``time_stamp``;
    each coefficient row receives the GTWR value with the same key, or NaN
    when the GTWR table has no such key.

    Returns:
        pandas.DataFrame: the filtered coefficient rows (``monthly`` restored
        as a regular column) with added ``y`` and ``yhat`` columns.
    """
    selector = "PointKey==@pointkey"

    merged = predicted_coeffs_df.query(selector).copy().set_index("monthly")
    lookup = gtwr_model_output_df.query(selector).copy().set_index("time_stamp")

    # Align the GTWR series onto the coefficient time index.
    merged["y"] = merged.index.map(lookup["y"])
    merged["yhat"] = merged.index.map(lookup["yhat"])

    return merged.reset_index()


def load_mlcw_data(station_name, layer_idx):
    """Return the monthly MLCW series of one station layer as a DataFrame.

    Reads ``MLCW_H5_PATH`` through ``gwatertools.h5pytools.open_HDF5`` and
    picks ``monthly_values/compactbylayer_PCA[layer_idx]`` for the station.
    The station's ``monthly_date`` entries (bytes, decoded as UTF-8) become
    the datetime index.

    Returns:
        pandas.DataFrame: a single column named ``MLCW_Layer_<layer_idx+1>``.
    """
    h5_contents, _ = gwatertools.h5pytools.open_HDF5(MLCW_H5_PATH)
    station_group = h5_contents[station_name]

    layer_values = station_group["monthly_values"]["compactbylayer_PCA"][
        layer_idx
    ]
    # Dates are stored as byte strings; decode before parsing.
    decoded_dates = [raw.decode("utf-8") for raw in station_group["monthly_date"]]
    timestamps = pd.to_datetime(decoded_dates)

    frame = pd.DataFrame(
        layer_values,
        index=timestamps,
        columns=[f"MLCW_Layer_{layer_idx+1}"],
    )
    return frame


# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# MAIN PROCESSING CLASS
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =


class GTWRProcessor:
    """Stateful driver joining predicted coefficients, GTWR outputs and MLCW data.

    Attributes:
        file_pairs: ``(coeffs_file, gtwr_file)`` tuples from
            ``discover_file_pairs()``.
        current_pair_idx: index of the loaded pair, or ``None``. It is also
            passed to ``load_mlcw_data`` as the layer index.
        coeffs_df: coefficient table of the loaded pair, or ``None``.
        gtwr_df: GTWR table of the loaded pair, or ``None``.
        pointkeys: unique ``PointKey`` values of ``coeffs_df``, or ``None``.
    """

    def __init__(self):
        self.file_pairs = discover_file_pairs()
        print(f"Found {len(self.file_pairs)} file pairs")

        self.current_pair_idx = None
        self.coeffs_df = None
        self.gtwr_df = None
        self.pointkeys = None

    def load_pair(self, pair_idx):
        """Load the file pair at ``pair_idx`` and make it the current pair.

        Raises:
            IndexError: if ``pair_idx`` is outside ``file_pairs``.
        """
        # Load into locals first so a failed load leaves the previous state
        # (index and tables) consistent instead of half-updated.
        coeffs_df, gtwr_df, pointkeys = load_and_prepare_data(
            *self.file_pairs[pair_idx]
        )
        self.current_pair_idx = pair_idx
        self.coeffs_df = coeffs_df
        self.gtwr_df = gtwr_df
        self.pointkeys = pointkeys
        print(f"Loaded pair {pair_idx}: {len(self.pointkeys)} pointkeys")

    def process_single_point(self, pointkey_idx, include_mlcw=True):
        """Process one pointkey, optionally adding the MLCW series.

        Args:
            pointkey_idx: position of the pointkey within ``self.pointkeys``.
            include_mlcw: when true, add an ``MLCW_Layer_<n>`` column
                expressed relative to its first value.

        Returns:
            tuple: ``(result_df, station_name)``; ``result_df`` is indexed by
            the ``time`` column.

        Raises:
            ValueError: if no pair has been loaded.
            IndexError: if ``pointkey_idx`` is out of range or the pointkey
                has no rows in the coefficient table.
        """
        if self.coeffs_df is None:
            raise ValueError("No data loaded. Call load_pair() first.")

        pointkey = self.pointkeys[pointkey_idx]
        station_name = self.coeffs_df.query("PointKey==@pointkey").STATION.iloc[
            0
        ]

        # Process GTWR data
        result = process_pointkey(pointkey, self.coeffs_df, self.gtwr_df)
        result = result.set_index("time")

        # Add MLCW data if requested
        if include_mlcw:
            mlcw_df = load_mlcw_data(station_name, self.current_pair_idx)
            layer_col = f"MLCW_Layer_{self.current_pair_idx+1}"

            result[layer_col] = result.index.map(mlcw_df[layer_col])
            # Apply relative displacement (subtract first value).
            # NOTE: if the first timestamp has no MLCW match, the baseline is
            # NaN and the whole column becomes NaN.
            result[layer_col] = result[layer_col] - result[layer_col].iloc[0]

        return result, station_name

    def process_all_points(self, include_mlcw=True):
        """Process every pointkey of the current pair into one table.

        Points whose processing raises ``KeyError`` or ``IndexError`` are
        skipped; a summary of the skipped points is printed so that an empty
        or short result is not mistaken for a successful run.

        Returns:
            pandas.DataFrame: concatenated per-point results with an added
            ``station`` column, or an empty DataFrame if nothing succeeded.

        Raises:
            ValueError: if no pair has been loaded.
        """
        # Same guard as process_single_point(); without it, iterating over
        # the unset pointkeys fails with an opaque
        # "TypeError: 'NoneType' object is not iterable".
        if self.coeffs_df is None or self.pointkeys is None:
            raise ValueError("No data loaded. Call load_pair() first.")

        results = []
        skipped = []
        for i, pointkey in enumerate(self.pointkeys):
            try:
                result, station = self.process_single_point(i, include_mlcw)
            except (KeyError, IndexError) as err:
                skipped.append((pointkey, err))
                continue
            result["station"] = station
            results.append(result.reset_index())

        if skipped:
            preview = ", ".join(
                f"{key} ({type(err).__name__}: {err})"
                for key, err in skipped[:5]
            )
            more = (
                "" if len(skipped) <= 5 else f", ... (+{len(skipped) - 5} more)"
            )
            print(
                f"Skipped {len(skipped)} of {len(self.pointkeys)} pointkeys: "
                f"{preview}{more}"
            )

        return (
            pd.concat(results, ignore_index=True) if results else pd.DataFrame()
        )


# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# EXECUTION
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

# Initialize processor
today_string = datetime.now().strftime("%Y%m%d")

processor = GTWRProcessor()
# Iterate over the pairs that were actually discovered rather than a fixed
# count: a hard-coded range raises IndexError when fewer pairs exist and
# ignores any extra ones. The pair index doubles as the layer index.
for pair_idx in range(len(processor.file_pairs)):
    processor.load_pair(pair_idx)
    output_table = processor.process_all_points()
    output_table.to_csv(
        f"{today_string}_Future_and_Predicted_MLCW_Layer_{pair_idx+1}.csv",
        index=False,
    )
# # Process specific pair and pointkey
# processor.load_pair(0)
# single_result, station_name = processor.process_single_point(
#     10, include_mlcw=True
# )

# print(f"Processed station: {station_name}")
# single_result

Found 4 file pairs


TypeError: 'NoneType' object is not iterable

# GTWR Processor System Architecture

## Overview
The GTWR processor implements a modular workflow for integrating coefficient predictions with GTWR model outputs and MLCW time series data. The system follows a class-based architecture that maintains state across processing operations.

## Component Relationships

### Configuration Layer
```
CONFIGURATION
├── GTWR_DIR (file path)
└── MLCW_H5_PATH (file path)
```

### Utility Function Dependencies
```
discover_file_pairs()
├── Searches the current working directory for "*Predicted_Coeffs*.csv"
├── Searches GTWR_DIR for "*gtwr_Layer*.csv"
└── Returns (coeffs_file, gtwr_file) tuples paired by list position

load_and_prepare_data(coeffs_file, gtwr_file)
├── Loads coefficient predictions DataFrame
├── Loads GTWR model outputs DataFrame
├── Generates PointKey identifiers
└── Returns (coeffs_df, gtwr_df, unique_pointkeys)

process_pointkey(pointkey, coeffs_df, gtwr_df)
├── Filters data by PointKey
├── Sets indices for temporal alignment
├── Maps GTWR values to coefficient timepoints
└── Returns merged DataFrame

load_mlcw_data(station_name, layer_idx)
├── Opens HDF5 file using MLCW_H5_PATH
├── Extracts station-specific data
├── Processes temporal and value arrays
└── Returns formatted MLCW DataFrame
```

### Main Processing Class Workflow
```
GTWRProcessor.__init__()
├── Calls discover_file_pairs()
├── Initializes state variables
└── Displays file pair count

GTWRProcessor.load_pair(pair_idx)
├── Calls load_and_prepare_data()
├── Updates current_pair_idx state
├── Stores coeffs_df, gtwr_df, pointkeys
└── Displays loading confirmation

GTWRProcessor.process_single_point(pointkey_idx, include_mlcw)
├── Validates data loading state
├── Extracts pointkey and station information
├── Calls process_pointkey() for GTWR integration
├── Conditionally calls load_mlcw_data()
├── Applies relative displacement calculation
└── Returns (result_df, station_name)

GTWRProcessor.process_all_points(include_mlcw)
├── Iterates through all pointkeys
├── Calls process_single_point() for each
├── Skips points that raise KeyError or IndexError
├── Concatenates results
└── Returns consolidated DataFrame
```

## Data Flow Architecture

### Input Sources
The system processes three primary data sources that flow through the processing pipeline:

**Coefficient Files** + **GTWR Model Files** + **MLCW HDF5 Data** (optional)

### Processing Pipeline
```
File Discovery → Data Loading → Point Processing → Result Consolidation
```

The coefficient predictions serve as the primary temporal framework. For each point, the GTWR model outputs (`y` and `yhat`) are aligned to the coefficient timestamps through index mapping; timestamps with no GTWR match are left as NaN. MLCW integration is optional: when enabled, the MLCW series for the point's station and layer is mapped onto the same timestamps and its first value is subtracted, so the series is expressed as displacement relative to its starting point.

### State Management
The GTWRProcessor class maintains processing state through instance variables that track the current file pair index, loaded DataFrames, and available pointkeys. This design enables efficient batch processing while preserving the ability to process individual points interactively.

### Error Handling Strategy
The system validates that a file pair has been loaded before processing a single point. During batch processing, any point whose processing raises `KeyError` or `IndexError` (for example, a pointkey with no matching rows) is skipped so that one bad point does not terminate the whole run; all other exceptions propagate to the caller.

## Integration Points

The architecture supports both single-point processing for interactive analysis and batch processing for comprehensive dataset processing. The modular function design allows individual components to be tested and utilized independently, while the class-based wrapper provides a cohesive interface for complex workflows involving multiple data sources and processing steps.