| Component | Description | Shape | Type | Example | Notes |
|-----------|-------------|-------|------|---------|-------|
| `coords` | Spatial coordinates | n × 2 | numpy array or pandas DataFrame | `[[x₁,y₁], [x₂,y₂], ..., [xₙ,yₙ]]` | First column usually longitude, second latitude |
| `t` | Time coordinates | n × 1 | numpy array | `[[t₁], [t₂], ..., [tₙ]]` | Could be year, day, or any time unit |
| `X` | Independent variables | n × k | numpy array or pandas DataFrame | `[[x₁₁, x₁₂, ..., x₁ₖ], ..., [xₙ₁, xₙ₂, ..., xₙₖ]]` | Each column represents one variable |
| `y` | Dependent variable | n × 1 | numpy array, pandas DataFrame, or Series | `[[y₁], [y₂], ..., [yₙ]]` | Target variable being modeled |

In [1]:
from appgeopy import *
from my_packages import *

#### Extract predicted values from output shapefiles and save to pickle files

In [2]:
# Root folder that holds the kriging interpolation outputs.
kriging_interp_fld = (
    r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\4_Interpolation"
)

# Directories directly under the root folder.
target_flds = []
with os.scandir(kriging_interp_fld) as root_entries:
    for entry in root_entries:
        if entry.is_dir():
            target_flds.append(entry.path)

# Directories one level further down, gathered parent by parent.
subflds = []
for parent in target_flds:
    with os.scandir(parent) as child_entries:
        subflds.extend(child.path for child in child_entries if child.is_dir())

In [3]:
def get_shapefiles(select_fld, fld_type="Validation", point_type="grid"):
    """Return the shapefile paths stored in the matching sub-folder of ``select_fld``.

    The sub-folder is the first directory directly under ``select_fld`` whose
    name matches the glob pattern ``*{fld_type}*{point_type}*``.

    Parameters
    ----------
    select_fld : str
        Folder to search.
    fld_type : str, default "Validation"
        Text that must appear in the sub-folder name.
    point_type : str, default "grid"
        Text that must appear after ``fld_type`` in the sub-folder name.

    Returns
    -------
    list of str
        Paths of the ``*.shp`` files in the sub-folder, sorted by name so the
        order does not depend on the file system. May be empty.

    Raises
    ------
    IndexError
        If no sub-folder matches the pattern.
    """
    pattern = os.path.join(select_fld, f"*{fld_type}*{point_type}*")
    matching_dirs = [path for path in glob(pattern) if os.path.isdir(path)]
    if not matching_dirs:
        # Same exception type as a bare ``[0]`` lookup, but the message says
        # which pattern found nothing.
        raise IndexError(f"No folder matching {pattern!r}")
    return sorted(glob(os.path.join(matching_dirs[0], "*.shp")))

In [15]:
# To debug a single folder, set ``select_fld = subflds[0]`` and run the loop body by hand.

# For every interpolation folder, merge the "Predicted" column of each
# shapefile into one table (one row per STATION, one column per shapefile)
# and save it as "<folder name>.xz" in the current working directory.
for select_fld in tqdm(subflds):
    fld_basename = os.path.basename(select_fld)
    # Reset per folder so a failure is never reported against, and a pickle is
    # never written from, values left over from the previous folder.
    combined_df = None
    select_shp = None
    try:
        points_shp = get_shapefiles(select_fld, fld_type="Points", point_type="mlcw")
        print(fld_basename)

        for select_shp in points_shp:
            shp_basename = os.path.basename(select_shp).split(".")[0]
            # The last "_"-separated token of the file name becomes the column label.
            extract_datetime = shp_basename.split("_")[-1]

            select_shp_gdf = gpd.read_file(select_shp, read_geometry=True)
            output_shp_gdf = (
                select_shp_gdf.loc[
                    :, ["STATION", "LandSubsid", "POINT_X", "POINT_Y", "Predicted"]
                ]
                .rename(
                    columns={
                        "LandSubsid": "WellCode",
                        "POINT_X": "X_TWD97",
                        "POINT_Y": "Y_TWD97",
                        "Predicted": extract_datetime,
                    }
                )
                .set_index("STATION")
            )

            if combined_df is None:
                # The first shapefile defines the station index and the
                # WellCode / X_TWD97 / Y_TWD97 columns.
                combined_df = output_shp_gdf
            else:
                # Later shapefiles only contribute their prediction column,
                # aligned on STATION.
                combined_df[extract_datetime] = combined_df.index.map(
                    output_shp_gdf[extract_datetime]
                )

        if combined_df is None:
            print(f"{fld_basename}: no shapefiles found, nothing saved")
            continue

        combined_df.to_pickle(f"{fld_basename}.xz")
    except Exception as e:
        # Best effort: report the failure and carry on with the next folder.
        print(f"{fld_basename}: failed on {select_shp}: {e!r}")

  0%|          | 0/8 [00:00<?, ?it/s]

NSCORE_Monthly_GWL_CRFP_L1
NSCORE_Monthly_GWL_CRFP_L2D
NSCORE_Monthly_GWL_CRFP_L2S
NSCORE_Monthly_GWL_CRFP_L3
NSCORE_Monthly_GWL_CRFP_L4
QuantileNormTrans_Monthly_Electricity_CRFP_Full
QuantileNormTrans_Monthly_Rainfall_CRFP
NSCORE_Monthly_DISPLACEMENT_dU_CRFP_Full
