| Component | Description | Shape | Type | Example | Notes |
|-----------|-------------|-------|------|---------|-------|
| `coords` | Spatial coordinates | n × 2 | numpy array or pandas DataFrame | `[[x₁,y₁], [x₂,y₂], ..., [xₙ,yₙ]]` | First column usually longitude, second latitude |
| `t` | Time coordinates | n × 1 | numpy array | `[[t₁], [t₂], ..., [tₙ]]` | Could be year, day, or any time unit |
| `X` | Independent variables | n × k | numpy array or pandas DataFrame | `[[x₁₁, x₁₂, ..., x₁ₖ], ..., [xₙ₁, xₙ₂, ..., xₙₖ]]` | Each column represents one variable |
| `y` | Dependent variable | n × 1 | numpy array, pandas DataFrame, or Series | `[[y₁], [y₂], ..., [yₙ]]` | Target variable being modeled |

In [2]:
from appgeopy import *
from my_packages import *

#### Extract predicted values from output shapefiles and save to pickle files

In [3]:
# Root folder that is scanned for interpolation outputs.
kriging_interp_fld = r"D:\1000_SCRIPTS\003_Project002\20250917_GTWR002\2_KrigingInterpolation\4_Interpolation"

# Level 1: every directory directly below the root folder.
target_flds = [
    entry.path
    for entry in os.scandir(kriging_interp_fld)
    if entry.is_dir()
]

# Level 2: every directory directly below each level-1 folder.
subflds = [
    child.path
    for parent_fld in target_flds
    for child in os.scandir(parent_fld)
    if child.is_dir()
]
subflds

['D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\2_KrigingInterpolation\\4_Interpolation\\001\\NSCORE_CORRECTED_Monthly_DISPLACEMENT_CRFP_saveqgis_Oct2025',
 'D:\\1000_SCRIPTS\\003_Project002\\20250917_GTWR002\\2_KrigingInterpolation\\4_Interpolation\\002\\NSCORE_Monthly_DISPLACEMENT_dU_CRFP_2025_Full']

In [4]:
def get_shapefiles(select_fld, fld_type="Validation", point_type="grid"):
    """Return the shapefile paths stored in one sub-folder of ``select_fld``.

    The sub-folder is the first directory (in sorted order) directly under
    ``select_fld`` whose name matches ``*{fld_type}*{point_type}*``.

    Parameters
    ----------
    select_fld : str
        Folder that is searched for the matching sub-folder.
    fld_type : str, default "Validation"
        Text that must appear in the sub-folder name.
    point_type : str, default "grid"
        Text that must appear in the sub-folder name after ``fld_type``.

    Returns
    -------
    list of str
        Paths of the ``*.shp`` files found in the chosen sub-folder, sorted
        by path. Empty when the sub-folder contains no shapefile.

    Raises
    ------
    IndexError
        If no directory matches the pattern. The exception type is the one
        the previous ``[...][0]`` lookup raised, kept so existing callers
        behave the same; only the message is more descriptive.
    """
    pattern = os.path.join(select_fld, f"*{fld_type}*{point_type}*")
    # glob gives no ordering guarantee, so sort to make both the chosen
    # folder and the order of the returned files reproducible.
    candidate_flds = sorted(f for f in glob(pattern) if os.path.isdir(f))
    if not candidate_flds:
        raise IndexError(
            f"No folder matching '*{fld_type}*{point_type}*' found in {select_fld!r}"
        )
    points_fld = candidate_flds[0]
    points_shp = sorted(glob(os.path.join(points_fld, "*.shp")))
    return points_shp

**2025/4/21**

This code extracts the interpolated, transformed values at the selected points (grid or MLCW, depending on `point_type`).

**Single-run Process**

Run this one because I have only one folder to process

In [22]:
# 2025/09/20
# Single-run process: only one interpolation folder is handled per run.
# Builds `combined_df`: one row per point (indexed by PointKey), the two
# coordinate columns, and one "Predicted" column per shapefile.

# select_fld = r"D:\1000_SCRIPTS\003_Project002\20250222_GTWR001\2_KrigingInterpolation\4_Interpolation\003\NSCORE_Monthly_DISPLACEMENT_dU_CRFP_Full"

select_fld = r"D:\1000_SCRIPTS\003_Project002\20250917_GTWR002\2_KrigingInterpolation\4_Interpolation\001\NSCORE_CORRECTED_Monthly_DISPLACEMENT_CRFP_saveqgis_Oct2025"

point_type = "mlcw"  # point_type="grid" or "mlcw"
fld_basename = os.path.basename(select_fld)
points_shp = get_shapefiles(
    select_fld, fld_type="Points", point_type=point_type
)

# Reset the result explicitly so that a failure on the first shapefile cannot
# leave a stale `combined_df` from an earlier run of this cell in use.
combined_df = None
failed_shps = []  # shapefiles that were skipped because of an error

for i in trange(len(points_shp)):
    select_shp = points_shp[i]
    try:
        # The last "_"-separated token of the file name (extension removed)
        # becomes the label of this file's value column.
        shp_basename = os.path.basename(select_shp).split(".")[0]
        extract_datetime = shp_basename.split("_")[-1]

        select_shp_gdf = gpd.read_file(select_shp, read_geometry=True)
        output_shp_gdf = select_shp_gdf.loc[
            :,
            [  # "STATION",
                # "LandSubsid",
                "POINT_X",
                "POINT_Y",
                "Predicted",
            ],
        ].copy()

        output_shp_gdf = output_shp_gdf.rename(
            {
                # "LandSubsid": "WellCode",
                "POINT_X": "X_TWD97",
                "POINT_Y": "Y_TWD97",
                "Predicted": extract_datetime,
            },
            axis=1,
        )
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # 2025/4/21: add PointKey column, built from the coordinates converted
        # from meters to millimeters to make sure the PointKey is unique
        # NOTE(review): int() truncates rather than rounds, so coordinates
        # that differ only by floating-point noise could yield different
        # keys - confirm the inputs are exact before relying on the match.
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        pointkey = [
            f"X{int(x_twd97*1000)}Y{int(y_twd97*1000)}"
            for x_twd97, y_twd97 in zip(
                output_shp_gdf["X_TWD97"], output_shp_gdf["Y_TWD97"]
            )
        ]
        output_shp_gdf.insert(loc=0, column="PointKey", value=pointkey)
        output_shp_gdf = output_shp_gdf.set_index("PointKey")

        if combined_df is None:
            # The first shapefile that loads successfully provides the point
            # index and the coordinate columns.
            combined_df = output_shp_gdf
        else:
            # Align on PointKey; points missing from this file become NaN.
            combined_df[extract_datetime] = combined_df.index.map(
                output_shp_gdf[extract_datetime]
            )
    except Exception as e:
        # Best effort: continue with the remaining files, but report why this
        # one was skipped instead of hiding the error.
        failed_shps.append(select_shp)
        print(f"{os.path.basename(select_shp)}: {type(e).__name__}: {e}")

if combined_df is None:
    raise RuntimeError(
        f"No shapefile could be processed for point_type={point_type!r} in {select_fld!r}"
    )

# combined_df.to_pickle(f"{fld_basename}_{point_type}.xz")

  0%|          | 0/112 [00:00<?, ?it/s]

In [23]:
# Sanity check: print the name of every column that contains at least one
# missing value or at least one exact zero.
# `.any()` is used instead of counting unique flags so that a column that is
# entirely NaN (or entirely zero) is reported too, not only mixed columns.
for col in combined_df.columns:
    has_null = combined_df[col].isnull().any()
    has_zero = (combined_df[col] == 0).any()
    if has_null or has_zero:
        print(col)