In [1]:
from appgeopy import *
from my_packages import *

#### nscore-transformed displacement

In [2]:
def backtransform_func(
    original_df, transformed_df, original_df_indexcol, cols2transform
):
    import geostatspy.geostats as geostats
    import geostatspy.GSLIB as GSLIB
    from tqdm import tqdm

    output_df = transformed_df.loc[
        :, ~transformed_df.columns.isin(cols2transform)
    ].copy()

    for col in tqdm(cols2transform):
        temp = (
            original_df[[original_df_indexcol, col]].dropna(subset=[col]).copy()
        )
        init_transformed, sorted_init, sorted_weighting = geostats.nscore(
            temp, col
        )
        # Performing back-transformation using geostatspy
        output_df[col] = geostats.backtr(
            df=transformed_df,
            vcol=col,
            vr=sorted_init,
            vrg=sorted_weighting,
            zmin=-1e21,
            zmax=1e21,
            ltail=0,
            ltpar=0,
            utail=60,
            utpar=60,
        )

    return output_df

In [3]:
# transformed data obtained from Kriging interpolation
transformed_data_fpath = (
    r"NSCORE_CORRECTED_Monthly_DISPLACEMENT_updateNov_Full_mlcw.xz"
)
transformed_df = pd.read_pickle(transformed_data_fpath)
transformed_df.shape

(39, 114)

In [4]:
# read the original data, from which the transformed data was achieved
# original_data_fpath = r"D:\1000_SCRIPTS\003_Project002\20250917_GTWR002\2_KrigingInterpolation\Monthly_DISPLACEMENT_dU_CRFP_2025.xz"
original_data_fpath = r"D:\1000_SCRIPTS\003_Project002\20251111_GTWR003\1_PrepareDatasets\Leveling\CORRECTED_Monthly_CUMDISP_saveqgis_Oct2025_updateNov.xz"
original_df = pd.read_pickle(original_data_fpath)
original_df = original_df.reset_index(drop=False)
trans_cols = [col for col in original_df.columns if col.startswith("N")]

# perform backtransform
backtransformed_df = backtransform_func(
    original_df,
    transformed_df,
    original_df_indexcol="PointKey",
    cols2transform=trans_cols,
)

0it [00:00, ?it/s]


In [7]:
# save output to pickle
transformed_data_basename = os.path.basename(transformed_data_fpath)

backtransformed_savename = transformed_data_basename.replace(
    "NSCORE", "BACKTRANSFORMED"
)

# grid or mlcw???
out_fld = "mlcw"

backtransformed_savepath = os.path.join(
    # "PostKriging_Backtransform_Data",
    os.getcwd(),
    backtransformed_savename.replace(".xz", f"_{out_fld}.xz"),
)

if out_fld == "mlcw":
    _, mlcw_metadata = h5pytools.open_HDF5(
        r"D:\1000_SCRIPTS\003_Project002\20251111_GTWR003\1_PrepareDatasets\MLCW\20251114_MLCW_CRFP_monthly_v4.h5"
    )

    available_stations = [
        key
        for key in mlcw_metadata.keys()
        if isinstance(mlcw_metadata[key], dict)
    ]
    metadata_df = (
        pd.DataFrame(mlcw_metadata)
        .loc[["X_TWD97", "Y_TWD97", "Code"], available_stations]
        .T
    )
    metadata_df = metadata_df.reset_index()
    metadata_df = metadata_df.rename({"index": "STATION"}, axis=1)
    metadata_df["PointKey"] = [
        f"X{int(x*1000)}Y{int(y*1000)}"
        for x, y in zip(metadata_df["X_TWD97"], metadata_df["Y_TWD97"])
    ]
    metadata_df = metadata_df.set_index("PointKey")

    # add the `STATION` and `WellCode` column into the `backtransformed_df`,
    # then rearrange the columns
    for col in ["STATION", "Code"]:
        backtransformed_df[col] = metadata_df.index.map(metadata_df[col])

    backtransformed_df = backtransformed_df.reset_index(drop=False)

    new_cols = ["STATION", "Code", "X_TWD97", "Y_TWD97"] + [
        col for col in backtransformed_df.columns if col.startswith("N")
    ]
    backtransformed_df = backtransformed_df[new_cols]
    backtransformed_df = backtransformed_df.set_index("STATION")

backtransformed_df.to_pickle(backtransformed_savepath)

In [8]:
show(backtransformed_df)

0
Loading ITables v2.4.5 from the internet...  (need help?)
