In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# Source HDF5 file holding the original MLCW dataset; it is read here and
# never modified by this notebook.
mlcw_h5_fpath = "20241105_MLCW_CRFP_v6.h5"

# open_HDF5 hands back a (data, metadata) pair; both are indexed by station
# name in the cells below.
mlcw_h5_contents = open_HDF5(mlcw_h5_fpath)
mlcw_data, mlcw_metadata = mlcw_h5_contents

In [3]:
# Wrap the same HDF5 file in the MLCW helper class so the available station
# names can be listed.
mlcw = MLCW(h5_fpath=mlcw_h5_fpath)
mlcw_stations = mlcw.list_stations()

# Preview only the first five station names instead of dumping the full list.
mlcw_stations[0:5]

['ANHE', 'ANNAN', 'BEICHEN', 'BUDAI_XIN', 'CANLIN']

In [4]:
# Map each well code (the text after the last "_" in the station's
# "LandSubsidenceMonitoringWellIdentifier") to its station name, so that files
# named by well code can be matched back to stations later on.
wellcode_wellname_pair = {}

for key, station_metadata in mlcw_metadata.items():
    try:
        well_identifier = station_metadata["LandSubsidenceMonitoringWellIdentifier"]
        well_code = well_identifier.split("_")[-1]
    except (KeyError, TypeError, AttributeError) as e:
        # Only the expected lookup/parsing failures are tolerated:
        #   KeyError       - the station entry has no identifier field
        #   TypeError      - the entry is a plain string (file-level notes such
        #                    as "Description"), not a per-station mapping
        #   AttributeError - the identifier is not a string-like value
        # Anything else is a real bug and is allowed to propagate.
        print(key, e)
        continue

    if well_code in wellcode_wellname_pair:
        # Keep the original "last one wins" behaviour, but make the
        # overwrite visible instead of silently dropping a station.
        print(
            f"duplicate well code {well_code!r}: "
            f"{wellcode_wellname_pair[well_code]!r} replaced by {key!r}"
        )
    wellcode_wellname_pair[well_code] = key

DAZHUANG 'LandSubsidenceMonitoringWellIdentifier'
Description string indices must be integers, not 'str'
Update001 string indices must be integers, not 'str'
col_diff string indices must be integers, not 'str'
original string indices must be integers, not 'str'
ref2base string indices must be integers, not 'str'


In [8]:
def resolve_well_name(excel_fpath, code_to_name):
    """Return the station name for a cleaned-MLCW Excel file.

    The well code is the part of the file's base name before the first "_"
    (e.g. "AHES" for "AHES_pca_data.xlsx").

    Parameters
    ----------
    excel_fpath : str
        Path to the cleaned Excel file.
    code_to_name : Mapping[str, str]
        Well code -> station name lookup.

    Raises
    ------
    KeyError
        If the well code is not present in ``code_to_name``; the message names
        both the code and the offending file.
    """
    well_code = os.path.basename(excel_fpath).split("_")[0]
    try:
        return code_to_name[well_code]
    except KeyError:
        raise KeyError(
            f"no station metadata matches well code {well_code!r} "
            f"(from file {excel_fpath!r})"
        ) from None


def load_cleaned_mlcw_table(excel_fpath):
    """Read one cleaned-MLCW workbook and return it with one row per depth.

    The sheet is read with its first column as the index (datetime strings,
    per the original notebook comments) and then transposed, so the returned
    frame has index "depth" and timestamp columns.
    """
    cleaned_df = pd.read_excel(excel_fpath, index_col=[0])
    cleaned_df.index.name = "time"

    cleaned_df = cleaned_df.transpose()
    cleaned_df.index.name = "depth"
    # Datetime strings -> timestamps, so they can be formatted uniformly later.
    cleaned_df.columns = pd.to_datetime(cleaned_df.columns)
    return cleaned_df


def ring_by_ring_from_ref2base(ref2base_df):
    """Difference consecutive depth rows of a depth-by-date frame.

    Every row becomes (row - next row); the last row has no successor, so it
    keeps its own values instead of NaN.
    """
    ring_by_ring_df = ref2base_df.diff(periods=-1, axis=0)
    ring_by_ring_df.iloc[-1, :] = ref2base_df.iloc[-1, :]
    return ring_by_ring_df


# One single-key dict per station is collected here; they are merged later.
all_measurement_data = []
all_measurement_metadata = []

# Sorted so the processing order does not depend on the filesystem.
all_mlcw_excel_files = sorted(glob(os.path.join("pca_result_n/", "*.xlsx")))

# Each Excel file holds the cleaned MLCW data of one well.
for cleaned_mlcw_fpath in all_mlcw_excel_files:

    # Station name looked up from the well code embedded in the file name.
    wellname_from_wellcode = resolve_well_name(cleaned_mlcw_fpath, wellcode_wellname_pair)

    # Rows are depths, columns are timestamps.
    cleaned_mlcw_df = load_cleaned_mlcw_table(cleaned_mlcw_fpath)

    # Row-to-row differencing in reverse, used as the per-ring series.
    ring_by_ring_mlcw_df = ring_by_ring_from_ref2base(cleaned_mlcw_df)

    # Depths are taken from the ORIGINAL dataset, not from the Excel header.
    # NOTE(review): presumably this has one entry per row of cleaned_mlcw_df;
    # nothing here verifies that - confirm against the cleaned files.
    cleaned_mlcw_depth_arr = mlcw_data[wellname_from_wellcode]["depth"]
    cleaned_mlcw_date_arr = [ele.strftime("%Y%m%d") for ele in cleaned_mlcw_df.columns]

    # Plain numpy arrays for saving.
    cleaned_ref2base = cleaned_mlcw_df.to_numpy()
    cleaned_ringbyring = ring_by_ring_mlcw_df.to_numpy()

    cleaned_mlcw_data_dict = {
        wellname_from_wellcode: {
            "date": cleaned_mlcw_date_arr,
            "depth": cleaned_mlcw_depth_arr,
            "values": {"cleaned_ref2base": cleaned_ref2base, "cleaned_ringbyring": cleaned_ringbyring},
        }
    }

    all_measurement_data.append(cleaned_mlcw_data_dict)
    # The station's original metadata is carried over unchanged.
    all_measurement_metadata.append({wellname_from_wellcode: mlcw_metadata[wellname_from_wellcode]})

In [9]:
# Collapse the per-station lists into one mapping each.
# NOTE(review): both names are rebound from list to merged result here, so this
# cell is only meant to run once after the cell that builds the lists.
all_measurement_data = gwatertools.merge_dicts(*all_measurement_data)
all_measurement_metadata = gwatertools.merge_dicts(*all_measurement_metadata)

# File-level notes stored next to the per-station metadata entries.
file_level_notes = {
    "Description": "2025/2/28: Convert cleaned MLCW data, monthly sampling, into HDF5 format for further analysis. Data processed and cleaned by Cheng Hsin (FAFALAB)",
    "cleaned_ref2base": "cumulative compaction reference to the base (bottommost) of the well",
    "cleaned_ringbyring": "cumulative compaction time series at each ring",
}
for note_key, note_text in file_level_notes.items():
    all_measurement_metadata[note_key] = note_text

# The output file name is prefixed with the date on which the cell is run.
today_string = datetime.now().strftime("%Y%m%d")
output_h5_fpath = f"{today_string}_CLEANED_MLCW_monthly_v1.h5"

# Create a NEW HDF5 file (mode "w" overwrites any file of the same name);
# metadata is written first, then the measurement data.
h5_writer = gwatertools.h5pytools
with h5py.File(output_h5_fpath, "w") as hdf5_file:
    h5_writer.metadata_to_hdf5(hdf5_file, all_measurement_metadata)
    h5_writer.data_to_hdf5(hdf5_file, all_measurement_data)