In [1]:
from appgeopy import *
from my_packages import *

# PART 1: CONVERT ORIGINAL DATA TO MONTHLY DATA

# PART 2: EXTRACT DATA FROM 2014 TO 2021 FOR FURTHER PROCESSING

In [2]:
# Input HDF5 file handed to the MLCW reader below.
h5_fpath = "20250314_MLCW_CRFP_v12.h5"
# Instantiate the project's MLCW class on that file.
mlcw_obj = MLCW(h5_fpath=h5_fpath)
# get_data() returns a pair: the data (indexed by station name in a later cell)
# and the accompanying metadata.
mlcw_data, mlcw_metadata = mlcw_obj.get_data()
# Station names known to the reader; later cells iterate over this list.
available_stations = mlcw_obj.list_stations()
# Preview the first five station names as the cell output.
available_stations[:5]

['ANHE', 'ANNAN', 'BEICHEN', 'CANLIN', 'DONGGUANG']

In [3]:
# Select the stations whose "original" record covers every year of the study
# window; any station missing at least one of those years is left out.
START_YEAR, END_YEAR = 2014, 2021  # inclusive study window

# Built once, outside the loop: the year labels every kept station must have.
expected_years = [str(year) for year in range(START_YEAR, END_YEAR + 1)]
required_years = set(expected_years)

station2process = []

for select_station in tqdm(available_stations):
    df = mlcw_obj.build_dataframe(station=select_station, value_type="original")

    # The column labels are formatted with strftime, so this assumes
    # df.columns is datetime-like (e.g. a DatetimeIndex) -- TODO confirm.
    df_year_list = df.columns.strftime("%Y").unique().tolist()

    if required_years.issubset(df_year_list):
        station2process.append(select_station)

  0%|          | 0/38 [00:00<?, ?it/s]

In [4]:
# Show the nested key layout of the loaded data; the recorded output is a tree
# of station -> group -> dataset names (e.g. ANHE/values/original).
mlcw_obj.list_structure(mlcw_data)

├── ANHE
│   ├── classify
│   │   ├── depth
│   │   └── layer
│   ├── date
│   ├── depth
│   ├── monthly_date
│   ├── monthly_values
│   │   ├── compactbylayer
│   │   ├── ref2base
│   │   └── ringbyring
│   └── values
│       ├── compactbylayer
│       ├── original
│       ├── ref2base
│       └── ringbyring
├── ANNAN
│   ├── classify
│   │   ├── depth
│   │   └── layer
│   ├── date
│   ├── depth
│   ├── monthly_date
│   ├── monthly_values
│   │   ├── compactbylayer
│   │   ├── ref2base
│   │   └── ringbyring
│   └── values
│       ├── compactbylayer
│       ├── original
│       ├── ref2base
│       └── ringbyring
├── BEICHEN
│   ├── classify
│   │   ├── depth
│   │   └── layer
│   ├── date
│   ├── depth
│   ├── monthly_date
│   ├── monthly_values
│   │   ├── compactbylayer
│   │   ├── ref2base
│   │   └── ringbyring
│   └── values
│       ├── compactbylayer
│       ├── original
│       ├── ref2base
│       └── ringbyring
├── CANLIN
│   ├── classify
│   │   ├── depth
│   │   └── layer

In [8]:
# Top-level segments copied over for each retained station. "date" and
# "values" are not in this list, so they are not carried into the new data.
kept_segments = ("depth", "classify", "monthly_date", "monthly_values")

# One single-entry nested dict per (station, segment) pair, station-major order.
segment_dicts = []
for select_station in tqdm(station2process):
    station_data = mlcw_data[select_station]
    for segment in kept_segments:
        segment_dicts.append({select_station: {segment: station_data[segment]}})

# merge_dicts presumably merges nested dicts so that every station ends up with
# all of its kept segments -- verify against the helper's implementation.
new_mlcw_data = merge_dicts(*segment_dicts)

  0%|          | 0/32 [00:00<?, ?it/s]

In [9]:
# Stations that are available but were not selected for processing.
dropped_stations = [
    station for station in available_stations if station not in station2process
]

# Keep every metadata entry whose key is not a dropped station. Keys that are
# not station names at all (if any exist) therefore pass through unchanged.
metadata_entries = [
    {key: mlcw_metadata[key]}
    for key in mlcw_metadata.keys()
    if key not in dropped_stations
]

new_mlcw_metadata = merge_dicts(*metadata_entries)

In [10]:
# - - - - - - - - - - - - - - - - - - - - - - - -
# Save the filtered data and metadata to a new file
# - - - - - - - - - - - - - - - - - - - - - - - -
# The output name is prefixed with the run date (YYYYMMDD).
today_string = datetime.now().strftime("%Y%m%d")
output_fpath = f"{today_string}_MLCW_CRFP_monthly_v1.h5"

h5_writer = gwatertools.h5pytools

# Mode "w" creates the file and truncates an existing file of the same name,
# so re-running on the same day overwrites the earlier output.
with h5py.File(output_fpath, "w") as hdf5_file:
    h5_writer.data_to_hdf5(hdf5_file, new_mlcw_data)
    h5_writer.metadata_to_hdf5(hdf5_file, new_mlcw_metadata)