In [1]:
from appgeopy import *
from my_packages import *

In [2]:
# Remember the directory that is current when this cell runs (nothing is changed here).
# The working directory is switched to the data folder further down, so this value is
# kept to build the output file path in the last cell.
script_folder = os.getcwd()

In [3]:
# Change the current working directory to the main folder containing groundwater level data.
# After os.chdir() every relative path in the notebook resolves against `mainfolder`.
# NOTE(review): this is an absolute, machine-specific path - adjust it before running elsewhere.
mainfolder = r"D:\VINHTRUONG\004_MODELING\001_STUDY_AREA\GroundwaterObservation\@DOWNLOAD_WRA_GWOB_YEARBOOK_PROJECT\@groundwater_level_PDF\GWL_DATA_NEW_Nov2023"
os.chdir(mainfolder)
os.getcwd()  # Verify the current working directory

'D:\\VINHTRUONG\\004_MODELING\\001_STUDY_AREA\\GroundwaterObservation\\@DOWNLOAD_WRA_GWOB_YEARBOOK_PROJECT\\@groundwater_level_PDF\\GWL_DATA_NEW_Nov2023'

In [4]:
# Read the well/station information table from the Excel workbook
well_info_workbook = r"D:\VINHTRUONG\004_MODELING\001_STUDY_AREA\GroundwaterObservation\@DOWNLOAD_WRA_GWOB_YEARBOOK_PROJECT\Well_Information_CRAF_Active_Inactive_OneSheetOnly.xlsx"
station_info_excel = pd.read_excel(well_info_workbook)

# Preview the first 5 rows to check that the table loaded as expected
station_info_excel.head(5)

Unnamed: 0,AREA_CODE,AREA_NAME,WELL_CODE,DISTRICT_IDENTIFIER,LAYER_IDENTIFIER,OLDNEW_IDENTIFIER,CNAME,ENAME,ECODE,WELL_NAME,X_TWD97,Y_TWD97,ADDRESS,WELL_ELEV(m),WELL_DEPTH(m),WELL_SCREEN(m),NOTE,ACTIVE
0,50,濁水溪沖積扇,7010111,70101,1,1,國聖,guosheng,GSG,國聖(1),206194.06,2665352.132,彰化縣彰化市國聖里中山路三段608號(國聖國小),21.053,24.0,8.00~14.00 24.00~30.00,1995/01~,1
1,50,濁水溪沖積扇,7010121,70101,2,1,國聖,guosheng,GSG,國聖(2),206194.06,2665352.132,彰化縣彰化市國聖里中山路三段608號(國聖國小),21.176,131.21,120.00~126.00,1994/11~,1
2,50,濁水溪沖積扇,7010131,70101,3,1,國聖,guosheng,GSG,國聖(3),206194.06,2665352.132,彰化縣彰化市國聖里中山路三段608號(國聖國小),21.528,200.0,185.00~197.00,1997/01~,1
3,50,濁水溪沖積扇,7010211,70102,1,1,東芳,dongfang,DFG,東芳(1),200779.08,2662059.143,彰化縣彰化市東芳里彰馬路45號(東芳國小),10.866,132.0,101.00~125.00,1997/07~,1
4,50,濁水溪沖積扇,7010221,70102,2,1,東芳,dongfang,DFG,東芳(2),200779.08,2662059.143,彰化縣彰化市東芳里彰馬路45號(東芳國小),10.86,181.0,162.00~174.00,1997/06~,1


In [5]:
# Build three dictionaries, all keyed by the upper-cased English station name
# taken from the folder name:
#   all_stations_metadata        -> station-level metadata
#   all_stations_monitoring_data -> {well code: daily series aligned to a fixed calendar}
#   all_wells_metadata           -> {well code: well-level metadata}
all_stations_metadata = {}
all_stations_monitoring_data = {}
all_wells_metadata = {}

# Get today's date for metadata
formatted_date = datetime.now().strftime("%Y/%m/%d")

# Station folders are expected to be named "<ename>_<cname>_<abbrev>"; folder names
# that do not split into exactly three parts are collected here and skipped.
error_names = []

# Test every entry against its full path so the listing does not depend on the
# current working directory.
gwl_folders = [
    f for f in os.listdir(mainfolder) if os.path.isdir(os.path.join(mainfolder, f))
]

# Common daily calendar every well series is aligned to (built once, reused per well)
daily_index = pd.date_range(start="2001-01-01", end="2024-12-31")

for select_folder in tqdm(gwl_folders):
    try:
        ename, cname, abbrev = select_folder.upper().split("_")
    except ValueError:
        error_names.append(select_folder)
        continue  # Skip if folder name format is incorrect

    # List the well workbooks of this station once; the list is used both for the
    # well count and for the per-well loop. os.path.join keeps the pattern portable.
    well_files = glob(os.path.join(mainfolder, select_folder, "*.xlsx"))

    # Filter station information by English name (stored lower-case in the table)
    station_info = station_info_excel.query("ENAME == @ename.lower()")

    # Initialize station metadata
    if station_info.empty:
        # If no station information is available, set metadata to null
        station_metadata = {"information": "null"}
    else:
        # All wells of a station share coordinates/address, so the first row is used
        x_twd97, y_twd97 = station_info.iloc[0][["X_TWD97", "Y_TWD97"]]
        address = station_info["ADDRESS"].iloc[0]

        station_metadata = {
            "Chinese": cname,
            "Abbreviation": abbrev,
            "EPSG": 3826,
            "X": x_twd97,
            "Y": y_twd97,
            "BasinENG": "Choshuichi Fan",
            "BasinCHN": "濁水溪沖積扇",
            "Num_of_Wells": len(well_files),
            "Address": address,
            "CreatedDate": formatted_date,
        }

    all_stations_metadata[ename] = station_metadata

    # Initialize well data for the station
    station_monitoring_data = {}
    well_metadata = {}

    # Process each well file in the station folder
    for select_file in well_files:
        # The file name (without extension) is the well code, as a string
        wellcode = os.path.basename(select_file).split(".")[0]

        # Compare as text: WELL_CODE may have been read from Excel as integers, and an
        # integer column never equals the string taken from the file name.
        well_info = station_info[station_info["WELL_CODE"].astype(str) == wellcode]
        if well_info.empty:
            well_metadata[wellcode] = {"information": "null"}
        else:
            wellname, well_elev, well_depth, well_screen, active = well_info.iloc[0][
                ["WELL_NAME", "WELL_ELEV(m)", "WELL_DEPTH(m)", "WELL_SCREEN(m)", "ACTIVE"]
            ]
            well_status = "Active" if active == 1 else "Inactive"

            # Update well metadata
            well_metadata[wellcode] = {
                "WellName": wellname,
                "Well_Elev(m)": well_elev,
                "Well_Depth(m)": well_depth,
                "Well_Screen(m)": well_screen,
                "Status": well_status,
            }

        # Load the well workbook (first column = dates, next column = values) and
        # align the values to the common daily calendar; days without a record are NaN.
        df = pd.read_excel(select_file, parse_dates=[0], index_col=[0])
        temp = pd.DataFrame(index=daily_index)
        temp["daily_value"] = temp.index.map(df.iloc[:, 0])

        # first/last_valid_index() return None when the well has no value inside the
        # calendar window; store the same "null" placeholder used for missing
        # information instead of failing on None.strftime().
        first_obs = temp.first_valid_index()
        last_obs = temp.last_valid_index()
        well_metadata[wellcode]["FIRST_OBS"] = (
            first_obs.strftime("%Y%m%d") if first_obs is not None else "null"
        )
        well_metadata[wellcode]["LAST_OBS"] = (
            last_obs.strftime("%Y%m%d") if last_obs is not None else "null"
        )

        # Store aligned time series data
        station_monitoring_data[wellcode] = temp["daily_value"]

    # Update global dictionaries
    all_stations_monitoring_data[ename] = station_monitoring_data
    all_wells_metadata[ename] = well_metadata

100%|████████████████████████████████████████████████████████████████████████████████| 103/103 [01:29<00:00,  1.15it/s]


In [6]:
# Output file goes into the folder recorded in `script_folder`
file_name = os.path.join(script_folder, "20240826_GWL_MonitoringData_Choushuixi.h5")

# Hand the nested {station: {well: series}} mapping to the project helper that
# prepares it for HDF5 storage
h5tools = gwatertools.h5pytools
transformed_data = h5tools.transform_data_for_hdf5(all_stations_monitoring_data)

# Create the HDF5 file with the data plus station-level and well-level metadata,
# then close the returned handle
hdf5_file = h5tools.initialize_hdf5_file(
    file_name,
    transformed_data,
    metadata=all_stations_metadata,
    sensor_metadata=all_wells_metadata,
)
hdf5_file.close()