# Hydrogeology attributes

Notebook to create the file `CAMELS_DE_hydrogeology_attributes.csv`.  

columns:
- gauge_id [%]
- aquitard_perc [%]
- aquifer_perc [%]
- aquifer_aquitard_mixed_perc [%]
- kf_very_high_perc [%]
- kf_high_perc [%]
- kf_medium_perc [%]
- kf_moderate_perc [%]
- kf_low_perc [%]
- kf_very_low_perc [%]
- kf_extremely_low_perc [%]
- kf_very_high_to_high_perc [%]
- kf_medium_to_moderate_perc [%]
- kf_low_to_extremely_low_perc [%]
- kf_highly_variable_perc [%]
- kf_moderate_to_low_perc [%]
- cavity_fissure_perc [%]
- cavity_pores_perc [%]
- cavity_fissure_karst_perc [%]
- cavity_fissure_pores_perc [%]
- rocktype_sediment_perc [%]
- rocktype_metamorphite_perc [%]
- rocktype_magmatite_perc [%]
- consolidation_solid_rock_perc [%]
- consolidation_unconsolidated_rock_perc [%]
- geochemical_rocktype_silicate_perc [%]
- geochemical_rocktype_silicate_carbonatic_perc [%]
- geochemical_rocktype_carbonatic_perc [%]
- geochemical_rocktype_sulfatic_perc [%]
- geochemical_rocktype_silicate_organic_components_perc [%]
- geochemical_rocktype_anthropogenically_modified_through_filling_perc [%]
- geochemical_rocktype_sulfatic_halitic_perc [%]
- geochemical_rocktype_halitic_perc [%]
- waterbody_perc [%]
- no_data_perc [%]

In [1]:
import os
from glob import glob
import pandas as pd

In [2]:
# get camels_ids from hydromet timeseries
camels_ids = [camels_id.split("_")[-1].split(".csv")[0] for camels_id in glob("../output_data/camels_de/timeseries/*.csv")]

# sort camels_ids
camels_ids = sorted(camels_ids)

print(f"Total number of stations in CAMELS-DE v1: {len(camels_ids)}")

Total number of stations in CAMELS-DE v1: 5


## Read, process and save hydrogeology data

We extracted the hydrogeology attributes from the HUEK250 dataset, read and process here.

In [1]:
# dataframe to store all hydrogeo data for all camels_ids
df_all = pd.DataFrame()

# huek250 variables
variables = ["ch", "kf", "ha", "ga", "vf", "gc"]

for camels_id in camels_ids:
    # dataframe to store all hydrogeo data for a single camels_id
    df = pd.DataFrame()
    df["gauge_id"] = [camels_id]

    # read all variables
    for variable in variables:
        path = os.path.join('../../../camels_hydrogeo/huek250/output_data', f'{camels_id}/data/huek250_{variable}.csv')
        df_variable = pd.read_csv(path)

        # drop column camels_id
        df_variable.drop("camels_id", axis=1, inplace=True)

        # concatenate dataframes
        df = pd.concat([df, df_variable], axis=1)

    # check that all waterbody columns have the same value
    waterbody_cols = [col for col in df.columns if "waterbody" in col]
    assert len(set(df[waterbody_cols].values.flatten().tolist())) == 1

    # make one waterbody column
    df["waterbody_perc"] = df[waterbody_cols[0]]
    df.drop(waterbody_cols, axis=1, inplace=True)

    # check that all no_data columns have the same value
    no_data_cols = [col for col in df.columns if "no_data" in col]
    assert len(set(df[no_data_cols].values.flatten().tolist())) == 1

    # make one no_data column
    df["no_data_perc"] = df[no_data_cols[0]]
    df.drop(no_data_cols, axis=1, inplace=True)
    
    # add to df_all
    df_all = pd.concat([df_all, df], axis=0)

# round to 2 decimal places
df_all = df_all.round(2)

# save results
df_all.to_csv("../output_data/camels_de/CAMELS_DE_hydrogeology_attributes.csv", index=False) 

NameError: name 'pd' is not defined