# Create abstraction level dataframe based on independent variable classes

In [1]:
import pandas as pd
from pathlib import Path

from sklearn import preprocessing

In [2]:
# Project root on the shared filesystem; all other locations derive from it.
ROOT = Path('/gpfs/work1/0/wtrcycle/users/jaerts/model_refinement_pub/')

# Directory strings (each ending in '/') for catchment attributes, auxiliary
# data and results. str(ROOT) has no trailing slash, so it is added explicitly.
ATTRDIR = str(ROOT) + '/catchment_attributes/'
AUXDIR = str(ROOT) + '/aux_data/'
RESULTS = str(ROOT) + '/results/'

In [3]:
# Output table (populated in the final cell) and two parallel accumulators:
# each cell below appends one list of column names to `features` and an
# equally long list of class labels to `classes`.
df_abs = pd.DataFrame()
features, classes = [], []

### Meteorology

In [4]:
# Climate class: every column of the CAMELS-GB climatic attribute table
# (gauge_id is used as the index, so it is not a feature) is labelled
# 'meteorology'.
df = pd.read_csv(
    f"{ATTRDIR}/camels-gb_attributes/CAMELS_GB_climatic_attributes.csv",
    index_col='gauge_id',
)

features.append(list(df.columns))
classes.append(['meteorology'] * df.shape[1])

### Land Surface

In [5]:
# Topo class: CAMELS-GB topographic attributes indexed by gauge_id.
# The gauge name/position columns and the elevation min/10/50/90/max columns
# are removed; whatever remains is labelled 'land surface'.
df = pd.read_csv(
    f"{ATTRDIR}/camels-gb_attributes/CAMELS_GB_topographic_attributes.csv",
    index_col='gauge_id',
).drop(
    [
        'gauge_name', 'gauge_lat', 'gauge_lon', 'gauge_easting', 'gauge_northing',
        'elev_min', 'elev_10', 'elev_50', 'elev_90', 'elev_max',
    ],
    axis='columns',
)

features.append(list(df.columns))
classes.append(['land surface'] * df.shape[1])

# Landcover class: all columns of the CAMELS-GB landcover attribute table
# (indexed by gauge_id) are labelled 'land surface'.
df = pd.read_csv(
    f"{ATTRDIR}/camels-gb_attributes/CAMELS_GB_landcover_attributes.csv",
    index_col='gauge_id',
)

features.append(list(df.columns))
classes.append(['land surface'] * df.shape[1])

### Subsurface

In [6]:
# Hydrogeology class: CAMELS-GB hydrogeology attributes indexed by gauge_id,
# without the 'low_nsig_perc' and 'nsig_low_perc' columns; the remaining
# columns are labelled 'subsurface'.
df = pd.read_csv(
    f"{ATTRDIR}/camels-gb_attributes/CAMELS_GB_hydrogeology_attributes.csv",
    index_col='gauge_id',
).drop(['low_nsig_perc', 'nsig_low_perc'], axis='columns')

features.append(list(df.columns))
classes.append(['subsurface'] * df.shape[1])

# Soil class: CAMELS-GB soil attributes indexed by gauge_id. Two groups of
# columns are removed before the rest is labelled 'subsurface':
#   * the '*_missing' columns, and
#   * the '_5' / '_50' / '_95' variants (presumably percentiles - confirm).
df = pd.read_csv(
    f"{ATTRDIR}/camels-gb_attributes/CAMELS_GB_soil_attributes.csv",
    index_col='gauge_id',
).drop(
    [
        # '*_missing' columns
        'sand_perc_missing', 'silt_perc_missing', 'clay_perc_missing',
        'organic_perc_missing', 'bulkdens_missing', 'tawc_missing',
        'porosity_cosby_missing', 'conductivity_cosby_missing',
        'conductivity_hypres_missing', 'root_depth_missing',
        'soil_depth_pelletier_missing', 'porosity_hypres_missing',
        # '_5' / '_50' / '_95' columns
        'bulkdens_5', 'bulkdens_50', 'bulkdens_95',
        'tawc_5', 'tawc_50', 'tawc_95',
        'porosity_cosby_5', 'porosity_cosby_50', 'porosity_cosby_95',
        'conductivity_cosby_5', 'conductivity_cosby_50', 'conductivity_cosby_95',
        'porosity_hypres_5', 'porosity_hypres_50', 'porosity_hypres_95',
        'conductivity_hypres_5', 'conductivity_hypres_50', 'conductivity_hypres_95',
        'root_depth_5', 'root_depth_50', 'root_depth_95',
        'soil_depth_pelletier_5', 'soil_depth_pelletier_50', 'soil_depth_pelletier_95',
    ],
    axis='columns',
)

features.append(list(df.columns))
classes.append(['subsurface'] * df.shape[1])

### Hydrology

In [7]:
# Hydrology class: CAMELS-GB hydrological signatures (comma-separated text).
# NOTE(review): unlike the attribute tables, this file is read without
# index_col, so an identifier column (if the file has one) would be
# registered as a feature as well - confirm this is intended.
df = pd.read_table(f"{ATTRDIR}/hydrological_signatures/CAMELS_GB_signatures.txt", sep=',')

# Remove the '*_error_str' columns together with 'zero_Q_freq' and 'TotalRR'.
# Each label is listed exactly once.
# NOTE(review): 'TotalRR' is also requested from the McMillan groundwater
# signature file further down - presumably that copy is the one to keep.
df = df.drop(columns=[
    'Q_mean_error_str', 'TotalRR_error_str', 'QP_elasticity_error_str',
    'FDC_slope_error_str', 'BFI_error_str', 'HFD_mean_error_str',
    'Q5_error_str', 'Q95_error_str',
    'high_Q_freq_error_str', 'high_Q_dur_error_str',
    'low_Q_freq_error_str', 'low_Q_dur_error_str',
    'zero_Q_freq_error_str',
    'zero_Q_freq', 'TotalRR',
])

# Everything that is left is labelled 'hydrology'.
features.append(df.columns.to_list())
classes.append(['hydrology'] * len(df.columns))

# Hydrological signatures - groundwater (McMillan): only the five listed
# columns are loaded and labelled 'hydrology'. pandas ignores the order of
# `usecols`, so the columns come back in file order.
df = pd.read_table(
    f"{ATTRDIR}/hydrological_signatures/McMillan_Groundwater_signatures.txt",
    sep=',',
    usecols=['TotalRR', 'Recession_a_Seasonality', 'AverageStorage', 'BFI', 'BaseflowRecessionK'],
)

features.append(list(df.columns))
classes.append(['hydrology'] * df.shape[1])

# Hydrological signatures - overland flow (McMillan): only the four listed
# columns are loaded and labelled 'hydrology'. pandas ignores the order of
# `usecols`, so the columns come back in file order.
df = pd.read_table(
    f"{ATTRDIR}/hydrological_signatures/McMillan_Overland_signatures.txt",
    sep=',',
    usecols=['IE_thresh', 'SE_thresh', 'Storage_thresh', 'SE_slope'],
)

features.append(list(df.columns))
classes.append(['hydrology'] * df.shape[1])

## Create dataframe

In [8]:
# Flatten the per-table lists into one flat list each (order preserved) and
# add one extra hydrology feature by hand.
# NOTE(review): 'BFI.1' is not a column of any table read in this notebook;
# presumably it is the name a second 'BFI' column receives when the tables
# are combined elsewhere - confirm. If both signature files contain 'BFI',
# the index written below holds 'BFI' twice.
features = [name for group in features for name in group] + ['BFI.1']
classes = [label for group in classes for label in group] + ['hydrology']

# One row per feature: the feature name becomes the index and 'Classes' the
# only column. The table is then written to the auxiliary data directory.
df_abs = df_abs.assign(Features=features, Classes=classes).set_index('Features')
df_abs.to_csv(f'{AUXDIR}/rf_features/independent_variable_abstraction_level_new.csv')