In [1]:
from ema_workbench import ema_logging, save_results, load_results
import pandas as pd
import os



The use of a full factorial design for scenario generation, while comprehensive, inevitably results in a subset of parameter combinations that are implausible or infeasible under real-world conditions. To maintain the analytical integrity and interpretability of the results, these non-viable scenarios must be systematically identified and excluded from the dataset.

#### I Exclusion of Cases with High Nourishment Frequency and Volume but Low Dry Bulk Density

Sand nourishments are known to elevate the concentration of suspended mineral sediments in the water column, thereby promoting mineral sediment deposition. In scenarios characterized by frequent nourishment interventions and high volumes of sand input, it is implausible for the resulting sediment deposits to exhibit a low dry bulk density ($\rho=400$). Such combinations likely reflect unrealistic or inconsistent model behavior. Therefore, cases that combine high nourishment frequency and high nourishment quantity ($C_{\mathcal{N}}$) with low dry bulk density are excluded from the analysis to ensure the physical plausibility and integrity of the modeled outcomes.

The following combinations of parameters are excluded due to their physical implausibility, as described above:

| N. concentration           | N. frequency             | Dry bulk density          |
|:----------------------------:|:-----------------------------:|:-------------------------:|
| 0.3                         | 1                              | 400                       |
| 0.5                         | 1                              | 400                       |
| 0.5                         | 5                              | 400                       |

#### II Exclusion of Cases with Implausible Sediment Deposition Fractions and Vegetation States

The parameter $f_d$ represents the fraction of sediment retained and reflects both vegetation-mediated trapping and management interventions. On mudflats ($E_0=0.4$), high values of $f_d \in \{0.6, 0.8\}$ imply strong sediment trapping, which is unrealistic for unvegetated or sparsely vegetated areas, and are therefore excluded. In the pioneer zone ($E_0=0.7$), the extreme values ($f_d=0.2$ and $f_d=0.8$) are removed, as they do not plausibly reflect vegetation structure under either conservation or restoration conditions.

For the low marsh ($E_0=1.2$) and high marsh ($E_0=1.7$), values of $f_d=0.4$, $f_d=0.6$, and $f_d=0.8$ are retained, corresponding respectively to management strategies such as no restoration, conservation, and active restoration in the low marsh, and excessive grazing, grazing, and no grazing in the high marsh. In both zones, $f_d=0.2$ is excluded due to incompatibility with the expected sediment trapping under vegetated conditions. These constraints help ensure the physical plausibility of modeled scenarios across the elevation gradient.

The following combinations of parameters are excluded due to their physical implausibility, as described above:

| Elevation                    | Depositing fraction           |
|:----------------------------:|:-----------------------------:|
| 0.4                          | 0.6, 0.8                      | 
| 0.7                          | 0.2, 0.8                      | 
| 1.2                          | 0.2                           | 
| 1.7                          | 0.2                           | 

####  III Exclusion of Cases with Nourishment Frequency but No Input

Cases with a non-zero nourishment frequency but zero nourishment amount ($C_{\mathcal{N}} = 0$) are excluded, as they imply sediment input events without actual material added—an implausible scenario.

The following combinations of parameters are excluded due to their physical implausibility, as described above:

| N. concentration             | N. frequency          |
|:----------------------------:|:-----------------------------:|
| 0.0                          | 5                    | 
| 0.0                          |10                     | 


#### 1. Define filter code

In [2]:
# Exclusion rules as (columns, combinations) pairs. Every row whose values in
# `columns` equal one of the `combinations` is dropped. The three entries
# correspond to exclusion criteria I, II and III described in the markdown.
_EXCLUSION_RULES = (
    (
        ('c_flood_nourishment', 'nourishment_frequency', 'rho_deposit'),
        ((0.3, 1, 400), (0.5, 1, 400), (0.5, 5, 400)),
    ),
    (
        ('z_init', 'fd'),
        ((0.4, 0.6), (0.4, 0.8), (0.7, 0.2), (0.7, 0.8), (1.2, 0.2), (1.7, 0.2)),
    ),
    (
        ('c_flood_nourishment', 'nourishment_frequency'),
        ((0.0, 10), (0.0, 5)),
    ),
)


def _rows_matching(df, columns, combinations, tol):
    """Return a boolean Series flagging rows that match any of `combinations`.

    A row matches a combination when every listed column lies within `tol`
    (absolute difference) of the corresponding target value.
    """
    missing = [col for col in columns if col not in df.columns]
    if missing:
        raise KeyError(f"apply_filters: missing required column(s) {missing}")

    # Cast once per column so categorical/object columns holding numbers compare
    # numerically; NaN never satisfies `<= tol`, so such rows are never flagged.
    as_float = {col: df[col].astype(float) for col in columns}

    matched = pd.Series(False, index=df.index)
    for combination in combinations:
        hit = pd.Series(True, index=df.index)
        for col, target in zip(columns, combination):
            hit = hit & ((as_float[col] - target).abs() <= tol)
        matched = matched | hit
    return matched


def apply_filters(df, tol=1e-9):
    """Drop the implausible parameter combinations (criteria I-III) from `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'c_flood_nourishment', 'nourishment_frequency',
        'rho_deposit', 'z_init' and 'fd'. It is not modified.
    tol : float, optional
        Absolute tolerance used when comparing a cell with a target value.
        Exact equality would silently keep rows whose parameter level carries
        floating-point representation error (e.g. 0.6000000000000001 instead
        of 0.6), so a small tolerance is used instead.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` that match none of the excluded combinations, in their
        original order and with their original index labels.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    excluded = pd.Series(False, index=df.index)
    for columns, combinations in _EXCLUSION_RULES:
        excluded = excluded | _rows_matching(df, columns, combinations, tol)
    return df[~excluded]

#### 2. Load datasets for East-Frisian focus area S15

In [3]:
# Raw full-factorial results for focus area S15, one archive per RCP scenario.
# Each archive unpacks into (experiments, outcomes); the outcomes are turned into
# a DataFrame and placed column-wise next to the experiment columns.

# --- RCP 8.5 ---
experiments_1585, outcomes_1585 = load_results(
    'model_output_S15_raw/S15_rcp85_full_factorial.tar.gz'
)
outcomes_1585 = pd.DataFrame(outcomes_1585)
out_S15_85_raw = pd.concat([experiments_1585, outcomes_1585], axis='columns')

# --- RCP 4.5 ---
experiments_1545, outcomes_1545 = load_results(
    'model_output_S15_raw/S15_rcp45_full_factorial.tar.gz'
)
outcomes_1545 = pd.DataFrame(outcomes_1545)
out_S15_45_raw = pd.concat([experiments_1545, outcomes_1545], axis='columns')

# --- RCP 2.6 ---
experiments_1526, outcomes_1526 = load_results(
    'model_output_S15_raw/S15_rcp26_full_factorial.tar.gz'
)
outcomes_1526 = pd.DataFrame(outcomes_1526)
out_S15_26_raw = pd.concat([experiments_1526, outcomes_1526], axis='columns')

#### 3. Apply filter, name and store filtered datasets for S15

In [4]:
# Raw S15 datasets and, in the same order, the names their filtered versions
# are stored under.
input_datasets_15 = [out_S15_85_raw, out_S15_45_raw, out_S15_26_raw]
df_names_15 = ["out_S15_85", "out_S15_45", "out_S15_26"]
output_folder = "model_output_S15_clean"

# Apply filters and store results with names
filtered_datasets_15 = {
    name: apply_filters(df) for name, df in zip(df_names_15, input_datasets_15)
}

# to_csv does not create missing directories, so make sure the target folder
# exists before writing (no-op when it is already there).
os.makedirs(output_folder, exist_ok=True)

# Store each filtered dataset as a tab-separated text file, without the index.
for name, df in filtered_datasets_15.items():
    df.to_csv(os.path.join(output_folder, f"{name}.txt"), sep='\t', index=False)

#### 4. Load datasets for Weser-Elbe focus area S33

In [7]:
# Raw full-factorial results for focus area S33, one archive per RCP scenario.
# Each archive unpacks into (experiments, outcomes); the outcomes are turned into
# a DataFrame and placed column-wise next to the experiment columns.

# --- RCP 8.5 ---
experiments_3385, outcomes_3385 = load_results(
    'model_output_S33_raw/S33_rcp85_full_factorial.tar.gz'
)
outcomes_3385 = pd.DataFrame(outcomes_3385)
out_S33_85_raw = pd.concat([experiments_3385, outcomes_3385], axis='columns')

# --- RCP 4.5 ---
experiments_3345, outcomes_3345 = load_results(
    'model_output_S33_raw/S33_rcp45_full_factorial.tar.gz'
)
outcomes_3345 = pd.DataFrame(outcomes_3345)
out_S33_45_raw = pd.concat([experiments_3345, outcomes_3345], axis='columns')

# --- RCP 2.6 ---
experiments_3326, outcomes_3326 = load_results(
    'model_output_S33_raw/S33_rcp26_full_factorial.tar.gz'
)
outcomes_3326 = pd.DataFrame(outcomes_3326)
out_S33_26_raw = pd.concat([experiments_3326, outcomes_3326], axis='columns')

#### 5. Apply filter, name and store filtered datasets for S33

In [9]:
# Raw S33 datasets and, in the same order, the names their filtered versions
# are stored under.
input_datasets_33 = [out_S33_85_raw, out_S33_45_raw, out_S33_26_raw]
df_names_33 = ["out_S33_85", "out_S33_45", "out_S33_26"]
output_folder = "model_output_S33_clean"

# Apply filters and store results with names
filtered_datasets_33 = {
    name: apply_filters(df) for name, df in zip(df_names_33, input_datasets_33)
}

# to_csv does not create missing directories, so make sure the target folder
# exists before writing (no-op when it is already there).
os.makedirs(output_folder, exist_ok=True)

# Store each filtered dataset as a tab-separated text file, without the index.
for name, df in filtered_datasets_33.items():
    df.to_csv(os.path.join(output_folder, f"{name}.txt"), sep='\t', index=False)