### Filter cell entry DMS data
Read in the cell entry DMS data, filter it based on parameters defined in the config file, and save the filtered results to CSV files.

In [None]:
import pandas as pd

In [None]:
# Filtering thresholds supplied by the workflow
TIMES_SEEN_ENTRY = snakemake.params.times_seen_entry
ENTRY_STD_DEV = snakemake.params.entry_std_dev

# CSV of cell entry effects to read
entry_df_path = snakemake.input.entry_df

# Destinations for the filtered table and its per-site summary
entry_effects_filtered_path = snakemake.output.entry_effects_filtered
entry_effects_filtered_mean_path = snakemake.output.entry_effects_filtered_mean

In [None]:
# Load the cell entry effects table
func_data = pd.read_csv(entry_df_path)

# Build one boolean mask per filtering criterion:
#   - observed at least TIMES_SEEN_ENTRY times
#   - standard deviation of the effect no greater than ENTRY_STD_DEV
#   - mutant is neither "*" nor "-"
seen_enough = func_data["times_seen"].ge(TIMES_SEEN_ENTRY)
low_std = func_data["effect_std"].le(ENTRY_STD_DEV)
not_star = func_data["mutant"].ne("*")
not_dash = func_data["mutant"].ne("-")

# Keep only the rows that satisfy every criterion
func_data_filtered = func_data.loc[seen_enough & low_std & not_star & not_dash]

# Per-site summary of the retained rows: mean effect, the first wildtype
# value seen, mean of the per-row std devs, and the spread of effects
# across the rows at that site
site_summary_spec = {
    "effect": ("effect", "mean"),
    "wildtype": ("wildtype", "first"),
    "effect_std": ("effect_std", "mean"),
    "site_mut_std": ("effect", "std"),
}
per_site = func_data_filtered.groupby(["site"]).agg(**site_summary_spec)

# Round numeric columns to two decimals and turn "site" back into a column
func_data_filtered_mean = per_site.round(2).reset_index()

# Write both tables to CSV without the index column
func_data_filtered.to_csv(entry_effects_filtered_path, index=False)
func_data_filtered_mean.to_csv(entry_effects_filtered_mean_path, index=False)