In [2]:
import pandas as pd
from pathlib import Path
from collections import defaultdict
import re

from utils.prepare_data import save_data, read_data

In [3]:
# Read every Snakemake input table and tag its rows with the replicate number
# parsed from the file name.
results = list(map(Path, snakemake.input))
result_dfs = []

for result_path in results:
    table = pd.read_table(result_path)
    # The stem must split into exactly four '_'-separated fields (otherwise the
    # unpacking raises ValueError); only the last field is used in this cell.
    target, training, _, rep_field = result_path.stem.split('_')
    # The replicate number is whatever follows the last '-' in that field.
    rep_number = int(rep_field.rpartition('-')[2])
    result_dfs.append(table.assign(replicate=rep_number))

In [4]:
def get_locus(s):
    """Pull the locus label out of an identifier string.

    Looks for the first ``sweep-<label>_`` occurrence in ``s`` and returns
    ``<label>``, i.e. the shortest non-empty run of characters between
    ``sweep-`` and the next underscore.

    Parameters
    ----------
    s : str
        Text to search.

    Returns
    -------
    str or None
        The captured label, or ``None`` when the pattern does not occur.
    """
    found = re.search(r"sweep-(.+?)_", s)
    return found.group(1) if found else None

In [5]:
# Stack all replicate tables, label each row with the locus parsed from its
# uuid, and tidy the result in a single chain.
df = (
    pd.concat(result_dfs)
    .assign(locus=lambda frame: [get_locus(uid) for uid in frame['uuid']])
    # rows whose uuid yielded no locus are discarded
    .dropna(axis='rows', how='any', subset=['locus'])
    .sort_values(by='locus')
    .drop('uuid', axis='columns')
    # columns that are entirely missing carry no information
    .dropna(axis='columns', how='all')
)

# When present, replace the log-valued column with 10 raised to that value.
log_col = 'predicted_log_selection_coefficient'
if log_col in df.columns:
    df = (
        df
        .assign(predicted_selection_coefficient=10 ** df[log_col])
        .drop(log_col, axis='columns')
    )

# Wide layout: one row per replicate, remaining columns spread across loci;
# reset_index turns 'replicate' back into a regular column.
df = df.pivot(index='replicate', columns='locus').reset_index()

In [20]:
# Descriptive statistics (DataFrame.describe) of the pivoted results, without
# the bookkeeping columns listed in `undesired_cols`.
undesired_cols = ['replicate', 'predicted_ix']

# Drop everything in one pass on the top column level. errors='ignore' skips
# labels that are absent, so no membership test is needed. Testing against
# `columns.levels[0]` is unreliable: MultiIndex levels keep labels that are no
# longer used after a drop or column selection, so a label can appear there
# without any column actually carrying it.
summary = df.describe().drop(columns=undesired_cols, level=0, errors='ignore')

In [24]:
# Append an 'IQR' row: the '75%' row minus the '25%' row of the summary table.
lower_quartile = summary.loc['25%']
upper_quartile = summary.loc['75%']
summary.loc['IQR'] = upper_quartile - lower_quartile

In [26]:
# Write both outputs to the paths declared by the Snakemake rule.
replicates_path = snakemake.output["replicates_results"]
statistics_path = snakemake.output["replicates_statistics"]

# Per-replicate wide table goes through the project's save_data helper.
save_data(df, replicates_path)
# Summary statistics are written tab-separated, keeping the statistic names
# (the row index) as the first column.
summary.to_csv(statistics_path, sep='\t', index=True)