In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

In [5]:
# Build one long-format table from every `.log` input and the `.npy` arrays
# stored next to it (same directory, same stem).
logs = [Path(filename) for filename in snakemake.input if filename.endswith('.log')]
results = []

for logfile in logs:
    # Sibling arrays: <stem>.freqs.npy, <stem>.epochs.npy, <stem>.post.npy
    freqs = np.load(logfile.parent/(logfile.stem + '.freqs.npy'))
    epochs = np.load(logfile.parent/(logfile.stem + '.epochs.npy'))
    posterior = np.load(logfile.parent/(logfile.stem + '.post.npy'))

    with open(logfile) as f:
        # Advance to the first line that starts with 'epoch'. The scan is
        # bounded by the file length: a log without such a line raises instead
        # of spinning forever on the empty strings readline() yields at EOF.
        for line in f:
            if line.startswith('epoch'):
                break
        else:
            raise ValueError(
                f"no line starting with 'epoch' found in log file {logfile}"
            )
        # The line right after the 'epoch' line carries the estimate as its
        # second whitespace-separated field.
        s_line = next(f, '')
        s_estimate = float(s_line.split()[1])

    # Reshape the posterior matrix to long form: the row index becomes `freq`,
    # the column index becomes `gen`, the cell value becomes `log_prob`.
    locus_data = (
        pd.DataFrame(posterior)
        .stack()
        .reset_index()
        .rename({'level_0': 'freq', 'level_1': 'gen', 0: 'log_prob'}, axis='columns')
    )
    # Keep only every 20th distinct `gen` index to thin the table.
    gens_of_interest = sorted(locus_data.gen.unique())[::20]
    locus_data = locus_data.loc[np.isin(locus_data.gen, gens_of_interest)]
    # Replace positional indices with the values from `freqs` / `epochs`, and
    # tag every row with the per-file estimate and the second '_'-separated
    # token of the file stem.
    locus_data = locus_data.assign(
        freq = [freqs[i] for i in locus_data.freq],
        gen = [epochs[i] for i in locus_data.gen],
        s_estimate = s_estimate,
        locus = logfile.stem.split('_')[1]
    )
    results.append(locus_data)

data = pd.concat(results)

In [7]:
# Write the combined table to the first declared output as a tab-separated
# file, omitting the DataFrame index.
data.to_csv(
    path_or_buf=snakemake.output[0],
    sep='\t',
    index=False,
)