In [4]:
import glob
import os
import subprocess

import numpy as np
import pandas as pd

In [2]:
def create_sims(n, file):
    """
    Run replicated SLiM simulations for randomly drawn selection coefficients.

    ``n`` selection coefficients are drawn uniformly from [-0.1, 0.1] and
    rounded to 5 decimals, which is the precision passed to SLiM on the
    command line. For each coefficient SLiM is run 3 times; after each run the
    most recently modified ``./sims/freqs_<sc>_*.txt`` file is read as that
    replicate's frequencies.

    Parameters:
        n (int): Number of SLiM simulations to create.
        file (str): SLiM file location.

    Returns:
        arr (np.array): Array of shape (n, 8, 3) with the SLiM simulations
            frequency. A replicate whose run failed or left no output file is
            filled with NaN.
        scs (np.array): Array of shape (n,) with the SLiM simulations
            selection coefficients, exactly as they were passed to SLiM.

    Raises:
        FileNotFoundError: If the ``slim`` executable cannot be found.
    """

    n_points, n_reps = 8, 3
    arr = np.zeros((n, n_points, n_reps))
    # Round up front so the returned labels equal the values SLiM simulated,
    # not the higher-precision draws they were formatted from.
    scs = np.round(np.random.uniform(-0.1, 0.1, n), 5)

    for ind, sc in enumerate(scs):

        sc_str = f"{sc:.5f}"

        for rep in range(n_reps):

            print(f"Running SLiM with sc={sc_str}, replicate={rep+1}")
            # Argument list instead of a shell string: `file` is passed to
            # SLiM verbatim and is never split or interpreted by a shell.
            completed = subprocess.run(["slim", "-d", f"sc={sc_str}", file])

            files = []
            if completed.returncode == 0:
                # Find all matching output files for this sc value
                files = glob.glob(f"./sims/freqs_{sc_str}_*.txt")
            else:
                # A failed run must not pick up an older file for the same sc.
                print(f"SLiM exited with status {completed.returncode} "
                      f"for sc={sc_str} replicate={rep+1}")

            if not files:
                print(f"No output file found for sc={sc_str} replicate={rep+1}")
                # Mark only this replicate as missing; nothing is carried over
                # from a previous coefficient.
                arr[ind, :, rep] = np.nan
                continue
            latest_file = max(files, key=os.path.getmtime)

            # The output file is expected to hold a single column of values
            # (one per time point); it is written straight into this
            # replicate's slot of the result array.
            arr[ind, :, rep] = (
                pd.read_csv(latest_file, header=None).iloc[:, 0].to_numpy()
            )

    return arr, scs

In [None]:
# Run create_sims for 4 selection coefficients using the 2Hap_70G SLiM script.
N_SIMS = 4
SLIM_SCRIPT = './2Hap_70G.slim'
arr, scs = create_sims(N_SIMS, SLIM_SCRIPT)

In [12]:
# Binomially resample every simulated frequency with 20 trials and divide the
# counts by 20 to turn them back into frequencies.
SAMPLE_SIZE = 20
arr2 = np.random.binomial(SAMPLE_SIZE, arr) / SAMPLE_SIZE
# Arrays passed positionally are stored under the default keys
# arr_0 (arr2) and arr_1 (scs).
np.savez("train.npz", arr2, scs)

In [7]:
# Load the saved training archive and display the shape of its first array.
# NOTE(review): the save cell writes "train.npz" in the working directory while
# this reads "../data/train.npz" — confirm the file is moved/copied in between.
train_path = "../data/train.npz"
a = np.load(train_path)
a['arr_0'].shape

(4000, 8, 3)