In [22]:
import pandas as pd
import numpy as np
from glob import glob

## Get all subjects

In [16]:
# Read the cleaned ABCD table; the path is relative to the current working directory.
abcd_csv_path = 'abcd_data/minimal_abcd_clean.csv'
abcd_data = pd.read_csv(abcd_csv_path)

# One entry per distinct value in the NARGUID column.
narguid_column = abcd_data.NARGUID
all_subs = narguid_column.unique()

## Get finished subjects

In [14]:
def strip_paths_to_subs(filelist, key):
    """Reduce each path in ``filelist`` to the identifier that follows ``key + '_'``.

    Parameters
    ----------
    filelist : iterable of str
        File paths to process.
    key : str
        Prefix that precedes the identifier in each file name.

    Returns
    -------
    list of str
        For every path, the text after the last occurrence of ``key + '_'``
        with every ``'.csv'`` substring removed. A path that does not contain
        ``key + '_'`` is returned whole (minus ``'.csv'``).
    """
    marker = key + '_'
    subs = []
    for path in filelist:
        # Everything after the last marker is the identifier plus extension.
        tail = path.split(marker)[-1]
        subs.append(tail.replace('.csv', ''))
    return subs

# Map each simulation type to the set of identifiers that already have a
# matching CSV under simulated_data/individual_data/.
finished_sub_dict = {}
for sim_key in ['standard', 'guesses', 'graded_mu_go_log']:
    sim_paths = glob('simulated_data/individual_data/%s_*.csv' % sim_key)
    sim_subs = strip_paths_to_subs(sim_paths, sim_key)
    finished_sub_dict[sim_key] = set(sim_subs)

In [32]:
# A subject counts as finished only when output exists for every simulation
# type. The earlier version passed 'guesses' twice and never checked
# 'graded_mu_go_log', so subjects missing that output were treated as done.
finished_subs = finished_sub_dict['standard'].intersection(
    finished_sub_dict['guesses'],
    finished_sub_dict['graded_mu_go_log'],
)

## Get remaining subjects and write the shell command script

In [50]:
from tempfile import mkstemp
from shutil import move, copymode
from os import fdopen, remove

def replace(file_path, pattern, subst):
    """Rewrite ``file_path``, replacing every line that contains ``pattern``.

    Parameters
    ----------
    file_path : str
        Path of the text file to rewrite.
    pattern : str
        Plain substring (not a regex); any line containing it is replaced.
    subst : str
        Text written in place of each matching line. It is written verbatim,
        so it must carry its own trailing newline.

    Raises
    ------
    OSError
        If ``file_path`` cannot be read or its directory cannot be written.
        The original file is left untouched in that case.
    """
    import os  # local import: the notebook only imports `fdopen`/`remove` from os

    # Create the temp file next to the target so the final swap is a
    # same-filesystem rename rather than a cross-device copy.
    target_dir = os.path.dirname(os.path.abspath(file_path))
    fh, abs_path = mkstemp(dir=target_dir)
    try:
        with fdopen(fh, 'w') as new_file, open(file_path) as old_file:
            for line in old_file:
                new_file.write(subst if pattern in line else line)
        # Carry the original permission bits over to the rewritten file.
        copymode(file_path, abs_path)
        # Swap in one step instead of remove-then-move, so a failure can no
        # longer leave the original deleted with nothing in its place.
        os.replace(abs_path, file_path)
    except BaseException:
        # Do not leave a stray temp file behind when anything goes wrong.
        try:
            os.unlink(abs_path)
        except OSError:
            pass
        raise

In [51]:
# Subjects still to be simulated: everything in `all_subs` that is not finished.
remaining_subs = set(all_subs).difference(finished_subs)
# The counts only agree when every finished ID also appears in `all_subs`
# and `all_subs` itself has no duplicates.
assert len(remaining_subs) == (len(all_subs) - len(finished_subs)), (
    'finished_subs contains IDs that are not in all_subs, or all_subs has duplicates'
)

# Sort before converting: set iteration order is arbitrary (and for strings can
# differ between kernel sessions), which would otherwise change how subjects
# are grouped into jobs from run to run.
remaining_subs = np.array(sorted(remaining_subs))

In [52]:
# Number of subject IDs passed to each `simulate_individuals.py` invocation.
nsubs_per_job = 48

# Build one command line per chunk of at most `nsubs_per_job` subjects.
# Slicing past the end of the array just yields a shorter final chunk.
commands = []
for start_idx in range(0, len(remaining_subs), nsubs_per_job):
    substr = ' '.join(remaining_subs[start_idx:start_idx + nsubs_per_job])
    commands.append(f'python simulate_individuals.py --subjects {substr}\n')

with open('run_remaining_sims.sh', 'w') as f:
    f.writelines(commands)
nlines = len(commands)

# Update the resource request lines of the slurm launcher to match the
# number of commands just written.
n_nodes = int(np.ceil(nlines/nsubs_per_job))
replace(
    'launch_remaining_sim_cmds.slurm',
    '#SBATCH -N',
    '#SBATCH -N %d # number of nodes requested - set to ceil(n rows in command script / 48)\n' % n_nodes,
)
replace(
    'launch_remaining_sim_cmds.slurm',
    '#SBATCH -n',
    '#SBATCH -n %s # total number of mpi tasks requested - set to n rows in command script\n' % nlines,
)