### Instructions

Refer to `Example script.ipynb` for examples of using this notebook.

### UPDATE THE MODEL PATH

In [126]:
# Set `model_path` to the model specification .json before running the cells below:
# a later cell opens it with open(model_path, 'r'). Uncomment and edit the example.
#model_path = '/data00/projects/megameta/scripts/jupyter_megameta/l1analysis/darpa1/task-share_model-test.json'

##### Update second-level model script and singularity paths if necessary (unlikely)

In [127]:
# Worker script that every generated job runs; it is bind-mounted into the container as /worker.py.
script_path = '/data00/projects/megameta/scripts/jupyter_megameta/cnlab_pipeline/cnlab/GLM/l2analysis_SPM.py'
# Singularity image passed to `singularity run` to execute the worker script.
singularity_path = '/data00/tools/singularity_images/neurodocker.sqsh'

### GENERATE SLURM JOBS FOR SECOND-LEVEL MODELS

In [128]:
import os, glob, json, copy, re, shutil
import pandas as pd
import numpy as np
import nibabel as nib

import scipy.io as sio

In [196]:
def ensure_exist(path, file):
    """Check that `file` exists inside directory `path`.

    Parameters:
        path: directory the file name is resolved against.
        file: file name (or relative path) to look for.

    Returns:
        `file`, unchanged, when `os.path.join(path, file)` exists.

    Raises:
        Exception: when the joined path does not exist.
    """
    candidate = os.path.join(path, file)
    if not os.path.exists(candidate):
        raise Exception("File missing: " + file)
    return file

def ensure_relative(path):
    """Return `path` unchanged unless it is absolute (starts with '/').

    Raises:
        Exception: when `path` starts with '/', since it is expected to be
            relative to the data path.
    """
    is_absolute = path.startswith('/')
    if not is_absolute:
        return path
    raise Exception("Make sure path is relative to the data path: " + path)
        
def ensure_list(obj):
    """Wrap `obj` in a one-element list unless it already is exactly a `list`.

    Only plain lists are passed through; tuples, strings, dicts and None are
    all wrapped.
    """
    return obj if type(obj) is list else [obj]
    
def remove_key(keys, obj):
    """Delete one or more keys from the dict `obj` in place.

    Parameters:
        keys: a single key, or a list of keys, to delete.
        obj: dict to modify.

    Keys that are absent are ignored. Keys are removed regardless of their
    value: testing `obj.get(key)` would leave behind entries whose value is
    falsy (empty list, 0, "", None).
    """
    # Same normalisation as ensure_list(): only a plain list is treated as "many keys".
    for key in (keys if type(keys) is list else [keys]):
        obj.pop(key, None)
        
def copy_from_template(target, template):
    """Fill in keys missing from `target` with deep copies taken from `template`.

    Parameters:
        target: dict that is modified in place; its existing values win.
        template: dict providing defaults.

    Nested dicts are merged recursively. When both sides define a key but
    `target` holds a non-dict value where the template has a dict, the value in
    `target` is kept as an override rather than recursed into (recursing would
    fail because the value has no keys).
    """
    for key, item in template.items():
        if key not in target:
            # Deep copy so later edits to `target` never leak back into the template.
            target[key] = copy.deepcopy(item)
        elif isinstance(item, dict) and isinstance(target[key], dict):
            copy_from_template(target[key], item)

In [197]:
# Load the model specification. Keys defined here take precedence over template values.
with open(model_path, 'r') as f:
    model = json.load(f)

# Normalise to a list: "Description" may be a single string in the model .json,
# and extending a bare string with `+=` below would raise a TypeError.
descriptions = ensure_list(model.get("Description", []))

# Use copy_from_template to combine templates described in model .json into one .json object
for template_path in ensure_list(model.get("Template", [])):

    with open(template_path, 'r') as f:
        template = json.load(f)

    print(f"Using template {os.path.basename(template_path)}: ", end="")
    print("\n\t".join(ensure_list(template.get("Description", ["No description found"]))))

    # Collect the descriptions of the model and of every template, in order.
    descriptions += ensure_list(template.get("Description", []))

    copy_from_template(model, template)

if len(descriptions) > 0:
    model['Description'] = descriptions

print("----")

Using template template-megameta.json: Mega-meta default pipeline
	4mm FWHM smoothing
	No global scaling
	FAST correlation
	6 motion parameters and framewise displacement
	Trash FD >= 0.75
Using template task-share.json: darpa1 share
----


In [198]:
# Take the task and model names to define the job name, create the output and
# working directories, and open up their permissions.
task = model['Info']['task']
job_name = 'task-{}_model-{}'.format(model['Info']['task'], model['Info']['model'])

env = model['Environment']

os.makedirs(env['output_path'], exist_ok=True)
os.makedirs(env['working_path'], exist_ok=True)

# Set permissions for output and working directories to 777 - all users all permissions.
# This is best effort: each directory is tried on its own and an OS-level
# failure is reported instead of being silently ignored.
for shared_dir in (env['output_path'], env['working_path']):
    try:
        os.chmod(shared_dir, 0o0777)
    except OSError as err:
        print(f"Warning: could not change permissions of {shared_dir}: {err}")

# The second-level models read the first-level results stored under <output_path>/<job_name>.
l2_path = os.path.join(env['output_path'], job_name)

if not os.path.exists(l2_path):
    raise Exception(f"First-level output directory not found: {l2_path}")

In [194]:
# First-level results of this model are searched in <output_path>/<job_name>/sub-*/.
l1_path = os.path.join(env['output_path'], job_name)
con_path = os.path.join(l1_path, 'contrasts.csv')
print("Aggregating contrast files", end=" ")

# Reuse the cached table unless SecondLevel.force_update asks for a rebuild.
if os.path.exists(con_path) and model.get('SecondLevel',{}).get('force_update', False) == False:
    con_df = pd.read_csv(con_path)

else:
    con_records = []

    # Sorted so the cached CSV has a reproducible row order.
    for sub_folder in sorted(glob.glob(os.path.join(l1_path, 'sub-*'))):
        print("*",end="")

        if not os.path.isdir(sub_folder):
            continue

        sub = os.path.basename(sub_folder)

        spm_file = os.path.join(sub_folder, 'SPM.mat')
        if not os.path.exists(spm_file):
            continue

        spm_mat = sio.loadmat(spm_file,
                              squeeze_me=True, struct_as_record=False)

        # squeeze_me=True collapses a one-element struct array to a bare struct,
        # which is not iterable; atleast_1d restores a sequence in that case.
        for n, con in enumerate(np.atleast_1d(spm_mat['SPM'].xCon), start=1):
            # Contrast images are looked up by position: con_0001.nii, con_0002.nii, ...
            con_file = os.path.join(sub_folder, f"con_{n:04d}.nii")
            if os.path.exists(con_file):
                con_records.append({'sub': sub,
                                    'con': con.name,
                                    # Stored relative to the first-level folder.
                                    'path': os.path.relpath(con_file, l1_path)})

    # Fail with a readable message instead of a KeyError from pivot_table below.
    if len(con_records) == 0:
        raise Exception(f"No first-level contrast files found in {l1_path}")

    con_df = pd.DataFrame(con_records)
    con_df.to_csv(con_path, index=False)

# Summary: number of subjects available per contrast.
print(" ")
print(con_df.pivot_table(index='con', values='sub', aggfunc='count'))
print("----")

# Reshape to one row per subject and one column per contrast name (values = image path).
con_df = con_df.pivot(index="sub", columns='con', values='path').reset_index()

Aggregating contrast files  
                     sub
con                     
clue_Content          41
clue_Friend           41
clue_Read             41
clue_Wall             41
post_button_Content   41
post_button_Friend    41
post_button_Read      41
post_button_Wall      41
rate_Content          41
rate_Friend           41
rate_Read             41
rate_Wall             41
read_Content          41
read_Friend           41
read_Read             41
read_Wall             41
----


In [199]:
# Job descriptions are written to <data_path>/<job_path>/<job_name>/jobs, one .json per second-level job.
env['job_path'] = ensure_relative(env['job_path'])

job_path = os.path.join(env['data_path'], env['job_path'], job_name, 'jobs')
os.makedirs(job_path, exist_ok=True)

# Work on deep copies: the loop below rewrites and deletes keys of every job, and doing
# that on the objects stored in `model` would make a second run of this cell fail on the
# keys removed by the first run.
l2_jobs = [copy.deepcopy(job_spec) for job_spec in ensure_list(model['SecondLevel']['job'])]

for l2_job in l2_jobs:
    # Per-job problems to report; printed once the job has been assembled.
    issues = []

    # Default name is "<analysis>_<contrast(s)>"; a custom 'name' may use the same placeholders.
    l2_name = l2_job.get('name', '{analysis}_{contrast}').format(analysis = l2_job['analysis'],
                                                               contrast = "_".join(ensure_list(l2_job['contrast'])))
    # Collapse every run of non-word characters so the name is safe in file names.
    l2_name = re.sub(r'\W+', '_', l2_name)

    l2_job['name'] = l2_name
    print(l2_name, ": ", end="")

    # Relative to env['output_path'], which becomes the job's data path further down.
    output_path = os.path.join(job_name, f'l2-{l2_name}')
    os.makedirs(os.path.join(env['output_path'], output_path), exist_ok=True)

    l2_analysis = l2_job['analysis']
    if l2_analysis not in ['OneSampleTTestDesign', 'PairedTTestDesign', 'TwoSampleTTestDesign', 'MultipleRegressionDesign']:
        raise Exception("Only these analyses available: OneSampleTTestDesign, PairedTTestDesign, TwoSampleTTestDesign, MultipleRegressionDesign")

    l2_job['contrast'] = ensure_list(l2_job['contrast'])

    # User-supplied contrasts come first; design-specific ones are appended below.
    l2_contrasts = ensure_list(l2_job.get('l2_contrasts', []))
    conestimate = {}

    if l2_job.get("explicit_mask_file"):
        # shutil.copyfile needs a destination file name (a directory is rejected), and the
        # job has to reference the copy by its base name inside the job's output folder.
        mask_name = os.path.basename(l2_job["explicit_mask_file"])
        shutil.copyfile(l2_job["explicit_mask_file"], os.path.join(env['output_path'], output_path, mask_name))
        l2_job["explicit_mask_file"] = os.path.join(output_path, mask_name)

    if l2_analysis in ['OneSampleTTestDesign', 'MultipleRegressionDesign']:
        if len(l2_job['contrast']) != 1:
            raise Exception(f"Only one contrast allowed for {l2_analysis}.")

        # Subjects without this contrast image are dropped.
        selected_con_df = con_df[['sub'] + l2_job['contrast']].dropna()

        if l2_job.get('include_intercept', True):
            l2_contrast_names = ['mean']
        else:
            l2_contrast_names = []

        if l2_job.get("covariate_file"):

            if l2_job['covariate_file'].endswith('tsv'):
                covariate_df = pd.read_csv(l2_job['covariate_file'], sep='\t')
            else:
                covariate_df = pd.read_csv(l2_job['covariate_file'])

            # Default to every column of the file; accept a single name as well as a list,
            # and never treat the subject identifier as a covariate.
            covariate_names = [name
                               for name in ensure_list(l2_job.get('covariate_names', list(covariate_df.columns)))
                               if name != 'sub']

            # Keep only subjects that have both the contrast image and all covariates.
            selected_con_df = selected_con_df.merge(covariate_df[['sub'] + covariate_names], on='sub', how='left').dropna()

            # Save the covariates actually used next to the results and point the job at that copy.
            covariate_file = os.path.join(output_path, 'covariates.csv')
            l2_job['covariate_file'] = covariate_file

            selected_con_df[['sub'] + covariate_names].to_csv(os.path.join(env['output_path'], covariate_file), index=False)

            l2_contrast_names += covariate_names

        l2_job['in_files'] = (job_name + os.path.sep + selected_con_df[l2_job['contrast'][0]]).to_list()

        # One T contrast per regressor (the intercept, when included, and each covariate).
        for cn in l2_contrast_names:
            l2_contrasts.append([cn, 'T', [cn], [1]])

    elif l2_analysis == 'PairedTTestDesign':

        if len(l2_job['contrast']) != 2:
            raise Exception(f"Only two contrasts allowed for {l2_analysis}.")

        # Only subjects having both contrast images can form a pair.
        selected_con_df = con_df[['sub'] + l2_job['contrast']].dropna()

        if l2_job.get("covariate_file"):
            raise Exception("Covariates for PairedTTestDesign not supported yet.")

        cond1_files = (job_name + os.path.sep + selected_con_df[l2_job['contrast'][0]]).to_list()
        cond2_files = (job_name + os.path.sep + selected_con_df[l2_job['contrast'][1]]).to_list()

        l2_job['paired_files'] = [[c1,c2] for c1, c2 in zip(cond1_files, cond2_files)]

        #l2_contrasts.append([l2_job['contrast'][0], 'T', ['Condition_{1}'], [1]])
        #l2_contrasts.append([l2_job['contrast'][1], 'T', ['Condition_{2}'], [1]])
        l2_contrasts.append(['>'.join(l2_job['contrast']), 'T', ['Condition_{1}','Condition_{2}'], [1,-1]])

    elif l2_analysis == 'TwoSampleTTestDesign':

        if len(l2_job['contrast']) != 1:
            raise Exception(f"Only one contrast allowed for {l2_analysis}.")

        selected_con_df = con_df[['sub'] + l2_job['contrast']].dropna()

        if l2_job.get("groups"):

            # 'groups' holds two collections of subject identifiers, one per group.
            group1_con_df = selected_con_df[selected_con_df['sub'].isin(l2_job['groups'][0])]
            group2_con_df = selected_con_df[selected_con_df['sub'].isin(l2_job['groups'][1])]

            l2_job['group1_files'] = (job_name + os.path.sep + group1_con_df[l2_job['contrast'][0]]).to_list()
            l2_job['group2_files'] = (job_name + os.path.sep + group2_con_df[l2_job['contrast'][0]]).to_list()

            l2_group_names = ['Group1', 'Group2']

        elif l2_job.get("covariate_file"):

            grouping_var = l2_job['grouping_variable']

            if l2_job.get('covariate_names'):
                raise Exception("Covariates for TwoSampleTTestDesign not supported yet.")

            if l2_job['covariate_file'].endswith('tsv'):
                covariate_df = pd.read_csv(l2_job['covariate_file'], sep='\t')[['sub',grouping_var]]
            else:
                covariate_df = pd.read_csv(l2_job['covariate_file'])[['sub',grouping_var]]

            selected_con_df = selected_con_df.merge(covariate_df, on='sub', how='left').dropna()

            if len(selected_con_df[grouping_var].unique()) != 2:
                raise Exception("Grouping variable does not have exactly two levels.")

            group_files = []
            l2_group_names = []

            for group, group_df in selected_con_df.groupby(grouping_var):
                group_files.append((job_name + os.path.sep + group_df[l2_job['contrast'][0]]).to_list())
                l2_group_names.append(f"{grouping_var} = {group}")

            l2_job['group1_files'] = group_files[0]
            l2_job['group2_files'] = group_files[1]

            # The file was only needed to split the subjects; it is not passed on to the job.
            remove_key("covariate_file", l2_job)

        else:
            # Without this guard l2_group_names would be undefined here, or silently
            # reused from a previous job in the loop.
            raise Exception(f"{l2_analysis} requires either 'groups' or 'covariate_file' with 'grouping_variable'.")

        l2_contrasts.append([l2_group_names[0], 'T', ['Group_{1}'], [1]])
        l2_contrasts.append([l2_group_names[1], 'T', ['Group_{2}'], [1]])
        l2_contrasts.append(['>'.join(l2_group_names), 'T', ['Group_{1}','Group_{2}'], [1,-1]])

    # These keys were only inputs to this cell; the contrasts go into 'EstimateContrast' below.
    remove_key(["contrast", "covariate_names", "groups", "grouping_variable", "l2_contrasts"], l2_job)

    job = { 'Environment': copy.deepcopy(model['Environment']),
            'Info': copy.deepcopy(model['Info']),
            'SecondLevel': l2_job,
            'EstimateContrast': { 'contrasts': l2_contrasts }
          }

    # The job reads its inputs from the output folder, where the first-level results are stored.
    job['Environment']['data_path'] = env['output_path']

    remove_key("job_path", job["Environment"])
    remove_key(["sub_container", "exclude", "run", "tr"], job["Info"])

    if len(issues) == 0:
        print("job created")
    else:
        print("issues found - \n\t" + "\n\t".join(issues))

    job_output = os.path.join(job_path, f"l2-{l2_name}.json")
    with open(job_output, 'w') as f:
        json.dump(job, f)

print("----")

OneSampleTTestDesign_read_Read : job created
MultipleRegressionDesign_read_Read : job created
PairedTTestDesign_read_Wall_read_Content : job created
TwoSampleTTestDesign_read_Wall : job created
----


In [168]:
# SLURM scripts go to <data_path>/<job_path>/<job_name>/slurm; their logs go to the out/ subfolder.
slurm_path = os.path.join(env['data_path'], env['job_path'], job_name, 'slurm')
slurm_out_path = os.path.join(slurm_path, 'out')

# Directory creation is kept outside the best-effort chmod handling: the #SBATCH
# --output/--error paths below point into out/, so it must exist even when the
# permissions cannot be changed.
os.makedirs(slurm_path, exist_ok=True)
os.makedirs(slurm_out_path, exist_ok=True)

for slurm_dir in (slurm_path, slurm_out_path):
    try:
        os.chmod(slurm_dir, 0o0777)
    except OSError as err:
        print(f"Warning: could not change permissions of {slurm_dir}: {err}")

# Host folders bound into the container; identical for every job.
# /data and /output both map to the output folder.
data_path = env['output_path']
output_path = env['output_path']
working_path = env['working_path']

for l2_job in l2_jobs:

    l2_name = l2_job['name']

    # NOTE(review): the stdout log uses the .job extension, like the script itself - confirm this is intended.
    slurm_header = []
    slurm_header.append("#!/bin/bash")
    slurm_header.append(f"#SBATCH --job-name=l2-{l2_name}.job")
    slurm_header.append(f"#SBATCH --output=out/l2-{l2_name}.job")
    slurm_header.append(f"#SBATCH --error=out/l2-{l2_name}.err")
    slurm_header.append("#SBATCH --time=5-00:00")
    slurm_header.append("")
    # Left open on purpose: the singularity command is appended to this line.
    slurm_header.append("srun ")

    json_path = os.path.join(job_path, f"l2-{l2_name}.json")

    # `cmd` is also printed by the test-drive cell, so it stays a plain list of command parts.
    cmd = []
    cmd.append("singularity run --cleanenv")
    cmd.append(f"-B {script_path}:/worker.py")
    cmd.append(f"-B {data_path}:/data")
    cmd.append(f"-B {output_path}:/output")
    cmd.append(f"-B {working_path}:/working")
    cmd.append(f"-B {json_path}:/job.json")
    cmd.append(f"{singularity_path} python /worker.py /job.json")

    slurm_output = os.path.join(output_path, job_name, f"l2-{l2_name}")
    slurm_working = os.path.join(working_path, job_name, f"l2-{l2_name}")

    # After the run, make the job's output and working folders group-writable.
    slurm_footer = []
    slurm_footer.append("")
    slurm_footer.append("")
    slurm_footer.append(f"chmod -R 775 {slurm_output}")
    slurm_footer.append(f"chmod -R 775 {slurm_working}")

    # Header lines, then the command split over continuation lines, then the footer.
    slurm_cmd = "\n".join(slurm_header) + " \\\n  ".join(cmd) + "\n".join(slurm_footer)

    with open(os.path.join(slurm_path, f"l2-{l2_name}.job"), 'w') as f:
        f.write(slurm_cmd)

### TEST DRIVE ONE MODEL (SLURM JOB)

In [169]:
# Show the container command of the last job written by the SLURM cell
# (`l2_name` and `cmd` still hold the values from its final loop iteration).
print(
    f"To test drive one job (l2-{l2_name}), copy and paste the following lines in terminal:",
    "",
    " \\\n  ".join(cmd),
    "----",
    sep="\n",
)

To test drive one job (l2-TwoSampleTTestDesign_read_Wall), copy and paste the following lines in terminal:

singularity run --cleanenv \
  -B /data00/projects/megameta/scripts/jupyter_megameta/cnlab_pipeline/cnlab/GLM/l2analysis_SPM.py:/worker.py \
  -B /data00/projects/megameta/darpa1/derivatives/nipype:/data \
  -B /data00/projects/megameta/darpa1/derivatives/nipype:/output \
  -B /data00/projects/megameta/darpa1/working/nipype:/working \
  -B /data00/projects/megameta/darpa1/models/task-share_model-test/jobs/l2-TwoSampleTTestDesign_read_Wall.json:/job.json \
  /data00/tools/singularity_images/neurodocker.sqsh python /worker.py /job.json
----


### RUN ALL MODELS (SLURM JOBS)

In [170]:
# Print the shell lines that submit every generated job script with sbatch.
print(
    "Alternatively, submit the whole batch by copying and pasting the following lines in terminal:",
    "",
    f"cd {slurm_path}",
    sep="\n",
)

# One sbatch call per job script, run from the slurm folder.
for l2_job in l2_jobs:
    l2_name = l2_job['name']
    print(f"sbatch -D {slurm_path} -c 8 l2-{l2_name}.job")

print(" ")

Alternatively, submit the whole batch by copying and pasting the following lines in terminal:

cd /data00/projects/megameta/darpa1/models/task-share_model-test/slurm
sbatch -D /data00/projects/megameta/darpa1/models/task-share_model-test/slurm -c 8 l2-OneSampleTTestDesign_read_Read.job
sbatch -D /data00/projects/megameta/darpa1/models/task-share_model-test/slurm -c 8 l2-MultipleRegressionDesign_read_Read.job
sbatch -D /data00/projects/megameta/darpa1/models/task-share_model-test/slurm -c 8 l2-PairedTTestDesign_read_Wall_read_Content.job
sbatch -D /data00/projects/megameta/darpa1/models/task-share_model-test/slurm -c 8 l2-TwoSampleTTestDesign_read_Wall.job
 
