In this notebook one can:
- load a notebook's settings as a dictionary
- change the settings
- save them as a new notebook
- submit the new notebooks as an array job to a SLURM cluster.

In [None]:
import sys
sys.path.append('/dls_sw/e02/software/epsic_tools')
import epsic_tools.api as ep
import pprint
import re
import subprocess
import os
import subprocess
import glob
import time

# Identifiers of the data to process; they are joined into the Merlin
# processing path (/dls/e02/data/<year>/<session>/processing/Merlin/<au_cal_folder>).
year = '2024'
# NOTE(review): 'mgXXXXX-X' looks like a placeholder - replace with the real session ID.
session = 'mgXXXXX-X'
# Sub-folder of the Merlin processing directory that is searched for .hdf5 files.
au_cal_folder = 'Au_xgrating'

In [None]:
# UTC timestamp (YYYYMMDD_HHMMSS) used to name this submission's log folder.
# The fields are zero-padded so the name is unambiguous and sorts
# chronologically, and gmtime() is sampled once so every field comes from the
# same instant.
current = time.strftime("%Y%m%d_%H%M%S", time.gmtime())
print(current)
# Location and name of the notebook whose settings cell is used as the template.
# The trailing comments keep alternative values that were used previously.
starting_notebook_path = os.getcwd() # e.g. '/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/scripts_folder'
starting_notebook_name = 'au_xgrating_cal_submit' # e.g. 'template_BraggAnalysis-submit'
# Helper from epsic_tools; later cells call its get_settings() and set_settings().
nb = ep.notebook_utils.NotebookHelper(starting_notebook_path, starting_notebook_name)

In [None]:
# Parse the settings cell of the starting notebook into a {name: value} dict.
# The settings text is treated as space-separated 'name=value' tokens.
old_settings = nb.get_settings(1) # settings should be cell index 1
# Drop empty tokens so doubled or trailing spaces do not yield entries
# without an '=' (which would raise IndexError below).
old_settings = [token for token in old_settings.split(' ') if token]
# Split on the first '=' only, so a value that itself contains '=' is kept whole.
old_pairs = [token.split('=', 1) for token in old_settings]
old_keys = [pair[0] for pair in old_pairs]
old_vals = [pair[1] for pair in old_pairs]
old_dict = dict(zip(old_keys, old_vals))
pprint.pprint(old_dict)

In [None]:
# Root directory holding the Merlin folders of the calibration data set
merlin_root = '/'.join(
    ['/dls/e02/data', year, session, 'processing/Merlin', au_cal_folder]
)
print(merlin_root)

# Collect the .hdf5 files sitting one folder level below the root, in sorted order
hdf5_file_paths = sorted(glob.glob(merlin_root + '/*/*.hdf5', recursive=True))

# Report how many files were found, then list them one per line
print(len(hdf5_file_paths))
print('\n'.join(hdf5_file_paths))

In [None]:
# Create one configured copy of the starting notebook per .hdf5 file and write
# the list of copies to a text file that the cluster script reads.

# Stop early when nothing was found: an empty list would otherwise lead to an
# invalid '--array=0--1' specification in the submission script.
if not hdf5_file_paths:
    raise FileNotFoundError(f'No .hdf5 files found under {merlin_root}')

# log files from the cluster jobs and the bash script will be saved here:
code_path = merlin_root + '/cluster_logs'
os.makedirs(code_path, exist_ok=True)

concurrent_jobs = 3 #Integer number of concurrent jobs to run in the array

new_notebook_paths_list = []
for hdf5_path in hdf5_file_paths:
    # update the settings: start from the template values and point at this file
    new_setting = old_dict.copy()
    new_setting['file_path'] = hdf5_path
    new_setting['save_path_name'] = 'automatic_Au_calibration'
    pprint.pprint(new_setting)

    # results folder placed next to the data file
    save_path = os.path.join(os.path.dirname(hdf5_path), new_setting['save_path_name'])
    print(save_path)
    os.makedirs(save_path, exist_ok=True)

    # presumably writes a copy of the notebook with the new settings to this
    # path - confirm against NotebookHelper.set_settings
    new_notebook_path = os.path.join(save_path, 'submitted_notebook.ipynb')
    nb.set_settings(new_setting, new_notebook_path)
    print(f'new notebook path: {new_notebook_path}')
    new_notebook_paths_list.append(new_notebook_path)

# One notebook path per line; the array task ID indexes into this list.
note_book_path_file = os.path.join(code_path, 'notebook_list.txt')
with open(note_book_path_file, 'w') as f:
    f.write('\n'.join(new_notebook_paths_list))

# Write the sbatch script that runs one generated notebook per array task.
import shlex

# Slurm does not create missing directories for --error/--output; if the
# folder is absent the tasks fail without leaving any log, so create it here.
log_dir = os.path.join(code_path, f'logs_{current}')
os.makedirs(log_dir, exist_ok=True)

bash_script_path = os.path.join(code_path, 'cluster_submit.sh')
with open(bash_script_path, 'w') as f:
    # The script is assembled from adjacent string literals: fixed #SBATCH
    # header, the job-specific directives, then the commands run by each task.
    f.write('''#!/usr/bin/env bash
#SBATCH --partition cs04r
#SBATCH --job-name epsic_notebook
#SBATCH --time 05:00:00
#SBATCH --nodes 1
#SBATCH --tasks-per-node 1
#SBATCH --mem 200G
'''
# one array task per notebook, at most `concurrent_jobs` running at once
f"#SBATCH --array=0-{len(new_notebook_paths_list)-1}%{int(concurrent_jobs)}\n"
f"#SBATCH --error={log_dir}{os.sep}error_%j.out\n"
f"#SBATCH --output={log_dir}{os.sep}output_%j.out\n"
"module load python/epsic3.10\n"
# read the notebook list into a bash array; the path is shell-quoted
f"mapfile -t paths_array < {shlex.quote(note_book_path_file)}\n"
# expansions are double-quoted so paths containing spaces stay one argument
'''
echo "${paths_array[$SLURM_ARRAY_TASK_ID]}"
jupyter nbconvert --to notebook --inplace --ClearMetadataPreprocessor.enabled=True "${paths_array[$SLURM_ARRAY_TASK_ID]}"
jupyter nbconvert --to notebook --allow-errors --execute "${paths_array[$SLURM_ARRAY_TASK_ID]}"

'''
           )
        
# Submit the sbatch script from the 'wilson' host over ssh and echo the
# remote session output into the notebook.
import shlex

# The context manager closes the pipes and waits for ssh to exit, so the
# process is reaped and its return code is available afterwards.
with subprocess.Popen(['ssh',
                       '-tt',
                       'wilson'],
                      stdin=subprocess.PIPE,
                      stdout=subprocess.PIPE,
                      universal_newlines=True,
                      bufsize=0) as sshProcess:
    sshProcess.stdin.write("ls .\n")
    sshProcess.stdin.write("echo END\n")
    # shell-quote the path so it reaches sbatch as a single argument
    sshProcess.stdin.write(f"sbatch {shlex.quote(bash_script_path)}\n")
    sshProcess.stdin.write("uptime\n")
    sshProcess.stdin.write("logout\n")
    sshProcess.stdin.close()

    # print everything up to the END marker
    for line in sshProcess.stdout:
        if line == "END\n":
            break
        print(line, end="")

    # to catch the lines up to logout
    for line in sshProcess.stdout:
        print(line, end="")

# ssh's stderr is not captured, so make a failed connection or remote error
# visible in the notebook output instead of passing silently.
if sshProcess.returncode != 0:
    print(f"WARNING: ssh exited with status {sshProcess.returncode}; "
          "check that the job was actually submitted.")