In this notebook one can:
- load a template notebook's settings as a dictionary
- change those settings
- save the modified settings into a new notebook (one per dataset)
- submit the new notebooks as an array job to the SLURM cluster.

In [8]:
import sys
sys.path.append('/dls_sw/e02/software/epsic_tools')
import epsic_tools.api as ep
import pprint
import re
import subprocess
import os
import subprocess
import glob

In [4]:
# Location and name of the template notebook whose settings cell is read
# below and re-written for every dataset.
starting_notebook_path = '/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/scripts_folder'
starting_notebook_name = 'template_BraggAnalysis-submit'

# Helper from epsic_tools; used later in this notebook through
# nb.get_settings(...) and nb.set_settings(...).
nb = ep.notebook_utils.NotebookHelper(
    starting_notebook_path,
    starting_notebook_name,
)

In [5]:
# Read the settings of the template notebook (settings should be cell index 1)
# and turn them into a dictionary. The text is parsed as space-separated
# ``key=value`` tokens.
settings_text = nb.get_settings(1)

# Drop empty tokens so that repeated or trailing spaces do not break parsing.
old_settings = [token for token in settings_text.split(' ') if token]

old_keys = []
old_vals = []
for token in old_settings:
    # Split on the FIRST '=' only, so values that themselves contain '='
    # are kept intact instead of being truncated.
    key, separator, value = token.partition('=')
    if not separator:
        # Typically caused by a value containing a space (e.g. a path).
        raise ValueError(
            f"Malformed setting {token!r}: expected a 'key=value' token"
        )
    old_keys.append(key)
    old_vals.append(value)

old_dict = dict(zip(old_keys, old_vals))
pprint.pprint(old_dict)

{'Au_calib_path': '/dls/e02/data/2024/mg37302-1/processing/Merlin/Au_xgrating',
 'bin_factor': '2',
 'crop_q': '',
 'fill_cross': '0',
 'hot_pix_thresh': '0.1',
 'load_prepared_data': '0',
 'peak_finding_minAbsoluteIntensity': '0.25',
 'peak_finding_minPeakSpacing': '5',
 'prepared_data_path': '',
 'probe_kernel_post': '5',
 'probe_kernel_pre': '0.5',
 'probe_path': '',
 'raw_data_path': '/dls/e02/data/2024/mg37302-1/processing/Merlin/SP41/20240418_151824/20240418_151824_data.hdf5',
 'save_path_name': 'manual_test',
 'syn_probe_rad': '4',
 'syn_probe_width': '4',
 'synthetic_probe': '1',
 'v_max': '0.99',
 'v_min': '0.01'}


In [10]:
# def find_hdf5_files(root_dir):
#     hdf5_files = []
#     # Loop through each subdirectory in the root directory
#     for subfolder in os.listdir(root_dir):
#         # Check if the subfolder matches the 'SPXX' pattern
#         if subfolder.startswith("Li_metal"):
#             spxx_path = os.path.join(root_dir, subfolder)
#             # Loop through each dataset subfolder inside the SPXX directory
#             for dataset_subfolder in os.listdir(spxx_path):
#                 dataset_path = os.path.join(spxx_path, dataset_subfolder)
#                 # Check for .hdf5 files in the dataset subfolder
#                 for file in os.listdir(dataset_path):
#                     if file.endswith('.hdf5'):
#                         # Append the full path of the .hdf5 file
#                         hdf5_files.append(os.path.join(dataset_path, file))
#     return hdf5_files

# Root directory that is searched exactly one folder level down for
# .hdf5 files.
# Alternative root kept for reference: '/dls/e02/data/2024/mg37302-1/processing/Merlin'
merlin_root = '/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data'

# Gather every match in sorted order; both names refer to the same list.
# (Alternative: the commented-out find_hdf5_files(merlin_root) helper above.)
hdf5_pattern = merlin_root + '/*/*.hdf5'
data_files = hdf5_file_paths = sorted(glob.glob(hdf5_pattern))

# Report the number of files found, then one path per line.
print(len(hdf5_file_paths))
print("\n".join(hdf5_file_paths))

4
/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_174439/20240420_174439_data.hdf5
/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_174947/20240420_174947_data.hdf5
/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_175434/20240420_175434_data.hdf5
/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_184308/20240420_184308_data.hdf5


In [12]:
# Build one notebook per dataset with updated settings.
# code_path is where the helper files end up: the list of notebooks written
# at the end of this block, plus the bash script and the cluster log files
# handled further down in this cell.
code_path = '/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/scripts_folder'
concurrent_jobs = 3  # Integer number of concurrent jobs to run in the array

new_notebook_paths_list = []
for hdf5_path in hdf5_file_paths:
    # Start from the template settings and override the per-dataset values.
    new_setting = old_dict.copy()
    new_setting['crop_q'] = ''
    new_setting['raw_data_path'] = hdf5_path
    new_setting['save_path_name'] = 'array_cluster_processed'
    pprint.pprint(new_setting)

    # The output folder sits next to the raw data file.
    save_path = os.path.join(os.path.dirname(hdf5_path), new_setting['save_path_name'])
    print(save_path)
    # exist_ok=True keeps the cell re-runnable and avoids the race between
    # checking for the folder and creating it.
    os.makedirs(save_path, exist_ok=True)

    new_notebook_path = os.path.join(save_path, 'submitted_notebook.ipynb')
    # NOTE(review): presumably writes a copy of the template notebook with
    # these settings to new_notebook_path - confirm against NotebookHelper.
    nb.set_settings(new_setting, new_notebook_path)
    print(f'new notebook path: {new_notebook_path}')
    new_notebook_paths_list.append(new_notebook_path)

# One notebook path per line; the sbatch script reads this file to map an
# array task index to its notebook.
note_book_path_file = os.path.join(code_path, 'notebook_list.txt')
with open(note_book_path_file, 'w') as f:
    f.write('\n'.join(new_notebook_paths_list))


# Write the sbatch script that runs one notebook per SLURM array task.

# An empty list would produce the invalid range "--array=0--1", so stop
# before writing a script that cannot be submitted.
if not new_notebook_paths_list:
    raise ValueError(
        "No notebooks were generated, so there is nothing to submit to the cluster"
    )

# Slurm does not create missing directories for --error/--output, so make
# sure the log folder exists before the job is submitted.
log_dir = os.path.join(code_path, 'logs')
os.makedirs(log_dir, exist_ok=True)

bash_script_path = os.path.join(code_path, 'cluster_submit.sh')
last_task_index = len(new_notebook_paths_list) - 1

sbatch_script = (
    "#!/usr/bin/env bash\n"
    "#SBATCH --partition cs05r\n"
    "#SBATCH --job-name epsic_notebook\n"
    "#SBATCH --time 05:00:00\n"
    "#SBATCH --nodes 1\n"
    "#SBATCH --gpus-per-node 0\n"
    "#SBATCH --tasks-per-node 1\n"
    "#SBATCH --mem 256G\n"
    # Task indices 0..N-1, with at most `concurrent_jobs` running at once.
    f"#SBATCH --array=0-{last_task_index}%{int(concurrent_jobs)}\n"
    f"#SBATCH --error={log_dir}{os.sep}error_%j.out\n"
    f"#SBATCH --output={log_dir}{os.sep}output_%j.out\n"
    "module load python/epsic3.10\n"
    # Load the notebook list into a bash array, one path per element.
    f'mapfile -t paths_array < "{note_book_path_file}"\n'
    "\n"
    # Expansions are double-quoted so paths containing spaces stay intact.
    'notebook_path="${paths_array[$SLURM_ARRAY_TASK_ID]}"\n'
    'echo "$notebook_path"\n'
    'jupyter nbconvert --to notebook --inplace --ClearMetadataPreprocessor.enabled=True "$notebook_path"\n'
    'jupyter nbconvert --to notebook --allow-errors --execute "$notebook_path"\n'
)

with open(bash_script_path, 'w') as f:
    f.write(sbatch_script)
        
# Submit the array job from an interactive ssh session on 'wilson'.
# The context manager closes the pipes and waits for the ssh process when the
# block ends, so no child process or file descriptor is left behind.
with subprocess.Popen(['ssh',
                       '-tt',
                       'wilson'],
                      stdin=subprocess.PIPE,
                      stdout=subprocess.PIPE,
                      universal_newlines=True,
                      bufsize=0) as sshProcess:
    # Queue all commands, then close stdin so the remote shell sees EOF.
    sshProcess.stdin.write("ls .\n")
    sshProcess.stdin.write("echo END\n")
    sshProcess.stdin.write(f"sbatch {bash_script_path}\n")
    sshProcess.stdin.write("uptime\n")
    sshProcess.stdin.write("logout\n")
    sshProcess.stdin.close()

    # Print everything up to the "END" marker line; the marker itself is
    # not printed.
    for line in sshProcess.stdout:
        if line == "END\n":
            break
        print(line, end="")

    # Print the remaining lines (sbatch reply, uptime) up to logout.
    for line in sshProcess.stdout:
        print(line, end="")

# returncode is set once the context manager has waited for ssh to finish.
if sshProcess.returncode != 0:
    print(
        f"WARNING: ssh session exited with status {sshProcess.returncode}; "
        "check the output above to confirm that the job was submitted."
    )
            


{'Au_calib_path': '/dls/e02/data/2024/mg37302-1/processing/Merlin/Au_xgrating',
 'bin_factor': '2',
 'crop_q': '',
 'fill_cross': '0',
 'hot_pix_thresh': '0.1',
 'load_prepared_data': '0',
 'peak_finding_minAbsoluteIntensity': '0.25',
 'peak_finding_minPeakSpacing': '5',
 'prepared_data_path': '',
 'probe_kernel_post': '5',
 'probe_kernel_pre': '0.5',
 'probe_path': '',
 'raw_data_path': '/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_174439/20240420_174439_data.hdf5',
 'save_path_name': 'array_cluster_processed',
 'syn_probe_rad': '4',
 'syn_probe_width': '4',
 'synthetic_probe': '1',
 'v_max': '0.99',
 'v_min': '0.01'}
/dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_174439/array_cluster_processed
new notebook path: /dls/science/groups/e02/Sample_data/Test_data_ePSIC_User_notebooks/experimental_test_data/20240420_174439/array_cluster_processed/submitted_notebook.ipynb
{'Au_calib_path

Connection to wilson closed.
