In this notebook one can:
- load a notebook's settings as a dictionary
- change those settings
- save them as a new notebook
- submit the new notebook as a job to a SLURM cluster.

In [None]:
import sys
sys.path.append('/dls_sw/e02/software/epsic_tools')
import epsic_tools.api as ep
import pprint
import re
import subprocess
import os
from __future__ import print_function,unicode_literals
import subprocess

In [None]:
# Location and name of the template notebook whose settings cell is read
# and rewritten by the cells below.
starting_notebook_path = '/dls/e02/data/2024/mg37302-1/processing/Notebooks/'
# NOTE(review): the name is given without a file extension — presumably
# NotebookHelper appends '.ipynb' itself; confirm against epsic_tools.
starting_notebook_name = 'template_BraggAnalysis-submit'
# Helper object used below to read (get_settings) and write (set_settings)
# the notebook's settings.
nb = ep.notebook_utils.NotebookHelper(starting_notebook_path, starting_notebook_name)

In [None]:
# Read the settings of the template notebook and turn the space-separated
# ``key=value`` tokens into a dictionary (``old_dict``).
old_settings = nb.get_settings(1)  # settings should be cell index 1
# Drop empty tokens (produced by consecutive or trailing spaces); they carry
# no setting and would otherwise raise IndexError when indexed below.
old_settings = [token for token in old_settings.split(' ') if token.strip()]
# Split on the FIRST '=' only, so a value that itself contains '=' is kept
# whole instead of being truncated at its first '='.
old_pairs = [token.split('=', 1) for token in old_settings]
old_keys = [pair[0] for pair in old_pairs]
old_vals = [pair[1] for pair in old_pairs]
old_dict = dict(zip(old_keys, old_vals))
pprint.pprint(old_dict)

In [None]:
def find_hdf5_files(root_dir, prefix="Li_metal"):
    """Collect ``.hdf5`` files located two directory levels below *root_dir*.

    The layout searched is ``root_dir/<prefix...>/<dataset>/<name>.hdf5``.
    Entries that are not directories at either intermediate level are
    skipped instead of raising ``NotADirectoryError``.

    Args:
        root_dir: Directory whose immediate sub-directories are scanned.
        prefix: Only sub-directories of *root_dir* whose name starts with
            this string are searched. Defaults to ``"Li_metal"``.

    Returns:
        list: Full paths of the matching files, in directory-listing order
        (not sorted).

    Raises:
        OSError: If *root_dir* itself cannot be listed.
    """
    hdf5_files = []
    # Loop through each entry in the root directory
    for subfolder in os.listdir(root_dir):
        # Only folders whose name starts with the requested prefix are searched
        if not subfolder.startswith(prefix):
            continue
        sample_path = os.path.join(root_dir, subfolder)
        if not os.path.isdir(sample_path):
            # A regular file that happens to share the prefix
            continue
        # Loop through each dataset subfolder inside the matching directory
        for dataset_subfolder in os.listdir(sample_path):
            dataset_path = os.path.join(sample_path, dataset_subfolder)
            if not os.path.isdir(dataset_path):
                # Stray file next to the dataset folders
                continue
            # Keep the full path of every .hdf5 file in the dataset subfolder
            hdf5_files.extend(
                os.path.join(dataset_path, name)
                for name in os.listdir(dataset_path)
                if name.endswith('.hdf5')
            )
    return hdf5_files

# Root directory that holds the Merlin data folders
merlin_root = '/dls/e02/data/2024/mg37302-1/processing/Merlin'
# Gather every .hdf5 file found under that directory, in sorted order
hdf5_file_paths = sorted(find_hdf5_files(merlin_root))

# Report how many files were found, then list them one per line
print(len(hdf5_file_paths))
print("\n".join(hdf5_file_paths))

In [None]:
# For every .hdf5 file: write a copy of the template notebook with updated
# settings next to the data, generate a SLURM batch script for it and submit
# that script with `sbatch` over ssh. Files are processed in batches of
# `n_once`, with a pause of `hours_between_submit` hours between batches.
import shlex
import time

hours_between_submit = 2  # submit a new batch every X hours
seconds_between_prints = hours_between_submit * 3600  # pause between batches, in seconds
n_once = 5  # number of jobs submitted per batch

for i in range(0, len(hdf5_file_paths), n_once):
    batch = hdf5_file_paths[i:i + n_once]
    for file_path in batch:
        print(file_path)

        # make some changes in new setting
        new_setting = old_dict.copy()
        new_setting['crop_q'] = ''
        new_setting['raw_data_path'] = file_path
        new_setting['save_path_name'] = 'cluster_processed'

        pprint.pprint(new_setting)

        # Output folder sits next to the raw data file.
        save_path = os.path.join(os.path.dirname(new_setting['raw_data_path']), new_setting['save_path_name'])
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(save_path, exist_ok=True)

        # Save a new version of the notebook with new settings:
        new_notebook_path = os.path.join(save_path, 'submitted_notebook.ipynb')
        nb.set_settings(new_setting, new_notebook_path)

        print(f'new notebook path: {new_notebook_path}')
        print(f"#SBATCH --output={save_path}{os.sep}output_%j.out\n")

        # Quote paths that end up on a shell command line so that spaces or
        # other shell metacharacters in folder names do not break the command.
        quoted_notebook = shlex.quote(new_notebook_path)

        # Create a bash script to submit to SLURM.
        # Every "#SBATCH" line must start at column 0: sbatch treats an
        # indented "#SBATCH" line as an ordinary comment and ignores it.
        script_lines = [
            "#!/usr/bin/env bash",
            "#SBATCH --partition cs05r",
            "#SBATCH --job-name epsic_notebook",
            "#SBATCH --time 05:00:00",
            "#SBATCH --nodes 1",
            "#SBATCH --gpus-per-node 0",
            "#SBATCH --tasks-per-node 1",
            "#SBATCH --mem 256G",
            f"#SBATCH --error={save_path}{os.sep}error_%j.out",
            f"#SBATCH --output={save_path}{os.sep}output_%j.out",
            "module load python/epsic3.10",
            f"jupyter nbconvert --to notebook --inplace --ClearMetadataPreprocessor.enabled=True {quoted_notebook}",
            f"jupyter nbconvert --to notebook --allow-errors --execute {quoted_notebook}",
        ]
        bash_script_path = os.path.join(save_path, 'cluster_submit.sh')
        with open(bash_script_path, 'w') as f:
            f.write("\n".join(script_lines) + "\n")

        # Run the submission on the 'wilson' host through an interactive ssh
        # session. Using Popen as a context manager closes the pipes and
        # waits for ssh to exit, so no child process is left behind.
        with subprocess.Popen(
            ['ssh', '-tt', 'wilson'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            universal_newlines=True,
            bufsize=0,
        ) as ssh_process:
            ssh_process.stdin.write("ls .\n")
            ssh_process.stdin.write("echo END\n")
            ssh_process.stdin.write(f"sbatch {shlex.quote(bash_script_path)}\n")
            ssh_process.stdin.write("uptime\n")
            ssh_process.stdin.write("logout\n")
            ssh_process.stdin.close()

            # Echo the remote output up to the "END" marker line ...
            for line in ssh_process.stdout:
                if line == "END\n":
                    break
                print(line, end="")

            # ... then the remaining lines up to logout
            for line in ssh_process.stdout:
                print(line, end="")

    # Pause only when another batch is still to be submitted; sleeping after
    # the final batch would just block the notebook for no reason.
    if i + n_once < len(hdf5_file_paths):
        time.sleep(seconds_between_prints)