# Posing the Problem

We have a set of docked models for a range of sequences, classified according to a particular conformation. Each model has its own folder for simulation, with a common folder structure for saving a range of different files for MD preparation and submission.

**GOALS**

* Identify what models I can submit for emmd/prmd/urmd for a particular mutant and conformation
* Submit simulations while keeping track of these

**SIMULATION SUBMISSION** : Want to submit simulations up to a final stage. Here, MD 100ns simulations.

* __If__ `.gro` and `.xtc` files for URMD stage are present, no need for submission
* __Else__, determine what is the last MD stage simulated (prmd or emmd) -  check `.gro` and `.xtc` files for this stage, with PRMD simulations having priority over EMMD
* Next, __If__ `.gro` and `.xtc` files for PRMD stage are present, then check for AND/OR prepare submission files for URMD stage
* __Else__, __If__ the `.gro` file (at least; ideally also the `.xtc` file) for EMMD stage is present, then check for AND/OR prepare submission files for PRMD stage

**POSSIBLE SCENARIOS**

Pre-submission stage

* Submission files AVAILABLE, and coordinates and trajectory UNAVAILABLE. Then, CAN submit. Preparation NO.
* Submission files UNAVAILABLE, and coordinates and trajectory AVAILABLE. Then, CANNOT submit. Preparation NO.
* Submission files UNAVAILABLE, and coordinates and trajectory UNAVAILABLE. Then, CANNOT submit. Preparation YES.


Post-preparation stage

* Submission files AVAILABLE, and coordinates and trajectory UNAVAILABLE. Then, CAN submit. Preparation NO.

# Modules for Testing

In [27]:
import os
import glob
import json
import subprocess

Methods to check existence of key files for submission and to determine last MD stage simulated

In [28]:
# Sorted by Priority for Submission
# Each entry is (stage label, base name of that stage's files). The list is
# scanned in this order by determine_last_md_stage(), so the first stage
# whose output files are found is the one reported.
MD_STAGES = [
    ('urmd','md_100ns'),
    ('prmd','prmd'),
    ('emmd','em_20000stps')
]

def check_for_md_files(model_dir, sname_main):
    """Report which GROMACS output files of one run exist for a model.

    Looks inside ``<model_dir>/complex/mdf`` for ``<sname_main>.gro`` and
    ``<sname_main>.xtc`` and returns a dict mapping each of these two
    extensions to a bool telling whether that file is present.
    """
    md_folder = os.path.join(model_dir, 'complex/mdf')
    return {
        ext: os.path.isfile(os.path.join(md_folder, sname_main + ext))
        for ext in ('.gro', '.xtc')
    }

def check_for_submission_files(model_dir, sname_main):
    """Check which submission files already exist for one MD run.

    Returns a dict with two boolean entries:

    * ``'.tpr'``   -- ``<model_dir>/complex/mdf/<sname_main>.tpr`` exists.
    * ``'.slurm'`` -- a SLURM script for the run exists in
      ``<model_dir>/complex/jobf``, named either ``<sname_main>.slurm`` or
      ``<sname_main>.bg.slurm``.
    """
    # (reported key, folder, accepted file-name endings). The '.bg.slurm'
    # ending is accepted because that is the name generate_slurm() writes;
    # looking for the bare '.slurm' ending alone never finds a generated
    # script.
    locations = [
        ('.tpr', 'complex/mdf', ('.tpr',)),
        ('.slurm', 'complex/jobf', ('.slurm', '.bg.slurm')),
    ]
    out = {}
    for key, path_suffix, endings in locations:
        out[key] = any(
            os.path.isfile(os.path.join(model_dir, path_suffix, sname_main + ending))
            for ending in endings
        )

    return out

def determine_last_md_stage(model_dir):
    """Return the label of the most advanced MD stage already simulated.

    Stages are tested in the order given by ``MD_STAGES``. A stage counts
    as simulated when both its ``.gro`` and ``.xtc`` files exist; for the
    ``'emmd'`` stage the ``.gro`` file alone is enough.

    Returns the stage label, or ``None`` when no stage has produced output,
    so that a model without any coordinates is not reported as ``'emmd'``.
    """
    for stage, filename in MD_STAGES:
        # Look the files up once per stage and reuse the result below.
        found = check_for_md_files(model_dir, filename)
        if all(found.values()):
            return stage
        if stage == 'emmd' and found['.gro']:
            return stage
    return None

Methods for generation of submission files:  SBATCH file (`.slurm`) and GROMACS run input file (`.tpr`)

In [29]:
def generate_slurm(model_dir, sname_main, stage):
    """Write the SLURM batch script that runs one MD stage with GROMACS.

    The script is saved as
    ``<model_dir>/complex/jobf/<sname_main>.bg.slurm``. The log, ``.tpr``
    and output paths written into the script are relative
    (``complex/jobf/...`` and ``complex/mdf/...``).

    Parameters
    ----------
    model_dir : str
        Model folder; the script is written into its ``complex/jobf``
        sub-folder.
    sname_main : str
        Base name shared by the run's ``.tpr`` input, its outputs and logs.
    stage : str
        One of ``'urmd'``, ``'prmd'`` or ``'emmd'``; selects the number of
        nodes and the wall-time limit.

    Raises
    ------
    ValueError
        If ``stage`` is not one of the known stages.
    """
    #######################################
    # Default params per MD stage: BlueGem SLURM format
    #######################################
    path_prefix_slurm = "complex/jobf"
    path_prefix_md = "complex/mdf"

    # stage -> (number of nodes, wall-time limit)
    resources = {
        'urmd': (2, "5-12:30"),
        'prmd': (1, "1-12:30"),
        'emmd': (1, "12:30"),
    }
    if stage not in resources:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            "Unknown MD stage %r; expected one of %s" % (stage, sorted(resources))
        )
    n_nodes, sim_time = resources[stage]
    #######################################
    # Script content
    #######################################
    slurm_template = (
        "#!/bin/bash -login \n"
        "#SBATCH -p cpu \n"
        "#SBATCH --ntasks-per-node=16 \n"
        "#SBATCH -N "+str(n_nodes)+" \n"
        "#SBATCH -t "+sim_time+" \n"
        "#SBATCH -A S2.1 \n"
        "#SBATCH -o "+path_prefix_slurm+"/"+sname_main+"_slurm.log \n"
        "#SBATCH -e "+path_prefix_slurm+"/"+sname_main+"_slurm.error \n"
        "\n"
        "# Load GROMACS module \n"
        "module load apps/gromacs-5.0.6 \n"
        "\n"
        "mpiexec.hydra -psm -bootstrap slurm gmx_mpi mdrun "
        "-s "+path_prefix_md+"/"+sname_main+".tpr "
        "-deffnm "+path_prefix_md+"/"+sname_main+" \n"
    )
    #######################################
    # Write submission file for BG
    #######################################
    path_output = os.path.join(model_dir, path_prefix_slurm, sname_main+'.bg.slurm')
    # The with-statement closes the file; no explicit close() is needed.
    with open(path_output, 'w') as fp:
        fp.write(slurm_template)

def generate_tpr(model_dir, sname_main, sname_prev):
    """Generate the GROMACS run input file (``.tpr``) for one MD run.

    Runs ``gmx_mpi grompp`` with the parameter template
    ``<sname_main>.mdp``, the coordinates
    ``<model_dir>/complex/mdf/<sname_prev>.gro`` and the topology
    ``<model_dir>/complex/topol.top``, writing
    ``<model_dir>/complex/mdf/<sname_main>.tpr``. Afterwards the
    ``#mdout.mdp.*`` backup files found in the current working directory
    are deleted.

    If the ``.gro`` file of the previous run is missing, an error message
    is printed and nothing is run.

    Parameters
    ----------
    model_dir : str
        Model folder.
    sname_main : str
        Base name of the run to prepare (also selects the ``.mdp`` template).
    sname_prev : str
        Base name of the previous run, whose ``.gro`` file is the input.
    """
    if not check_for_md_files(model_dir, sname_prev)['.gro']:
        mssg = "ERROR: Coordinates from previous MD stage missing!"
        print(mssg)
        return
    #######################################
    # GROMACS command with input parameters
    #######################################
    cmd = [
        'gmx_mpi','grompp',
        '-f','/home/ba13026/mpmodeling/protocols/gmx_protocols/templates/'+sname_main+'.mdp',
        '-c',model_dir+'/complex/mdf/'+sname_prev+'.gro',
        '-p',model_dir+'/complex/'+'topol.top',
        '-o',model_dir+'/complex/mdf/'+sname_main+'.tpr',
        '-maxwarn','3',
    ]
    #######################################
    # Run GROMACS command
    #######################################
    p = subprocess.Popen(cmd)
    p.wait()
    #######################################
    # Clean backup files to reduce storage
    #######################################
    for f in glob.glob('./'+'#mdout.mdp.*'):
        try:
            os.remove(f)
        except FileNotFoundError:
            # When several workers run from the same working directory,
            # another one may delete the same backup between glob() and
            # remove(); that is not an error.
            pass
        
def prepare_submission_files(model_dir, sname_main, stage_last, stage_next):
    """Create whichever submission files are still missing for the next run.

    Parameters
    ----------
    model_dir : str
        Model folder.
    sname_main : str
        Base name of the files of the run to prepare.
    stage_last : str
        Label of the last simulated stage (e.g. ``'prmd'``); a value that is
        not a label in ``MD_STAGES`` is taken as the base name of the
        previous run's files.
    stage_next : str
        Label of the stage to prepare; passed to ``generate_slurm``.
    """
    out_check = check_for_submission_files(model_dir, sname_main)
    # generate_tpr() expects the base name of the previous run's files, which
    # is not the same as the stage label for every stage (see MD_STAGES).
    sname_prev = dict(MD_STAGES).get(stage_last, stage_last)
    # Generate a file only when it is MISSING; the two files are independent,
    # so both may need to be created in the same call.
    if not out_check['.tpr']:
        generate_tpr(model_dir, sname_main, sname_prev)
    if not out_check['.slurm']:
        generate_slurm(model_dir, sname_main, stage_next)

# Input Cases

## All docked models

**INPUT, CASE 1** Make list of all available docked model directories. ALL MUTANTS, ALL CONFORMATIONS, ALL MODELS.

In [4]:
# cWza Cys-mutant sequence names to scan
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']

# Conformation names available for each sequence name
CONFORMATIONS = {
    'cWza':['conformation0', 'conformation1'],
    'cWza-K375C':['conformation0', 'conformation1'],
    'cWza-S355C':['conformation0', 'conformation1'],
    'cWza-Y373C':['conformation1']
}

wdir = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax'
path_suffix = 'complex/mdf'

# Every entry found under <wdir>/<mutant>/<conformation> is taken as one
# model directory.
MODELS_DIRS = []
for mutant in MUTANTS:
    for conformation in CONFORMATIONS[mutant]:
        mutant_dir = os.path.join(wdir, mutant, conformation)
        for pdbname in os.listdir(mutant_dir):
            model_dir = os.path.join(mutant_dir, pdbname)
            MODELS_DIRS.append(model_dir)

## All Models per Conformation

**INPUT, CASE 2** Make list of all available docked model directories only for `cWza-K375C` models.

In [15]:
# cWza Cys-mutant sequence names to scan
MUTANTS = ['cWza-K375C']

# Conformation names available for each sequence name
CONFORMATIONS = {
    'cWza-K375C':['conformation0', 'conformation1']
}

wdir = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax'
path_suffix = 'complex/mdf'

# Every entry found under <wdir>/<mutant>/<conformation> is taken as one
# model directory.
MODELS_DIRS = []
for mutant in MUTANTS:
    for conformation in CONFORMATIONS[mutant]:
        mutant_dir = os.path.join(wdir, mutant, conformation)
        for pdbname in os.listdir(mutant_dir):
            model_dir = os.path.join(mutant_dir, pdbname)
            MODELS_DIRS.append(model_dir)

## All Models per Interaction Partition

In [2]:
import os
import sys
import numpy
import json
import subprocess
import isambard_dev
import operator
from operator import itemgetter
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set_style('darkgrid')
%matplotlib notebook

In [3]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

modules_path = "/home/ba13026/mpmodeling/analysis/"
if modules_path not in sys.path:
    sys.path.append(modules_path)

import setup_geometry_interactions_db
import importlib
importlib.reload(setup_geometry_interactions_db)
from setup_geometry_interactions_db import \
    Json,Tags,RigidBody,RadialProfiles,Miscellaneous,Interhelix_Interactions, Base
from insert2db_geometry_interactions import interaction_direction

In [4]:
# path0 = '/home/ba13026/dbs/'
# Folder containing the SQLite database file used below
path0 = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'
dbfile = path0 + 'mutants_docked_geometry-interactions.db'
# Create the SQLAlchemy engine for that file and attach it to the metadata
# of the imported Base
engine = create_engine('sqlite:///'+dbfile)
Base.metadata.bind = engine
# Session object used by the queries in later cells
DBSession = sessionmaker()
DBSession.bind = engine
session = DBSession()

In [5]:
# Load the filtered model IDs (indexed later by mutant, then conformation)
wd = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'
filename = os.path.join(wd, 'filtered_ids_new.json')

with open(filename, 'r') as fp:
    Filtered_IDs = json.load(fp)

In [6]:
# Load the model IDs grouped by interaction type (indexed later by group
# name, then tag)
wd = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/'
filename = os.path.join(wd, 'filtered_ids_interaction_groups.json')
with open(filename, 'r') as fp:
    Models_Out_IDs = json.load(fp)

In [7]:
# One JSON-encoded [mutant, conformation] pair per set of docked models
MyTags = [
    json.dumps([mutant_name, conformation_name])
    for mutant_name, conformation_name in (
        ('cWza', 'conformation0'),
        ('cWza', 'conformation1'),
        ('cWza-K375C', 'conformation0'),
        ('cWza-K375C', 'conformation1'),
        ('cWza-S355C', 'conformation0'),
        ('cWza-S355C', 'conformation1'),
        ('cWza-Y373C', 'conformation0'),
    )
]

In [8]:
# Split the model IDs of every tag into four disjoint interaction partitions
Models_IDs = {}

for tags in MyTags:
    mutant, C_x = json.loads(tags)
    ###############################################
    # ID pools: all filtered models, and those listed under 'hbonds' / 'kihs'
    X_all = set(list(Filtered_IDs[mutant][C_x]))
    X_hbonds = set(Models_Out_IDs['hbonds'][tags])
    X_kihs = set(Models_Out_IDs['kihs'][tags])
    # Set algebra on the pools
    X_hbonds_NOT_kihs = set(X_hbonds - X_kihs)
    X_kihs_NOT_hbonds = set(X_kihs - X_hbonds)
    X_hbonds_AND_kihs = set(X_hbonds & X_kihs)
    X_hbonds_NOR_kihs = X_all - set(X_hbonds | X_kihs)
    ###############################################
    Models_IDs[tags] = {
        'hbonds_NOR_kihs': list(X_hbonds_NOR_kihs),
        'hbonds_NOT_kihs': list(X_hbonds_NOT_kihs),
        'kihs_NOT_hbonds': list(X_kihs_NOT_hbonds),
        'hbonds_AND_kihs': list(X_hbonds_AND_kihs),
    }

In [9]:
# For every tag and partition, the relative folder
# "<mutant>/<conformation>/<pdb name>" of each model
Models_PDBs = {}

for tags in MyTags:
    mutant, C_x = json.loads(tags)
    ##################################
    # Correct mislabelling
    if mutant == 'cWza-Y373C':
        C_x = "conformation1"
    elif mutant == 'cWza-K375C' and C_x == "conformation0":
        C_x = "conformation1"
    elif mutant == 'cWza-K375C' and C_x == "conformation1":
        C_x = "conformation0"
    ##################################
    Models_PDBs[tags] = {}
    for partition, model_ids in Models_IDs[tags].items():
        Models_PDBs[tags][partition] = []
        # 'model_id' rather than 'id': the loop variable stays defined at
        # notebook scope and must not hide the builtin id().
        for model_id in map(int, model_ids):
            # First column of the first matching row, without its last four
            # characters (presumably a '.pdb' extension -- TODO confirm).
            pdbname = session.query(Tags.pdb_name).filter_by(id=model_id).all()[0][0][:-4]
            modeldir = mutant+"/"+C_x+"/"+pdbname
            Models_PDBs[tags][partition].append(modeldir)

Get list of PDB folders for models from interaction partitions of `cWza-K375C` docked conformations

In [77]:
# Collect the 'hbonds_NOT_kihs' models of cWza-K375C / conformation0,
# printing the size of every partition on the way
Tags_cWzaK375C = [
    json.dumps(['cWza-K375C', 'conformation0'])
]

MODELS_DIRS = []
for tag in Tags_cWzaK375C:
    S = Models_PDBs[tag]
    for key, pdb_dirs in S.items():
        print( tag, key, len(pdb_dirs) )
        if key != 'hbonds_NOT_kihs':
            continue
        for pdb_dir in pdb_dirs:
            MODELS_DIRS.append( os.path.join(wd, pdb_dir) )

["cWza-K375C", "conformation0"] kihs_NOT_hbonds 0
["cWza-K375C", "conformation0"] hbonds_AND_kihs 0
["cWza-K375C", "conformation0"] hbonds_NOT_kihs 22
["cWza-K375C", "conformation0"] hbonds_NOR_kihs 38


In [69]:
# Collect the 'hbonds_AND_kihs' models of cWza-K375C / conformation1,
# printing the size of every partition on the way
Tags_cWzaK375C = [
    json.dumps(['cWza-K375C', 'conformation1'])
]

MODELS_DIRS = []
for tag in Tags_cWzaK375C:
    S = Models_PDBs[tag]
    for key, pdb_dirs in S.items():
        print( tag, key, len(pdb_dirs) )
        if key != 'hbonds_AND_kihs':
            continue
        for pdb_dir in pdb_dirs:
            MODELS_DIRS.append( os.path.join(wd, pdb_dir) )

["cWza-K375C", "conformation1"] kihs_NOT_hbonds 75
["cWza-K375C", "conformation1"] hbonds_AND_kihs 9
["cWza-K375C", "conformation1"] hbonds_NOT_kihs 0
["cWza-K375C", "conformation1"] hbonds_NOR_kihs 0


# Proof of Concept

Determine last and next possible stage of MD simulation for every model directory from above

**NOTE**: `MODELS_DIRS` can be replaced by any list of arbitrary and valid model directories, e.g., for a sample of models per conformation, or a sample of models according to particular interaction groups.

In [78]:
# Print and count the models whose last simulated stage is 'urmd'
COUNTER_2submit = 0
for model_dir in MODELS_DIRS:
    # Determine LAST MD stage successfully simulated
    stage_last = determine_last_md_stage(model_dir)
    if stage_last != 'urmd':
        continue
    # Show only the last three path components of the model folder
    path_parts = model_dir.split('/')
    print('/'.join(path_parts[-3:]), stage_last)
    COUNTER_2submit += 1

print("Models already simulated: ", COUNTER_2submit)

cWza-K375C/conformation1/refined1_0001_INPUT_0027_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0042_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0747_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0495_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0304_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0955_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0121_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0125_ignorechain urmd
cWza-K375C/conformation1/refined1_0001_INPUT_0578_ignorechain urmd
Models already simulated:  9


**PREPARATION PHASE**: Check for missing submission files for next MD simulation stage and generate them

*LINEAR IMPLEMENTATION* 

```python
# Last simulated stage -> (next stage to prepare, base name of its files)
next_run_of = {
    'prmd': ('urmd', 'md_100ns'),
    'emmd': ('prmd', 'prmd'),
}

for model_dir in MODELS_DIRS:
    # Determine LAST MD stage successfully simulated
    stage_last = determine_last_md_stage(model_dir)

    # Prepare submission files for NEXT MD stage needed to simulate
    if stage_last in next_run_of:
        stage_next, sname_main = next_run_of[stage_last]
        prepare_submission_files(model_dir, sname_main, stage_last, stage_next)
```

*PARALLEL IMPLEMENTATION*

* Define input parameters as 1D list

``` python
# Last simulated stage -> (next stage to prepare, base name of its files)
next_run_of = {
    'prmd': ('urmd', 'md_100ns'),
    'emmd': ('prmd', 'prmd'),
}

params_list = []
for model_dir in MODELS_DIRS:
    # Determine LAST MD stage successfully simulated
    stage_last = determine_last_md_stage(model_dir)

    # Queue the parameters for the NEXT MD stage needed to simulate
    if stage_last in next_run_of:
        stage_next, sname_main = next_run_of[stage_last]
        params = [model_dir, sname_main, stage_last, stage_next]
        params_list.append(params)
```

* Define linear map to parallelise

```python
def func(params):
    """Prepare the submission files described by one 4-item parameter list."""
    (model_dir, sname_main, stage_last, stage_next) = params
    prepare_submission_files(
        model_dir=model_dir,
        sname_main=sname_main,
        stage_last=stage_last,
        stage_next=stage_next,
    )
```

* Define Parallel scheme. Here, Multi-processing.

```python
import concurrent.futures

n_cores = 4
with concurrent.futures.ProcessPoolExecutor(max_workers = n_cores) as executor:
    # executor.map() returns a lazy iterator; consuming it re-raises here any
    # exception raised inside a worker instead of dropping it silently.
    list(executor.map(func, params_list))
```

**SUBMISSION PHASE**: Given a list of slurm files determined according to MD phases needed, submit to cluster.


# Python Module and A Script

In [29]:
%%writefile /home/ba13026/mpmodeling/protocols/md_submission_preparation.py
import os
import glob
import subprocess

# Sorted by Priority for Submission
# Each entry is (stage label, base name of that stage's files). The list is
# scanned in this order by determine_last_md_stage(), so the first stage
# whose output files are found is the one reported.
MD_STAGES = [
    ('urmd','md_100ns'),
    ('prmd','prmd'),
    ('emmd','em_20000stps')
]

def check_for_md_files(model_dir, sname_main):
    """Report which GROMACS output files of one run exist for a model.

    Looks inside ``<model_dir>/complex/mdf`` for ``<sname_main>.gro`` and
    ``<sname_main>.xtc`` and returns a dict mapping each of these two
    extensions to a bool telling whether that file is present.
    """
    md_folder = os.path.join(model_dir, 'complex/mdf')
    return {
        ext: os.path.isfile(os.path.join(md_folder, sname_main + ext))
        for ext in ('.gro', '.xtc')
    }

def check_for_submission_files(model_dir, sname_main):
    """Check which submission files already exist for one MD run.

    Returns a dict with two boolean entries:

    * ``'.tpr'``   -- ``<model_dir>/complex/mdf/<sname_main>.tpr`` exists.
    * ``'.slurm'`` -- a SLURM script for the run exists in
      ``<model_dir>/complex/jobf``, named either ``<sname_main>.slurm`` or
      ``<sname_main>.bg.slurm``.
    """
    # (reported key, folder, accepted file-name endings). The '.bg.slurm'
    # ending is accepted because that is the name generate_slurm() writes;
    # looking for the bare '.slurm' ending alone never finds a generated
    # script.
    locations = [
        ('.tpr', 'complex/mdf', ('.tpr',)),
        ('.slurm', 'complex/jobf', ('.slurm', '.bg.slurm')),
    ]
    out = {}
    for key, path_suffix, endings in locations:
        out[key] = any(
            os.path.isfile(os.path.join(model_dir, path_suffix, sname_main + ending))
            for ending in endings
        )

    return out

def determine_last_md_stage(model_dir):
    """Return the label of the most advanced MD stage already simulated.

    Stages are tested in the order given by ``MD_STAGES``. A stage counts
    as simulated when both its ``.gro`` and ``.xtc`` files exist; for the
    ``'emmd'`` stage the ``.gro`` file alone is enough.

    Returns the stage label, or ``None`` when no stage has produced output,
    so that a model without any coordinates is not reported as ``'emmd'``.
    """
    for stage, filename in MD_STAGES:
        # Look the files up once per stage and reuse the result below.
        found = check_for_md_files(model_dir, filename)
        if all(found.values()):
            return stage
        if stage == 'emmd' and found['.gro']:
            return stage
    return None

def generate_slurm(model_dir, sname_main, stage):
    """Write the SLURM batch script that runs one MD stage with GROMACS.

    The script is saved as
    ``<model_dir>/complex/jobf/<sname_main>.bg.slurm``. The log, ``.tpr``
    and output paths written into the script are relative
    (``complex/jobf/...`` and ``complex/mdf/...``).

    Parameters
    ----------
    model_dir : str
        Model folder; the script is written into its ``complex/jobf``
        sub-folder.
    sname_main : str
        Base name shared by the run's ``.tpr`` input, its outputs and logs.
    stage : str
        One of ``'urmd'``, ``'prmd'`` or ``'emmd'``; selects the number of
        nodes and the wall-time limit.

    Raises
    ------
    ValueError
        If ``stage`` is not one of the known stages.
    """
    #######################################
    # Default params per MD stage: BlueGem SLURM format
    #######################################
    path_prefix_slurm = "complex/jobf"
    path_prefix_md = "complex/mdf"

    # stage -> (number of nodes, wall-time limit)
    resources = {
        'urmd': (2, "5-12:30"),
        'prmd': (1, "1-12:30"),
        'emmd': (1, "12:30"),
    }
    if stage not in resources:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            "Unknown MD stage %r; expected one of %s" % (stage, sorted(resources))
        )
    n_nodes, sim_time = resources[stage]
    #######################################
    # Script content
    #######################################
    slurm_template = (
        "#!/bin/bash -login \n"
        "#SBATCH -p cpu \n"
        "#SBATCH --ntasks-per-node=16 \n"
        "#SBATCH -N "+str(n_nodes)+" \n"
        "#SBATCH -t "+sim_time+" \n"
        "#SBATCH -A S2.1 \n"
        "#SBATCH -o "+path_prefix_slurm+"/"+sname_main+"_slurm.log \n"
        "#SBATCH -e "+path_prefix_slurm+"/"+sname_main+"_slurm.error \n"
        "\n"
        "# Load GROMACS module \n"
        "module load apps/gromacs-5.0.6 \n"
        "\n"
        "mpiexec.hydra -psm -bootstrap slurm gmx_mpi mdrun "
        "-s "+path_prefix_md+"/"+sname_main+".tpr "
        "-deffnm "+path_prefix_md+"/"+sname_main+" \n"
    )
    #######################################
    # Write submission file for BG
    #######################################
    path_output = os.path.join(model_dir, path_prefix_slurm, sname_main+'.bg.slurm')
    # The with-statement closes the file; no explicit close() is needed.
    with open(path_output, 'w') as fp:
        fp.write(slurm_template)

def generate_tpr(model_dir, sname_main, sname_prev):
    """Generate the GROMACS run input file (``.tpr``) for one MD run.

    Runs ``gmx_mpi grompp`` with the parameter template
    ``<sname_main>.mdp``, the coordinates
    ``<model_dir>/complex/mdf/<sname_prev>.gro`` and the topology
    ``<model_dir>/complex/topol.top``, writing
    ``<model_dir>/complex/mdf/<sname_main>.tpr``. Afterwards the
    ``#mdout.mdp.*`` backup files found in the current working directory
    are deleted.

    If the ``.gro`` file of the previous run is missing, an error message
    is printed and nothing is run.

    Parameters
    ----------
    model_dir : str
        Model folder.
    sname_main : str
        Base name of the run to prepare (also selects the ``.mdp`` template).
    sname_prev : str
        Base name of the previous run, whose ``.gro`` file is the input.
    """
    if not check_for_md_files(model_dir, sname_prev)['.gro']:
        mssg = "ERROR: Coordinates from previous MD stage missing!"
        print(mssg)
        return
    #######################################
    # GROMACS command with input parameters
    #######################################
    cmd = [
        'gmx_mpi','grompp',
        '-f','/home/ba13026/mpmodeling/protocols/gmx_protocols/templates/'+sname_main+'.mdp',
        '-c',model_dir+'/complex/mdf/'+sname_prev+'.gro',
        '-p',model_dir+'/complex/'+'topol.top',
        '-o',model_dir+'/complex/mdf/'+sname_main+'.tpr',
        '-maxwarn','3',
    ]
    #######################################
    # Run GROMACS command
    #######################################
    p = subprocess.Popen(cmd)
    p.wait()
    #######################################
    # Clean backup files to reduce storage
    #######################################
    for f in glob.glob('./'+'#mdout.mdp.*'):
        try:
            os.remove(f)
        except FileNotFoundError:
            # When several workers run from the same working directory,
            # another one may delete the same backup between glob() and
            # remove(); that is not an error.
            pass
        
def prepare_submission_files(model_dir, sname_main, stage_last, stage_next):
    """Create whichever submission files are still missing for the next run.

    Parameters
    ----------
    model_dir : str
        Model folder.
    sname_main : str
        Base name of the files of the run to prepare.
    stage_last : str
        Label of the last simulated stage (e.g. ``'prmd'``); a value that is
        not a label in ``MD_STAGES`` is taken as the base name of the
        previous run's files.
    stage_next : str
        Label of the stage to prepare; passed to ``generate_slurm``.
    """
    out_check = check_for_submission_files(model_dir, sname_main)
    # generate_tpr() expects the base name of the previous run's files, which
    # is not the same as the stage label for every stage (see MD_STAGES).
    sname_prev = dict(MD_STAGES).get(stage_last, stage_last)
    # Generate a file only when it is MISSING; the two files are independent,
    # so both may need to be created in the same call.
    if not out_check['.tpr']:
        generate_tpr(model_dir, sname_main, sname_prev)
    if not out_check['.slurm']:
        generate_slurm(model_dir, sname_main, stage_next)

Overwriting /home/ba13026/mpmodeling/protocols/md_submission_preparation.py


In [32]:
%%writefile /projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/prepare_prmd_submission_cwzak375c_test.py
import os
import sys
import glob
import subprocess
import concurrent.futures

sys.path.append('/home/ba13026/mpmodeling/protocols')
from md_submission_preparation import prepare_submission_files, determine_last_md_stage
#################################################
# Make list of model directories

# cWza Cys-mutant sequence names to scan
MUTANTS = ['cWza-K375C']

# Conformation names available for each sequence name
CONFORMATIONS = {
    'cWza-K375C':['conformation0', 'conformation1']
}

wdir = '/projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax'
path_suffix = 'complex/mdf'

# Every entry found under <wdir>/<mutant>/<conformation> is taken as one
# model directory.
MODELS_DIRS = []
for mutant in MUTANTS:
    for conformation in CONFORMATIONS[mutant]:
        mutant_dir = os.path.join(wdir, mutant, conformation)
        for pdbname in os.listdir(mutant_dir):
            model_dir = os.path.join(mutant_dir, pdbname)
            MODELS_DIRS.append(model_dir)
        
#################################################
# Define input parameters as 1D list
# The list of selected models is written next to this script, one relative
# model folder per line.
list_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'prmd_slurm_list_cwzak375c.txt')
params_list = []
with open(list_path, 'w') as fp:
    for model_dir in MODELS_DIRS:
        # Determine LAST MD stage successfully simulated
        stage_last = determine_last_md_stage(model_dir)

        # Prepare submission files for NEXT MD stage needed to simulate
        if stage_last == 'prmd':
            stage_next = 'urmd'
            sname_main = 'md_100ns'
            params = [model_dir, sname_main, stage_last, stage_next]
            params_list.append(params)
            # Keep only the last three path components of the model folder
            fp.write('/'.join(model_dir.split('/')[-3:])+'\n')
# The with-statement has already closed the file; no explicit close() needed.

#################################################
# Define linear map for parallelisation

def func(params):
    """Prepare the submission files of one model.

    ``params`` is ``[model_dir, sname_main, stage_last, stage_next]``.
    Returns the last three path components of ``model_dir`` joined by '/'.
    """
    model_dir, sname_main, stage_last, stage_next = params
    prepare_submission_files(model_dir, sname_main, stage_last, stage_next)
    # Return the label instead of appending it to a module-level list: every
    # worker process has its own copy of the module globals, so an append
    # would never reach the parent process.
    return '/'.join(model_dir.split('/')[-3:])


#################################################
# Prepare submission files
n_cores = 10
# Guard the pool so that worker processes importing this script do not start
# pools of their own.
if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor(max_workers = n_cores) as executor:
        # Consuming the iterator re-raises here any exception raised inside a
        # worker instead of dropping it silently.
        model_dir_list = list(executor.map(func, params_list))

Overwriting /projects/s21/ba13026/Wza_Modeling/L-structures/rosetta/bg_test/md_relax/prepare_prmd_submission_cwzak375c_test.py


BASH command line for SLURM submission given lists for models

```bash
for f in `cat prmd_slurm_list_cwzak375c.txt`; do cd $f; sbatch complex/jobf/md_100ns.bg.slurm ; cd - ; done
```

# Applications

## URMD simulations per Interaction Group

**PROBLEM**: From all Interaction Partitions per docked conformation, per sequence, determine how many PDB models have been already URMD simulated, and how many more can be simulated.

In [93]:
# One JSON-encoded [mutant, conformation] pair per set of docked models
MyTags = [
    json.dumps([mutant_name, conformation_name])
    for mutant_name, conformation_name in (
        ('cWza', 'conformation0'),
        ('cWza', 'conformation1'),
        ('cWza-K375C', 'conformation0'),
        ('cWza-K375C', 'conformation1'),
        ('cWza-S355C', 'conformation0'),
        ('cWza-S355C', 'conformation1'),
        ('cWza-Y373C', 'conformation0'),
    )
]

In [102]:
# For every tag and every non-empty partition, count the models whose last
# simulated stage is the target stage and report how many are left
stage_target = 'urmd'

for tag in MyTags:

    S = Models_PDBs[tag]
    for key, pdb_dirs in S.items():
        if not pdb_dirs:
            continue
        MODELS_DIRS = [os.path.join(wd, pdb_dir) for pdb_dir in pdb_dirs]

        COUNTER_2submit = 0
        for model_dir in MODELS_DIRS:
            # Determine LAST MD stage successfully simulated
            stage_last = determine_last_md_stage(model_dir)
            if stage_last == stage_target:
                COUNTER_2submit += 1
        COUNTER_remaining = len(pdb_dirs) - COUNTER_2submit

        # The "Remaining" part is shown only when something is left to do
        report = [tag, key, stage_target+"-Simulated : ", COUNTER_2submit]
        if COUNTER_remaining > 0:
            report += ["Remaining :", COUNTER_remaining]
        print(*report)

["cWza", "conformation0"] kihs_NOT_hbonds urmd-Simulated :  5
["cWza", "conformation0"] hbonds_AND_kihs urmd-Simulated :  40 Remaining : 246
["cWza", "conformation0"] hbonds_NOT_kihs urmd-Simulated :  14 Remaining : 100
["cWza", "conformation0"] hbonds_NOR_kihs urmd-Simulated :  10 Remaining : 21
["cWza", "conformation1"] kihs_NOT_hbonds urmd-Simulated :  10 Remaining : 65
["cWza", "conformation1"] hbonds_AND_kihs urmd-Simulated :  10 Remaining : 3
["cWza", "conformation1"] hbonds_NOT_kihs urmd-Simulated :  9 Remaining : 6
["cWza", "conformation1"] hbonds_NOR_kihs urmd-Simulated :  11 Remaining : 56
["cWza-K375C", "conformation0"] hbonds_NOT_kihs urmd-Simulated :  9 Remaining : 13
["cWza-K375C", "conformation0"] hbonds_NOR_kihs urmd-Simulated :  10 Remaining : 28
["cWza-K375C", "conformation1"] kihs_NOT_hbonds urmd-Simulated :  10 Remaining : 65
["cWza-K375C", "conformation1"] hbonds_AND_kihs urmd-Simulated :  9
["cWza-S355C", "conformation0"] kihs_NOT_hbonds urmd-Simulated :  10 Remai