In [1]:
import sidechainnet as scn
import sidechainnet.utils.minimize_scn as minscn
from tqdm.notebook import tqdm
from glob import glob
import os

# Path setup

In [2]:
SCRIPT_PATH = "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/utils/minimize_scn.py"
DATAPATH = "/net/pulsar/home/koes/jok120/scn221001/sidechainnet_casp100_12.pkl"
UNMIN_PATH = "/net/pulsar/home/koes/jok120/scnmin221001/unmin"
MIN_PATH = "/net/pulsar/home/koes/jok120/scnmin221013/min"
SLURM_OUT = "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/221013/"
JOBID = "14355759"  #"10382630"

In [3]:
!ls -1 $UNMIN_PATH | wc -l

103954


In [4]:
protein_fnames_in = sorted(glob(os.path.join(UNMIN_PATH, "*.pkl")))
protein_fnames_in[:10]

['/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#1HF2_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#1WAZ_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#1Z9V_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2EEM_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2FK5_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2IJR_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2KW1_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2MFQ_2_B.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2MHD_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221001/unmin/10#2MNJ_1_A.pkl']

In [5]:
protein_fnames_out_expected = [os.path.join(MIN_PATH, os.path.basename(fn)) for fn in protein_fnames_in]
protein_fnames_out_expected[:10]

['/net/pulsar/home/koes/jok120/scnmin221013/min/10#1HF2_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#1WAZ_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#1Z9V_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2EEM_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2FK5_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2IJR_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2KW1_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2MFQ_2_B.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2MHD_1_A.pkl',
 '/net/pulsar/home/koes/jok120/scnmin221013/min/10#2MNJ_1_A.pkl']

In [6]:
# Helper dictionaries to map between job name and expected output file
protein_expected_fn_to_idx = {fn:idx for idx, fn in enumerate(protein_fnames_out_expected)}
protein_idx_to_expected_fn = {idx:fn for idx, fn in enumerate(protein_fnames_out_expected)}

In [7]:
def get_logfile_from_outname(outname):
    """Return SLURM logfile name for an expected outfile."""
    index = protein_expected_fn_to_idx[outname]
    logfile = os.path.join(SLURM_OUT, JOBID + f"_{index:06}.out")
    return logfile

In [8]:
get_logfile_from_outname('/net/pulsar/home/koes/jok120/scnmin221013/min/10#1HF2_1_A.pkl')

'/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/221013/10382630_000000.out'

In [9]:
!cat '/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/221013/10382630_000000.out'

Running job 10382631 on node n124.
error: cannot lock ref 'refs/remotes/origin/dkoes-research_openmm': is at 4e2d6b52c7267383136b38d3e7415ff3ce3734f0 but expected 13235455917d724ffcf8c71025116a61c7f4cbe9
From github.com:jonathanking/sidechainnet
 ! 1323545..4e2d6b5  dkoes-research_openmm -> origin/dkoes-research_openmm  (unable to update local ref)
Already on 'dkoes-research_openmm'
Your branch is behind 'origin/dkoes-research_openmm' by 5 commits, and can be fast-forwarded.
  (use "git pull" to update your local branch)
Updated repo.
Traceback (most recent call last):
  File "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/utils/minimize_scn.py", line 59, in process_index
    m.minimize_scnprotein(protein, use_sgd=False, verbose=True, path=output_path)
  File "/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/utils/minimizer.py", line 76, in minimize_scnprotein
    p.fastbuild(add_hydrogens=True, inplace=True)
  File "/net/pulsar/home/koes/jok120/repos/sidechai

In [16]:
def get_last_2_lines_from_file(fn):
    with open(fn, "r") as f:
        lines = f.readlines()
    return " ".join(lines[-2:])

def failed_due_to_missing_atoms(outname):
    """Turn True iff protein failed to minimized due to missing atoms."""
    try:
        logfile = get_logfile_from_outname(outname)
        last_lines = get_last_2_lines_from_file(logfile)
        if "Cannot construct an OpenMM Representation with missing atoms" in last_lines:
            return True
        else:
            return False
    except IndexError:
        return False
    
def failed_due_to_badmin(outname):
    try:
        logfile = get_logfile_from_outname(outname)
        last_lines = get_last_2_lines_from_file(logfile)
        if "The protein was not minimized correctly." in last_lines:
            return True
        else:
            return False
    except IndexError:
        return False

def failed_due_to_missing_a_terminal_group(outname):
    """Return true if the failed potentially due to a missing terminal group."""
    try:
        logfile = get_logfile_from_outname(outname)
        last_lines = get_last_2_lines_from_file(logfile)
        if "Perhaps the chain is missing a terminal group" in last_lines:
            return True
        else:
            return False
    except IndexError:
        return False
    

In [17]:
failed_due_to_missing_atoms('/net/pulsar/home/koes/jok120/scnmin221013/min/10#1HF2_1_A.pkl')

True

In [18]:
failed_due_to_badmin(protein_idx_to_expected_fn[874]), failed_due_to_badmin(protein_idx_to_expected_fn[875]), 

(True, False)

The minimized directory should have three files for each completed protein: a pdb, png and pkl object. The important thing is that the pkl can be parsed!

In [13]:
!ls $MIN_PATH | head

10#1WAZ_1_A.pdb
10#1WAZ_1_A.pkl
10#1WAZ_1_A.png
10#1Z9V_1_A.pdb
10#1Z9V_1_A.pkl
10#1Z9V_1_A.png
10#2EEM_1_A.pdb
10#2EEM_1_A.pkl
10#2EEM_1_A.png
10#2FK5_1_A.pdb
ls: write error: Broken pipe


In [19]:
incomplete = []
complete = []
incomplete_logfiles = []
missing_atoms = []
missing_terminal_group = []
badmin = []
limit = None
for idx, (fname_out, fname_in) in tqdm(enumerate(
                                            zip(protein_fnames_out_expected[:limit],
                                                protein_fnames_in[:limit])
                                            ), total=len(protein_fnames_out_expected[:limit]), smoothing=0):
    try:
        p = scn.dataloaders.SCNProtein.SCNProtein.from_pkl(fname_out)
        complete.append(fname_out)
        continue
    except FileNotFoundError:
        try:
            if failed_due_to_missing_atoms(fname_out):
                # print(f"Missing atoms: {os.path.basename(fname_out)}")
                missing_atoms.append(fname_out)
                continue
            elif failed_due_to_badmin(fname_out):
                # print(f"Bad min: {os.path.basename(fname_out)}")
                badmin.append(fname_out)
                continue
            elif failed_due_to_missing_a_terminal_group(fname_out):
                # print(f"Missing terminal group: {os.path.basename(fname_out)}")
                missing_terminal_group.append(fname_out)
                continue
            else:
                pass
        except FileNotFoundError:
            # If the logfile itself is not found, this is also incomplete
            print("Logfile not found.", end=" ")
            pass
        print(os.path.basename(fname_out), "not found.")
    incomplete.append(idx)
    incomplete_logfiles.append(get_logfile_from_outname(fname_out))


  0%|          | 0/103954 [00:00<?, ?it/s]

3B60_1_A.pkl not found.
3IAP_1_A.pkl not found.
3IYJ_1_F.pkl not found.
3P7L_1_A.pkl not found.
3QLH_1_A.pkl not found.
3RA4_1_A.pkl not found.
3S79_1_A.pkl not found.
3T09_1_A.pkl not found.
3TTD_1_A.pkl not found.
3VQ2_1_A.pkl not found.
3W36_1_A.pkl not found.
3W3A_1_A.pkl not found.
3W7W_1_A.pkl not found.
3W81_1_A.pkl not found.
3WFD_3_B.pkl not found.
3WG6_1_A.pkl not found.
3X0D_1_A.pkl not found.
3X16_1_A.pkl not found.
3ZJK_1_A.pkl not found.
3ZMV_1_A.pkl not found.
3ZQ1_1_A.pkl not found.
3ZWC_1_A.pkl not found.
3ZXL_1_A.pkl not found.
4AJ9_1_A.pkl not found.
4AKF_1_A.pkl not found.
4AP6_1_A.pkl not found.
4ARY_1_A.pkl not found.
4BEW_1_A.pkl not found.
4BEY_1_A.pkl not found.
4BQL_1_A.pkl not found.
4C2V_1_A.pkl not found.
4C2X_d4c2xa1.pkl not found.
4CAG_1_A.pkl not found.
4CCX_1_A.pkl not found.
4CEL_1_A.pkl not found.
4CIZ_1_A.pkl not found.
4CJA_1_A.pkl not found.
4CJN_1_A.pkl not found.
4COG_1_A.pkl not found.
4DX6_1_A.pkl not found.
4FF3_1_A.pkl not found.
4FKE_1_A.pkl

In [None]:
print("got:", sum(map(len, [complete, incomplete, missing_atoms, badmin])), "expected:", len(protein_fnames_out_expected[:limit]))
len(complete), len(incomplete), len(missing_atoms), len(badmin)

got: 103954 expected: 103954


(51310, 5463, 47010, 171)

In [39]:
incomplete[0]

45618

In [40]:
incomplete_logfiles[0]

'/net/pulsar/home/koes/jok120/repos/sidechainnet/sidechainnet/research/cluster/221013/10382630_045618.out'

In [21]:
for fn in incomplete_logfiles:
    # Test if the file exists first
    if os.path.exists(fn):
        !tail -2 {fn}

Epoch 67: Loss has been updated.
Epoch 68:	10214Epoch 98:	-101079.70
Epoch 98: Loss has been Epoch 82: Loss has been updated.
Epoch 83:	406241Epoch 98:	-63825.91
Epoch 98: Loss has been upEpoch 62: Loss has been updated.
EpocEpoch 73:	823080.92
EpocEpoch 63: Loss has been updated.
Epoch 64:	-557Epoch 90: Loss has been updated.
EEpoch 82:	-63200.16
Epoch 82: Loss has been updatEpoch 85: Loss has been updated.
EpocEpoch 91:	-59392.67
Epoch 91: Loss has beEpoch 62:	599753.87
Epoch 62: LossEpoch 74: Loss has been updated.
Epoch 75:	-Epoch 110: Loss has been updated.
Epoch 11Epoch 69:	1241892.81
EpEpoch 106:	139825.56
Epoch 106: Loss has been Epoch 59: Loss has been updated.
Epoch 60:	-64753.0Epoch 64:	219024.58
Epoch 64: Loss has been Epoch 78:	-69352.06
Epoch 79:	-69920Epoch 69:	11821.01
Epoch 69: Loss has Epoch 71:	-49720.10
Epoch 71: Loss has Epoch 72: Loss has been updated.
Epoch 73:	-73279.73Epoch 81:	-81760.92
Epoch 81: Loss has beEpoch 114:	1200858.64
Epoch 114: Loss has been update

Exception ignored in: <function _releaseLock at 0x7f019becfca0>
Traceback (most recent call last):
  File "/net/pulsar/home/koes/jok120/anaconda3/envs/sidechainnetv2/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


Epoch 69:	1468226.89
Epoch 69: LosEpoch 67: Loss has been updated.
Epoch 68Epoch 81: Loss has been updated.
EpochEpoch 70:	-63988.10
Epoch 70: Loss has been updated.
Epoch 93:	-82105.26
EEpoch 66:	-64579.53
Epoch 66: Loss has beeEpoch 36:	-89888.33
Epoch 36: Loss has bEpoch 19:	-87682.62
Epoch 19: Loss haEpoch 32:	-54549.38
Epoch 32Epoch 70:	-67717.85
Epoch 70: Loss has been upEpoch 59:	-50460.67
Epoch 59: Loss has been updated.
Epoch 71: Loss has been updated.
Epoch 72:Epoch 67: Loss has been updated.
Epoch 68:	-90387Epoch 98:	-48923.60
Epoch 98: Loss has been updateEpoch 69:	968.62
Epoch 69: Loss has been updEpoch 83:	-46699.91
Epoch 84:	-46Epoch 97:	-68595.73
Epoch 98:	-687Epoch 47:	-61652.26
Epoch 47: Loss haEpoch 65:	-57478.62
Epoch 65: Loss hEpoch 67: Loss has been updated.
Epoch 68:	-73498.67Epoch 85: Loss has been updated.
Epoch 86:	-152Epoch 67:	102846.24
Epoch 67: Loss has been updatEpoch 90:	-33295.23
Epoch 90: Loss has been Epoch 85: Loss has been updated.
EpocEpoch 93:	-61

In [9]:
nums = incomplete
ranges = sum((list(t) for t in zip(nums, nums[1:]) if t[0]+1 != t[1]), [])
iranges = list(iter(nums[0:1] + ranges + nums[-1:]))
working_str = ""
i = 0
while i < len(iranges) - 1:
    start = iranges[i]
    end = iranges[i+1]
    if start == end:
        working_str += str(start) + ","
    else:
        working_str += str(start) + "-" + str(end) + ","
    i += 2
# print (', '.join([str(n) + '-' + str(next(iranges)) for n in iranges]))
print (working_str[:-1])

52210,59587,60274,66674,68068,68716,69566,70401,71264,73093,73418,73422,73502,73506,73688,73705,74248,74263,74567,74651,74723,74886,74910,75495,75520,75654,75730,76334,76336,76635,76997,77002,77196,77262,77305,77406,77417,77422,77523,78655,80111,80256,80604,80672,81951,81954,82343,82392,83208,83833,84990,85543,85863,87792,88196,88425,88722,90035,90584,91556,91755,91845-91846,92024,92398,92875,92948,93032,93227,93290,93523,93569,93583,93719,93761,93808,93974,93983,93990,94150,94540,94634,94884,95058,95063,95081,95168,95175,95273,95407,95443,95634,95767,95853,95865,96098,96113-96115,96226,96327,96339,96579,96966,96980,97033,97070,97072,97110,97166,97173,97188,97190,97194,97197,97207,97349,97360,97384,97388,97394,97506,97608,97636,97657,97666,97690-97692,97766,97905,97909,97914,97926,97929-97930,97945,98022,98024,98037,98088,98113,98128-98129,98144,98154,98185,98196,98213,98258,98264,98329-98331,98370,98377,98393,98396-98397,98401,98411,98413,98428,98479,98481,98486,98524,98528-98529,9855

In [10]:
nums[:10]

[52210, 59587, 60274, 66674, 68068, 68716, 69566, 70401, 71264, 73093]

In [12]:
cmd = "sbatch --export=ALL,JOB_TYPE=process_index,CASP_V=12,CASP_T=100 --array="
cmd += working_str[:-1]
cmd += " minimize_scn2.slurm"
cmd

'sbatch --export=ALL,JOB_TYPE=process_index,CASP_V=12,CASP_T=100 --array=52210,59587,60274,66674,68068,68716,69566,70401,71264,73093,73418,73422,73502,73506,73688,73705,74248,74263,74567,74651,74723,74886,74910,75495,75520,75654,75730,76334,76336,76635,76997,77002,77196,77262,77305,77406,77417,77422,77523,78655,80111,80256,80604,80672,81951,81954,82343,82392,83208,83833,84990,85543,85863,87792,88196,88425,88722,90035,90584,91556,91755,91845-91846,92024,92398,92875,92948,93032,93227,93290,93523,93569,93583,93719,93761,93808,93974,93983,93990,94150,94540,94634,94884,95058,95063,95081,95168,95175,95273,95407,95443,95634,95767,95853,95865,96098,96113-96115,96226,96327,96339,96579,96966,96980,97033,97070,97072,97110,97166,97173,97188,97190,97194,97197,97207,97349,97360,97384,97388,97394,97506,97608,97636,97657,97666,97690-97692,97766,97905,97909,97914,97926,97929-97930,97945,98022,98024,98037,98088,98113,98128-98129,98144,98154,98185,98196,98213,98258,98264,98329-98331,98370,98377,98393,983