In [1]:
from artem import *

In [2]:
def artem(m, n):
    """Try one seed pairing and report the residue matching it produces.

    ``m`` indexes ``r_prim`` and ``n`` indexes ``q_prim``. The transform
    derived from that pair is applied to ``q_avg``; mutual nearest neighbours
    within ``matchrange`` of ``r_avg_tree`` are then collected, a second
    transform is fitted on the matched ``*_scnd`` entries, and the RMSD is
    measured on the matched ``*_eval`` entries.

    Returns ``(neighbors, rmsd)`` with ``neighbors`` as a sorted tuple, or
    ``None`` when the match count, the RMSD, or the RMSD-per-match ratio falls
    outside its configured ``*min``/``*max`` bounds.
    """
    # Transform obtained from the selected seed pair, applied to the query averages.
    seed_fit = get_transform(r_prim[m], q_prim[n])
    moved_q_tree = KDTree(apply_transform(q_avg, seed_fit))

    # Pairwise distances (Euclidean, p=2) limited to `matchrange`.
    pair_dists = r_avg_tree.sparse_distance_matrix(
        moved_q_tree, matchrange, p=2, output_type='ndarray'
    )

    neighbors = mutual_nearest_neighbors(pair_dists)
    size = len(neighbors)
    if not sizemin <= size <= sizemax:
        return None

    # Refit the transform on the matched pairs only.
    ref_fit_pts, qry_fit_pts = vstack(
        [[r_scnd[ri], q_scnd[qi]] for ri, qi in neighbors]
    )
    refined_fit = get_transform(ref_fit_pts, qry_fit_pts)

    # Score the refined transform on the evaluation entries of the same pairs.
    ref_eval_pts, qry_eval_pts = vstack(
        [[r_eval[ri], q_eval[qi]] for ri, qi in neighbors]
    )
    rmsd = RMSD(ref_eval_pts, apply_transform(qry_eval_pts, refined_fit))
    if not rmsdmin <= rmsd <= rmsdmax:
        return None

    if not rmsdsizemin <= rmsd / size <= rmsdsizemax:
        return None

    return (tuple(sorted(neighbors)), rmsd)

In [3]:
# Stand-in for the command-line arguments: 'key=value' strings that a later
# cell splits on '=' to build the kwargs dict.
argv = ['r=data/1xjr_2mod.cif', 'q=data/1xjr.pdb', 'rresneg=#1']

In [4]:
# Show the README and stop when the first argument is a help flag; otherwise
# turn the 'key=value' arguments into the kwargs dict.
# The `argv and` guard keeps an empty argument list from raising IndexError.
if argv and argv[0] in {'--H', '-H', '--h', '-h', '--help', '-help'}:
    with open('README.md', 'r') as rdme:
        # Print the text as read; unpacking the file object into print()
        # would insert a space before every line after the first.
        print(rdme.read())
    exit()
else:
    # Split on the first '=' only, so a value may itself contain '='
    # without breaking the (key, value) pairing expected by dict().
    kwargs = dict([arg.split('=', 1) for arg in argv])

In [5]:
def _split_structure_path(path, key):
    """Return ``(name, EXT)`` for a structure file path.

    ``name`` is the file name without its directory and last extension;
    ``EXT`` is the upper-cased last extension ('' when the name has no dot).
    Raises ValueError when ``path`` is missing, naming the argument ``key``.
    """
    if not path:
        raise ValueError('Missing required argument {}=<path>'.format(key))
    base = os.path.basename(path)
    if '.' in base:
        # Split on the last dot only, so names with several dots still work.
        name, _, ext = base.rpartition('.')
    else:
        name, ext = base, ''
    return name, ext.upper()


def _resolve_format(fmt, ext):
    """Pick the structure format: explicit ``fmt`` wins, then a known ``ext``, else 'PDB'."""
    if fmt:
        return fmt.upper()
    return ext if ext in pdb.formats else 'PDB'


# --- Parallelism -----------------------------------------------------------
threads = int(kwargs.get('threads', threads))
if threads != 1:
    # ARTEM multiprocessing is available only for UNIX-like systems.
    # Only set the start method when it is not already 'fork', so re-running
    # this cell does not fail with "context has already been set".
    # NOTE(review): assumes `mp` is the standard multiprocessing module — confirm.
    if mp.get_start_method(allow_none=True) != 'fork':
        mp.set_start_method('fork')
    if threads <= 0:
        # Non-positive values mean "use every available CPU".
        threads = mp.cpu_count()
    else:
        threads = min(threads, mp.cpu_count())

# --- r structure arguments -------------------------------------------------
r       = kwargs.get('r')
rres    = kwargs.get('rres', rres)
rresneg = kwargs.get('rresneg', rresneg)
rneg    = bool(rresneg)     # a non-empty rresneg switches on the negated selection
rseed   = kwargs.get('rseed', '')
rname, rext = _split_structure_path(r, 'r')
rformat = _resolve_format(kwargs.get('rformat', None), rext)

# --- q structure arguments -------------------------------------------------
q       = kwargs.get('q')
qres    = kwargs.get('qres', qres)
qresneg = kwargs.get('qresneg', qresneg)
qneg    = bool(qresneg)     # a non-empty qresneg switches on the negated selection
qseed   = kwargs.get('qseed', '')
qname, qext = _split_structure_path(q, 'q')
qformat = _resolve_format(kwargs.get('qformat', None), qext)

# --- Numeric thresholds (argument value, else the pre-existing default) ----
sizemin     = float(kwargs.get('sizemin', sizemin))
sizemax     = float(kwargs.get('sizemax', sizemax))

rmsdmin     = float(kwargs.get('rmsdmin', rmsdmin))
rmsdmax     = float(kwargs.get('rmsdmax', rmsdmax))

rmsdsizemin = float(kwargs.get('rmsdsizemin', rmsdsizemin))
rmsdsizemax = float(kwargs.get('rmsdsizemax', rmsdsizemax))

matchrange  = float(kwargs.get('matchrange', matchrange))

In [6]:
# Sanity check: display the rres, rresneg and rseed values resolved from the arguments.
rres, rresneg, rseed

('', '#1', '')

In [7]:
# Parse the r structure and drop duplicated alternative-location entries
# according to the `keep` setting.
rstruct = pdb.parser(r, rformat, rname)
rstruct.drop_duplicates_alt_id(keep=keep)

# Use the negated selection string when `rneg` is set, the plain one otherwise;
# `rneg` is passed along as the second argument.
rresstruct = rstruct.get_res_substruct(rresneg if rneg else rres, rneg)

rdata, rnoise = describe(rresstruct)
if not rdata:
    # Nothing was described for the selection: report which selection was used.
    msg = 'No {}={} nucleotides in the r={} for rseed={}'.format(
        'rresneg' if rneg else 'rres',
        rresneg if rneg else rres,
        r,
        rseed,
    )
    raise Exception(msg)

# Transpose the per-entry records into parallel tuples, then stack the
# averaged entries into a single array.
r_code, r_prim, r_avg, r_scnd, r_eval = zip(*rdata)
r_avg = np.vstack(r_avg)

In [30]:
# NOTE(review): scratch cell — it parses a '.cif' path while passing 'PDB' as the
# format, and omits the third argument that the other pdb.parser call supplies.
# Confirm this is intended, or remove the cell.
s = pdb.parser('data/1xjr.cif', 'PDB')