In [1]:
from artem import *

In [2]:
def task(m, n):
    """
    Evaluate a single seed pair: reference item ``m`` against query item ``n``.

    The function reads its working data from module-level names
    (``r_prim``, ``q_prim``, ``q_avg``, ``r_avg_tree``, ``r_scnd``,
    ``q_scnd``, ``r_eval``, ``q_eval``) and its thresholds from
    ``matchrange``, ``sizemin``/``sizemax``, ``rmsdmin``/``rmsdmax`` and
    ``rmsdsizemin``/``rmsdsizemax``.

    Steps:
      1. Compute a transform from ``r_prim[m]`` and ``q_prim[n]`` and apply
         it to ``q_avg``.
      2. Build a KD-tree of the transformed points and collect the pairs
         returned by ``mutual_nb`` from the sparse distance matrix
         (cut-off ``matchrange``, ``p=2``) against ``r_avg_tree``.
      3. Recompute the transform from the ``*_scnd`` entries of all pairs and
         measure the RMSD on the ``*_eval`` entries.

    Returns:
        ``[size, rmsd, rmsdsize, nb, transform]`` where ``nb`` is the sorted
        tuple of matched index pairs, or ``None`` as soon as the size, the
        RMSD or the RMSD/size ratio falls outside its allowed interval.
    """
    # Initial superposition derived from the seed pair only.
    seed_transform = get_transform(r_prim[m], q_prim[n])
    q_avg_moved = apply_transform(q_avg, seed_transform)
    q_avg_tree = KDTree(q_avg_moved)

    dist = r_avg_tree.sparse_distance_matrix(
        q_avg_tree, matchrange, p=2, output_type='ndarray'
    )
    nb = mutual_nb(dist)

    size = len(nb)
    if not sizemin <= size <= sizemax:
        return None

    # Refit the transform on every matched pair.
    scnd_pairs = [[r_scnd[i], q_scnd[j]] for i, j in nb]
    transform = get_transform(*vstack(scnd_pairs))

    # Score the refit on the evaluation coordinates.
    eval_pairs = [[r_eval[i], q_eval[j]] for i, j in nb]
    r_coord, q_coord = vstack(eval_pairs)
    rmsd = RMSD(r_coord, apply_transform(q_coord, transform))
    if not rmsdmin <= rmsd <= rmsdmax:
        return None

    rmsdsize = rmsd / size
    if not rmsdsizemin <= rmsdsize <= rmsdsizemax:
        return None

    return [size, rmsd, rmsdsize, tuple(sorted(nb)), transform]

In [3]:
# Command-line style "key=value" arguments for this session.
# Every active entry ends with a comma so that un-commenting one of the
# examples below can never trigger implicit string-literal concatenation
# with the preceding entry.
argv = [
    'r=data/1xjr.cif',
    'q=data/1xjr.pdb',
    # 'rmsdmax=3',
    # 'sizemin=20',
    # 'saveto=results',
    # 'threads=-1',
    'qres=/A:A_45',
    # 'rres=/X:A_0_100',
    # 'rresneg=/X:_100_500',
    # 'rseed=/X:A_0_20',
]

In [4]:
# Parse the "key=value" arguments. Split on the first '=' only, so a value
# that itself contains '=' does not break the dict construction.
kwargs = dict(arg.split('=', 1) for arg in argv)

threads = int(kwargs.get('threads', threads))
if threads != 1:
    # Multiprocessing is available only for UNIX-like systems ('fork').
    # force=True keeps this cell re-runnable: without it a second call
    # raises RuntimeError because the start method has already been set.
    mp.set_start_method('fork', force=True)
    if threads < 0:
        # A negative value means "use every available CPU".
        threads = mp.cpu_count()
    else:
        threads = min(threads, mp.cpu_count())


# Reference structure options (defaults are the values already in scope).
r       = kwargs.get('r')
rres    = kwargs.get('rres', rres)
rresneg = kwargs.get('rresneg', rresneg)
rseed   = kwargs.get('rseed', rseed)
rformat = kwargs.get('rformat', rformat)

# Query structure options.
q       = kwargs.get('q')
qres    = kwargs.get('qres', qres)
qresneg = kwargs.get('qresneg', qresneg)
qseed   = kwargs.get('qseed', qseed)
qformat = kwargs.get('qformat', qformat)

# Numeric thresholds.
sizemin     = float(kwargs.get('sizemin', sizemin))
sizemax     = float(kwargs.get('sizemax', sizemax))

rmsdmin     = float(kwargs.get('rmsdmin', rmsdmin))
rmsdmax     = float(kwargs.get('rmsdmax', rmsdmax))

rmsdsizemin = float(kwargs.get('rmsdsizemin', rmsdsizemin))
rmsdsizemax = float(kwargs.get('rmsdsizemax', rmsdsizemax))

matchrange  = float(kwargs.get('matchrange', matchrange))

saveto     = kwargs.get('saveto', saveto)
saveres    = kwargs.get('saveres', saveres)

# Derive the structure name and extension from the file name.
# basename/splitext cope with names that contain several dots or none,
# and with '/' separators on platforms where os.sep differs.
rname, rext = os.path.splitext(os.path.basename(r))
qname, qext = os.path.splitext(os.path.basename(q))

available_format = {'PDB', 'CIF'}

# A recognised file extension overrides the configured format.
rext = rext.lstrip('.').upper()
if rext in available_format:
    rformat = rext

qext = qext.lstrip('.').upper()
if qext in available_format:
    qformat = qext

saveformat = kwargs.get('saveformat', qformat)

In [5]:
# Parse the reference structure and drop duplicated alternative locations.
rstruct = pdb.parser(r, rformat, rname)
rstruct.drop_duplicates_alt_id(keep=keep)

# When a negative selection is given it takes precedence over `rres`.
rnegcase = bool(rresneg)
rsstruct = rstruct.get_res_substruct(
    rresneg if rnegcase else rres,
    rnegcase,
)
rrres, rures = rsstruct.artem_desc(seed_res_repr)
rseed_code = rsstruct.get_res_code(rseed)

# Transpose the per-residue records into parallel sequences.
r_code, r_prim, r_avg, r_scnd, r_eval = zip(*rrres)
r_avg = np.vstack(r_avg)

# Keep only seed codes that are present among the described residues,
# then record their positions in `r_code`.
rseed_code = set(r_code).intersection(rseed_code)
r_ind = [idx for idx, code in enumerate(r_code) if code in rseed_code]

In [6]:
# Parse the query structure and drop duplicated alternative locations.
qstruct  = pdb.parser(q, qformat, qname)
qstruct.drop_duplicates_alt_id(keep=keep)

# When a negative selection is given it takes precedence over `qres`.
qnegcase = bool(qresneg)
qsstruct = qstruct.get_res_substruct(
    # Pass ONE selection string (chosen by qnegcase), not the whole list,
    # mirroring the reference-structure cell.
    [qres, qresneg][qnegcase],
    qnegcase
)
qrres, qures = qsstruct.artem_desc(seed_res_repr)
# Resolve the seed selection before it is intersected below; without this
# line `qseed_code` is read before it is ever assigned.
qseed_code = qsstruct.get_res_code(qseed)

# Transpose the per-residue records into parallel sequences.
q_code, q_prim, q_avg, q_scnd, q_eval = zip(*qrres)
q_avg = np.vstack(q_avg)

# Keep only seed codes that are present among the described residues,
# then record their positions in `q_code`.
qseed_code = set(q_code) & set(qseed_code)
q_ind = [i for i, code in enumerate(q_code) if code in qseed_code]

KeyboardInterrupt: 

In [8]:
# Debug probe: display how the selection string `qres` is split into its
# '#', '/' and ':' parts by Structure._res_split.
pdb.Structure._res_split(qres)

{'#': 1, '/': 'A', ':': ['A', 45]}

In [None]:
# Debug call: extract the query sub-structure for the active selection.
# The selection must be a single string picked by `qnegcase`; passing the
# whole [qres, qresneg] list is not what get_res_substruct is given in the
# reference-structure cell.
qstruct.get_res_substruct(
    [qres, qresneg][qnegcase],
    qnegcase
)

In [9]:
# Start of a manual, step-by-step replay of the residue selection:
# use the query selection string as the working value.
res = qres

In [10]:
# Table held by the query structure; the following cells build boolean
# masks over its columns.
tab = qstruct.tab
# Selection split into its '#', '/' and ':' components.
spl = pdb.Structure._res_split(res)

In [13]:
# Model mask: match the requested model number when one is given;
# otherwise fall back to the truthiness of the model-number column.
mod = spl['#']
mod_msk = (
    tab['pdbx_PDB_model_num'].eq(mod)
    if mod
    else tab['pdbx_PDB_model_num'].astype(bool)
)

In [14]:
# Chain mask: match the requested chain id when one is given;
# otherwise fall back to the truthiness of the chain-id column.
chn = spl['/']
chn_msk = (
    tab['auth_asym_id'].eq(chn)
    if chn
    else tab['auth_asym_id'].astype(bool)
)

In [16]:
# Residue part of the selection; `case` is its number of items
# (0 when the part is empty or missing).
rng = spl[':']
case = len(rng) if rng else 0

In [18]:
# Residue mask for a two-item residue spec: (residue name, residue number).
res, num = rng
if not isinstance(num, int):
    # The number is still a string of the form "<digits><name>".
    # ':N1_numN2' = ':_numN2' even if N1 != N2, i.e. the trailing name
    # replaces whatever `res` held.
    dgt = ''
    for c in num:
        if not c.isdigit():
            break
        dgt += c
    if not dgt:
        raise ValueError(
            'residue number must start with a digit: {!r}'.format(num)
        )
    # Slice by the count of digits consumed rather than by the loop index:
    # the index is stale when the loop ends without hitting a non-digit.
    res = num[len(dgt):]
    num = int(dgt)

# The number always applies; the residue name only when one is present.
rng_msk = tab['auth_seq_id'].eq(num)
if res:
    rng_msk = rng_msk & tab['auth_comp_id'].eq(res)

In [19]:
# A row is selected only if it passes the model, chain and residue masks.
msk = mod_msk & chn_msk & rng_msk

In [23]:
# Extract the query sub-structure; the negative selection is used
# whenever `qnegcase` is set, otherwise the positive one.
qsstruct = qstruct.get_res_substruct(
    qresneg if qnegcase else qres,
    qnegcase,
)

In [24]:
# Display the extracted query sub-structure.
qsstruct

<1xjr Structure>

In [25]:
# Quick check: does the six-letter string occur as a contiguous substring
# of the longer sequence?
'GGCCAT' in 'ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA'

True