In [1]:
import os, glob
import subprocess

from urslib2 import RSS
import urslib2.SS.Relation as Relation

In [None]:
# --- Configuration -----------------------------------------------------------
# Glob pattern selecting the model files to scan.
PDBPATH   = "../models/*.cif1"
# Glob pattern selecting the reference motif files used as ARTEM queries.
MOTIFPATH = "refmotifs/*"
# Path of the ARTEM script that is launched with `python` for every model/motif pair.
ARTEMPATH = "ARTEM/artem.py"

# Thresholds forwarded to ARTEM as its `sizemin=` and `rmsdmax=` arguments.
SIZETHR   = 12  # minimum hit size, in residues
RMSDTHR   = 2.0 # maximum hit RMSD, in angstroms

In [2]:
# Expand both glob patterns (models first, then reference motifs) into
# alphabetically sorted path lists, and display how many of each were found.
files, motifs = (sorted(glob.glob(pattern)) for pattern in (PDBPATH, MOTIFPATH))
(len(files), len(motifs))

(7256, 2)

In [4]:
# Ordered list of the 14 position tokens that describe a motif core.
# The same order is used for the per-position columns of the output table.
scheme = "-2b -2n -1b -1n L1 L1p 1n 1b 2b 2n 3b 3n 4b 4n".split()

# For each reference motif (keyed by file name): position token -> residue
# identifier inside that reference file, or None when the reference has no
# residue at that position.
# NOTE(review): identifiers look like "<chain>.<base>.<number>." — confirm
# against the residue naming used in the ARTEM output.
cores = {"1ffk_0_kt7.cif": { "L1":"0.G.94.", "L1p":None,
                            "-1b":"0.C.93.", "-1n":"0.G.81.", 
                            "-2b":"0.G.92.", "-2n":"0.C.82.", 
                             "1n":"0.A.80.",  "1b":"0.G.97.",
                             "2b":"0.A.98.",  "2n":"0.G.79.",
                             "3b":"0.A.99.",  "3n":"0.G.78.",
                             "4b":"0.C.100.", "4n":"0.G.77."},
         
         "3d2g_A_kj.cif":  { "L1":"A.C.38.", "L1p":"A.G.8.",
                            "-1b":"A.C.37.", "-1n":"A.G.9.", 
                            "-2b":"A.C.36.", "-2n":"A.G.10.",
                             "1n":"A.A.72.",  "1b":"A.G.42.",
                             "2b":"A.A.44.",  "2n":"A.G.71.",
                             "3b":"A.C.45.",  "3n":"A.G.70.",
                             "4b":"A.C.46.",  "4n":"A.G.69."},
        }

# For each motif file found on disk (keyed by base name): the residue
# selection string handed to ARTEM as `qres=`. Every non-None core residue
# contributes one ":_<N>" item, where <N> is the second-to-last dot-separated
# field of its identifier (e.g. "0.G.94." -> ":_94"); items are joined by
# spaces in `scheme` order.
# A motif file without an entry in `cores` raises KeyError here.
qres = {m:' '.join([":_{}".format(cores[m][x].split('.')[-2]) 
                    for x in scheme if cores[m][x]]) 
        for m in [os.path.basename(x) for x in motifs]}

In [None]:
def AtomDist(a1,a2):
    """Return the squared Euclidean distance between two atoms.

    Both arguments are mappings exposing the coordinates under the keys
    'X', 'Y' and 'Z'. No square root is taken, so the result is only
    meant for comparing distances against each other.
    """
    return sum((a1[axis] - a2[axis])**2 for axis in ('X', 'Y', 'Z'))

# Scan every model against every reference motif with ARTEM and write one
# annotated, tab-separated row per hit to rawhits.tsv.

def _junction_threads(model):
    """Map residue identifier -> THREADSNUM of the junction loop it belongs to.

    Only junction loops whose PTYPE is 'C' are used, and threads with a falsy
    LEN contribute nothing. If a residue occurs in several loops, the last
    one visited wins.
    """
    juncd = {}
    for loop in model.loops['JUNCTION']:
        threadsnum = loop['THREADSNUM']
        if not loop['PTYPE'] == 'C':
            continue
        for t in loop['TLOOP']:
            thread = model.threads[t['THREAD']-1]
            if thread['LEN']:
                chain = model.chains[thread['CHAIN']][thread['START'][1]]
                for nucl in chain[thread["START"][2]:thread["END"][2]+1]:
                    juncd[nucl['DSSR']] = threadsnum
    return juncd


def _interaction_type(model, core):
    """Return 'N1', 'N3' or 'NA' for the L1 / 1n residue pair of a hit.

    When both residues are matched and the 1n residue is A or G, the result
    names whichever of its N1 / N3 atoms lies closer to the O2' atom of the
    L1 residue (ties go to N1). Anything else yields 'NA'.
    """
    if not (core['L1'] and core['1n']):
        return 'NA'
    n1 = Relation.GetNuclByDSSR(model,core['L1'])
    n2 = Relation.GetNuclByDSSR(model,core['1n'])
    if n2['NAME'] not in {'A','G'}:
        return 'NA'
    try:
        atom1 = [atom for atom in n1['ATOMS'] if atom['NAME']=="O2'"][0]
        atom2 = [atom for atom in n2['ATOMS'] if atom['NAME']=="N1"][0]
        atom3 = [atom for atom in n2['ATOMS'] if atom['NAME']=="N3"][0]
        if AtomDist(atom1,atom2) <= AtomDist(atom1,atom3):
            return "N1"
        return "N3"
    except Exception:
        # Best effort: a missing atom (or malformed atom record) leaves the
        # type undetermined. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt still stops this long-running scan.
        return 'NA'


def _hit_row(model, pdb, ref, juncd, linesplit):
    """Build the output row (list of values, in `title` order) for one ARTEM hit.

    model     -- RSS.SecStruct object of the scanned file
    pdb       -- base name of the scanned file (first output column)
    ref       -- base name of the reference motif; key into `cores`
    juncd     -- mapping produced by _junction_threads(model)
    linesplit -- whitespace-split fields of one non-header ARTEM output line
    """
    size, rmsd = linesplit[1], linesplit[2]

    # The last field is a comma-separated list of "<left>=<right>" residue
    # pairs. The first dot-separated field of every identifier is dropped,
    # and the mapping goes from the right-hand id to the left-hand id.
    # NOTE(review): presumably left = model residue, right = motif residue.
    pairs = (pair.split('=') for pair in linesplit[-1].split(','))
    match = {y.split('.',1)[1]:x.split('.',1)[1] for x,y in pairs}

    # Model residue found at each scheme position ('' when unmatched).
    core = {token:match[cores[ref][token]]
            if cores[ref][token] and cores[ref][token] in match
            else ''
            for token in scheme}
    coress = {token:model.NuclSS(core[token]) if core[token] else '' for token in core}

    # THREADSNUM values of all junctions touched by the matched residues.
    xway = set()
    for dssr in core.values():
        if dssr and dssr in juncd:
            xway.add(juncd[dssr])
    xway = ','.join([str(x) for x in sorted(xway)])

    # 'LR' only when L1 is in an 'LR' relation with both 1n and 2b.
    lclr = "NA"
    if core['L1'] and core['1n'] and core['2b']:
        if model.NuclRelation(core['L1'],core['1n']) == 'LR' and\
           model.NuclRelation(core['L1'],core['2b']) == 'LR':
            lclr = 'LR'
        else:
            lclr = 'LC'

    # 'YES' when the sequence distance from L1 to 2b lies in [0, 10).
    kink = 'NA'
    if core['L1'] and core['2b']:
        if 0 <= model.SeqDist(core['L1'],core['2b']) < 10:
            kink = 'YES'
        else:
            kink = 'NO'

    Type = _interaction_type(model, core)

    # Names of all molecules contributing at least one matched residue.
    mols = {model.molecules[model.chains[dssr.split('.')[0]]['MOL_ID']]['MOLECULE']
            for dssr in core.values() if dssr}
    mols = ','.join(sorted(mols))

    res = [pdb,mols,ref,size,rmsd]
    res += [core[t] for t in scheme]
    res += [core[t].split('.')[1] if core[t] else '' for t in scheme]
    res += [coress[t] for t in scheme]
    res += [xway, lclr, kink, Type]
    return res


title = ['PDB', "MOL", 'REFERENCE','SIZE','RMSD'] +\
        scheme + [x+'BASE' for x in scheme] + [x+'SS' for x in scheme] +\
        ["XwayJunction","RANGE","KINK","TYPE"]

cnt = 0
# The context manager guarantees the table is flushed and closed even if the
# scan is interrupted or fails on one of the models.
with open("rawhits.tsv",'w') as outp:
    outp.write("\t".join(title)+'\n')

    for cnt, file in enumerate(files, 1):
        outpath = file.replace("/models/","/out/").replace(".cif",".out")
        model   = RSS.SecStruct(file, outpath)
        pdb = os.path.basename(file)
        print(pdb, cnt)
        juncd = _junction_threads(model)

        for motif in motifs:
            ref = os.path.basename(motif)
            # `qres` is keyed by motif base name, so it must be indexed with
            # `ref`; indexing with the full glob path raises KeyError.
            # Passing an argument list (no shell) keeps paths that contain
            # spaces or shell metacharacters intact.
            cmd = ['python', ARTEMPATH,
                   'r={}'.format(file), 'rformat=cif',
                   'q={}'.format(motif), 'qres={}'.format(qres[ref]),
                   'sizemin={}'.format(SIZETHR), 'rmsdmax={}'.format(RMSDTHR)]
            with open('out.tmp','w') as tmp:
                status = subprocess.call(cmd, stdout=tmp)
            if status != 0:
                # Do not abort the whole scan, but make the failure visible;
                # whatever was written to out.tmp is still parsed below.
                print("WARNING: ARTEM exited with status {} for {} vs {}"
                      .format(status, pdb, ref))

            with open('out.tmp') as inp:
                for line in inp:
                    linesplit = line.strip().split()
                    # Skip blank lines and the header line that starts with "ID".
                    if not linesplit or line.startswith("ID"):
                        continue
                    res = _hit_row(model, pdb, ref, juncd, linesplit)
                    outp.write("\t".join([str(x) for x in res])+'\n')