In [1]:
from urslib2 import RSS, DSSR, SplitmmCIF, Rfam
import os

In [9]:
## Create models from files

# Input directory with the original mmCIF files and output directory for the
# per-model files.
# NOTE(review): these are absolute, machine-specific paths; adjust them (or
# derive them from a configurable data directory) before running elsewhere.
path_to_files = '/home/baulin/Desktop/urs2/urslib2/data/files/'
path_to_models = '/home/baulin/Desktop/urs2/urslib2/data/models/'

# Divide the mmCIF files into models (progress and timing are printed).
# Presumably every file in path_to_files is processed and the results are
# written to path_to_models - confirm against the SplitmmCIF documentation.
SplitmmCIF.All(path_to_files,path_to_models)


1/1 1ffk.cif is successfully divided into models.
Time: 0 min 1 sec


In [2]:
## Create a model object for a PDB-entry

# Data directory; it must contain two sub-directories:
#   models/ - cif-files of individual models
#   out/    - the corresponding out-files
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
mmcif    = '/home/baulin/Desktop/urs2/urslib2/data/'

# Two example filenames
pdb = '1ffk.cif1'
out = '1ffk.out1'

# Path + filename
pdbmodel = mmcif+'models/'+pdb
outmodel = mmcif+'out/'+out

# The cif-file is mandatory. Fail loudly here instead of silently skipping the
# cell, which would leave `model` undefined (or stale from an earlier run) and
# make every later cell fail with a confusing error.
if not os.path.exists(pdbmodel):
    raise FileNotFoundError('Required cif-file does not exist: ' + pdbmodel)

# Create out-file if it does not exist
if not os.path.exists(outmodel):
    DSSR.run(pdbmodel, mmcif+'out/')

# Create the structure object from the two files
model = RSS.SecStruct(pdbmodel,outmodel)


In [13]:
# Chain identifiers of the loaded model (requires `model` from the
# "Create a model object" cell)
model.chains.keys()

dict_keys(['0', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '1'])

In [14]:
# Show the docstring of Rfam.GetRfamInfo (parameters and return value)
help(Rfam.GetRfamInfo)

Help on function GetRfamInfo in module urslib2.Rfam:

GetRfamInfo(path_to_rfam_files='', update=False, rfam_ftp='ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/database_files/')
    Parameters:
        path_to_rfam_files - path to family.txt and pdb_full_region.txt (by default - working directory)
        update - True/False - download new family.txt and pdb_full_region.txt
        rfam_ftp - source URL for Rfam files
    Returns:
        dictionary {PDB-ID: {CHAIN: RFAM_INFO}}



In [15]:
## Get Rfam annotations for RNA chains from PDB files

# GetRfamInfo is a function of the Rfam module; only the module itself is
# imported at the top of the notebook, so the call must be qualified
# (a bare GetRfamInfo() raises NameError on a fresh kernel).
# With the default arguments family.txt and pdb_full_region.txt are looked up
# in the working directory and are not re-downloaded (update=False).
rfam = Rfam.GetRfamInfo()

# Returned dictionary has the form {PDB-ID: {CHAIN: RFAM_INFO}};
# show the annotation of chain '0' of entry 1ffk
rfam['1ffk']['0']

['RF02540', 'LSU_rRNA_archaea', 'Archaeal large subunit ribosomal RNA']

In [3]:
## Get a set of pairs of close atoms

# Show the docstring of RSS.Atompairs (molecule type codes, distance cutoff
# and the format of the returned pairs)
help(RSS.Atompairs)

Help on function Atompairs in module urslib2.RSS:

Atompairs(model, type1='', type2='', dist=4)
    Parameters:
        model - object of PDB-entry, created with SecStruct function
        type1, type2 - strings of molecule types: R = RNA, D = DNA, P = Protein, L = Ligand, M = Metal, W = Water.
            Example: type1 = 'RD', type2 = 'MW' - find all contacts of nucleic acid atoms with metals and water
        dist - maximal distance in angstroms
    Returns:
        list of pairs of close atoms of form {DSSR1:     residue1-dssrid,
                                              DSSR2:     residue2-dssrid,
                                            'atom1':         atomname1,
                                            'atom2':         atomname2,
                                             'dist':           distance,
                                             'type':  restype1+restype2}



In [20]:
# All RNA-protein atom pairs that are at most 4.5 angstroms apart
atompairs = RSS.Atompairs(
    model,
    type1='R',   # R = RNA
    type2='P',   # P = Protein
    dist=4.5,    # maximal distance in angstroms
)

# Preview only the first five pairs; the complete list is not dumped
preview = atompairs[:5]
for pair in preview:
    print(pair)

{'DSSR1': '0.G.13.', 'DSSR2': 'O.GLN.61.', 'atom1': 'OP1', 'atom2': 'CA', 'dist': 4.302435821717742, 'type': 'RP'}
{'DSSR1': '0.U.19.', 'DSSR2': 'O.SER.5.', 'atom1': "C2'", 'atom2': 'CA', 'dist': 4.354945120205287, 'type': 'RP'}
{'DSSR1': '0.U.19.', 'DSSR2': 'O.SER.5.', 'atom1': "O2'", 'atom2': 'CA', 'dist': 3.3868513401092586, 'type': 'RP'}
{'DSSR1': '0.G.20.', 'DSSR2': 'O.SER.5.', 'atom1': "C5'", 'atom2': 'CA', 'dist': 4.45814232612642, 'type': 'RP'}
{'DSSR1': '0.G.20.', 'DSSR2': 'O.SER.5.', 'atom1': "C4'", 'atom2': 'CA', 'dist': 4.0119482798261386, 'type': 'RP'}


In [34]:
## Useful utilities

# dssrnucls

# model.NuclSS: input = DSSR-ID, output = Secondary Structure Element(s).
# Legend:
#   S = Stem
#   H = Hairpin, B = Bulge, I = Internal loop, J = Multiple junction
#   C = Classical, I = Isolated, P = Pseudoknotted
# NOTE(review): 'I' is listed twice in the legend and the recorded output of
# this cell contains multi-letter codes (e.g. 'SHP', 'HPIC'); how the letters
# combine should be confirmed against the NuclSS documentation.

# Ten consecutive residue records (slice 50:60) of chain '0'
for n in model.chains['0']['RES'][50:60]:
    dssr_id = n['DSSR']
    print(dssr_id, model.NuclSS(dssr_id))


# model.NuclRelation: input = two DSSR-IDs, output = relative position of the
# two residues within RNA secondary structure:
#   SM - same, i.e. within the same element
#   LC - local, i.e. adjacent stem and loop
#   NR - neighbor, i.e. neighbor stems (one loop apart) or neighbor loops (one stem apart)
#   LR - long-range, i.e. from distant elements
#   NA - n/a

# Visual separator between the two demonstrations
print('\n')

# Example residue pairs; the last one pairs a protein residue with an RNA residue
residue_pairs = (
    ('0.U.55.', '0.G.54.'),
    ('0.G.56.', '0.G.61.'),
    ('0.U.55.', '0.G.61.'),
    ('0.G.47.', '0.G.66.'),
    ('O.SER.5.', '0.G.66.'),
)

for first_id, second_id in residue_pairs:
    print(first_id, second_id, model.NuclRelation(first_id, second_id))


# SeqDist

0.A.52. S
0.C.53. S
0.G.54. S
0.U.55. S
0.G.56. HP
0.C.57. SHP
0.C.58. SHP
0.A.59. HPIC
0.A.60. HPIC
0.G.61. SHP


0.U.55. 0.G.54. SM
0.G.56. 0.G.61. LC
0.U.55. 0.G.61. NR
0.G.47. 0.G.66. LR
O.SER.5. 0.G.66. NA


In [None]:
## DSSR motifs

# TODO: this cell is a placeholder without code; examples still to be added for:
# nbphbs, base-pairs, stacks, a-minors, base-triples, ribose-zippers etc.

In [23]:
# Relative position of residues 0.U.55. and 0.G.61. within the RNA secondary
# structure (requires `model` from the "Create a model object" cell)
model.NuclRelation('0.U.55.','0.G.61.')

'NR'