In [1]:
from urslib2 import RSS, DSSR, SplitmmCIF, Rfam, SplitPDB
import os

In [2]:
## Split mmCIF files into per-model files

# Both locations are fixed absolute paths on the author's machine. They are
# handed to SplitmmCIF.All in this order; presumably the first is the source
# directory and the second receives the models - confirm against SplitmmCIF.All.
path_to_files, path_to_models = (
    '/home/ebaulin/eugene/work/urs2/urslib2/data/files/',
    '/home/ebaulin/eugene/work/urs2/urslib2/data/models/',
)

SplitmmCIF.All(path_to_files, path_to_models)


1/1 1ffk.cif is successfully divided into models.
Time: 0 min 0 sec


In [3]:
# Same call shape as the SplitmmCIF.All cell, but through SplitPDB; it reuses
# the path_to_files / path_to_models values assigned in an earlier cell, so
# that cell has to be run first.
SplitPDB.All(path_to_files,path_to_models)

1/1 6dtd.pdb is successfully divided into models.
Time: 0 min 0 sec


In [2]:
## Create a model object for a PDB-entry


# Data directory; it must contain two sub-directories:
# "models" with the cif-files and "out" with the out-files
mmcif    = '/home/ebaulin/eugene/work/urs2/urslib2/data/'

# Two example filenames
pdb = '1ffk.cif1'
out = '1ffk.out1'

# Path + filename
pdbmodel = mmcif+'models/'+pdb
outmodel = mmcif+'out/'+out

# Fail loudly when the required cif-file is missing. Skipping silently would
# leave `model` undefined (or stale from an earlier run of this cell), so the
# following cells would break with a NameError or describe the wrong structure.
if not os.path.exists(pdbmodel):

    raise FileNotFoundError('Model file not found: ' + pdbmodel)

# Create out-file if it does not exist
# (assumes DSSR.run writes it under the name stored in `out` - TODO confirm)
if not os.path.exists(outmodel):

    DSSR.run(pdbmodel, mmcif+'out/')

# Create the structure object from the two files
model = RSS.SecStruct(pdbmodel,outmodel)


In [5]:
# Show the keys of model.chains, i.e. the chain identifiers of the model
model.chains.keys()

dict_keys(['0', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '1'])

In [7]:
# Print the built-in documentation of Rfam.GetRfamInfo
help(Rfam.GetRfamInfo)

Help on function GetRfamInfo in module urslib2.Rfam:

GetRfamInfo(path_to_rfam_files='', update=False, rfam_ftp='ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/database_files/')
    Parameters:
        path_to_rfam_files - path to family.txt and pdb_full_region.txt (by default - working directory)
        update - True/False - download new family.txt and pdb_full_region.txt
        rfam_ftp - source URL for Rfam files
    Returns:
        dictionary {PDB-ID: {CHAIN: RFAM_INFO}}



In [8]:
## Look up the Rfam annotation of an RNA chain from a PDB entry

# Called with its defaults, GetRfamInfo() returns a nested dictionary
# of the form {PDB-ID: {CHAIN: RFAM_INFO}}
rfam = Rfam.GetRfamInfo()

# Annotations of entry '1ffk', then the record of its chain '0'
rfam_1ffk = rfam['1ffk']
rfam_1ffk['0']

['RF02540', 'LSU_rRNA_archaea', 'Archaeal large subunit ribosomal RNA']

In [9]:
## Get a set of pairs of close atoms

# Print the documentation of RSS.Atompairs first: its parameters and the
# layout of the records it returns
help(RSS.Atompairs)

Help on function Atompairs in module urslib2.RSS:

Atompairs(model, type1='', type2='', dist=4)
    Parameters:
        model - object of PDB-entry, created with SecStruct function
        type1, type2 - strings of molecule types: R = RNA, D = DNA, P = Protein, L = Ligand, M = Metal, W = Water.
            Example: type1 = 'RD', type2 = 'MW' - find all contacts of nucleic acid atoms with metals and water
        dist - maximal distance in angstroms
    Returns:
        list of pairs of close atoms of form {DSSR1:     residue1-dssrid,
                                              DSSR2:     residue2-dssrid,
                                            'atom1':         atomname1,
                                            'atom2':         atomname2,
                                             'dist':           distance,
                                             'type':  restype1+restype2}



In [20]:
# RNA ('R') versus protein ('P') atom pairs with a maximal distance of 4.5 angstroms
atompairs = RSS.Atompairs(model, type1='R', type2='P', dist=4.5)

# Preview only the first five records, one per line
preview = atompairs[:5]
for contact in preview:
    print(contact)

{'DSSR1': '0.G.13.', 'DSSR2': 'O.GLN.61.', 'atom1': 'OP1', 'atom2': 'CA', 'dist': 4.302435821717742, 'type': 'RP'}
{'DSSR1': '0.U.19.', 'DSSR2': 'O.SER.5.', 'atom1': "C2'", 'atom2': 'CA', 'dist': 4.354945120205287, 'type': 'RP'}
{'DSSR1': '0.U.19.', 'DSSR2': 'O.SER.5.', 'atom1': "O2'", 'atom2': 'CA', 'dist': 3.3868513401092586, 'type': 'RP'}
{'DSSR1': '0.G.20.', 'DSSR2': 'O.SER.5.', 'atom1': "C5'", 'atom2': 'CA', 'dist': 4.45814232612642, 'type': 'RP'}
{'DSSR1': '0.G.20.', 'DSSR2': 'O.SER.5.', 'atom1': "C4'", 'atom2': 'CA', 'dist': 4.0119482798261386, 'type': 'RP'}


In [4]:
## Useful utilities

# model.dssrnucls maps a DSSR-ID to a [chain, mode, index] triple:
#   chain - identifier of a chain
#   mode  - RES (residues) or LIGANDS (ligands)
#   index - index in the list of residues or ligands
# The residue object itself is then model.chains[chain][mode][index]

chain, mode, index = model.dssrnucls['0.G.54.']
residue = model.chains[chain][mode][index]

for field in ('NAME', 'TYPE'):
    print(residue[field])
print(residue.keys())

print('\n')

# model.NuclSS: takes a DSSR-ID, returns the Secondary Structure Element(s):
#   S = Stem,
#   H = Hairpin, B = Bulge, I = Internal loop, J = Multiple junction
#   C = Classical, I = Isolated, P = Pseudoknotted

for nucl in model.chains['0']['RES'][50:60]:
    dssr_id = nucl['DSSR']
    print(dssr_id, model.NuclSS(dssr_id))

print('\n')

# model.NuclRelation: takes two DSSR-IDs, returns the relative position of the
# two residues within RNA secondary structure:
#   SM - same, i.e. within the same element,
#   LC - local, i.e. adjacent stem and loop,
#   NR - neighbor, i.e. neighbor stems (one loop apart) or neighbor loops (one stem apart),
#   LR - long-range, i.e. from distant elements
#   NA - n/a

relation_queries = [
    ('0.U.55.', '0.G.54.'),
    ('0.G.56.', '0.G.61.'),
    ('0.U.55.', '0.G.61.'),
    ('0.G.47.', '0.G.66.'),
    ('O.SER.5.', '0.G.66.'),
]

for first_id, second_id in relation_queries:
    print(first_id, second_id, model.NuclRelation(first_id, second_id))

print('\n')

# model.SeqDist: takes two DSSR-IDs, returns the distance between the residues
# in sequence, or -1 for residues from different chains

distance_queries = [
    ('O.SER.5.', '0.G.66.'),
    ('0.G.47.', '0.G.66.'),
    ('0.U.55.', '0.G.56.'),
]

for first_id, second_id in distance_queries:
    print(model.SeqDist(first_id, second_id))

G
RNA
dict_keys(['ID', 'CIFID', 'TYPE', 'PDBNUM', 'NAME', 'CHAIN', 'ATOMS', 'FLOAT', 'DSSR', 'MISS', 'WING', 'OLDWING', 'FSTEMS', 'THREAD', 'ZIP', 'LUMULT', 'MULT', 'BPS', 'BRACKETS', 'SLBRACKETS', 'ATOMNAMEDICT'])


0.A.52. S
0.C.53. S
0.G.54. S
0.U.55. S
0.G.56. HP
0.C.57. SHP
0.C.58. SHP
0.A.59. HPIC
0.A.60. HPIC
0.G.61. SHP


0.U.55. 0.G.54. SM
0.G.56. 0.G.61. LC
0.U.55. 0.G.61. NR
0.G.47. 0.G.66. LR
O.SER.5. 0.G.66. NA


-1
19
1


In [14]:
## DSSR motifs

# (heading, name of the model attribute) for each annotation list shown below,
# in display order
dssr_sections = (
    ('DSSR base pairs:', 'bpairs'),
    ('DSSR multiplets:', 'lumults'),
    ('DSSR non-pairing interactions:', 'non_pairs'),
    ('DSSR A-minors:', 'a_minors'),
    ('DSSR U-turns:', 'u_turns'),
    ('DSSR ribose-zippers:', 'ribzips'),
    ('DSSR kink-turns:', 'k_turns'),
    ('DSSR atom-base capping interactions:', 'abcaps'),
    ('DSSR non-stem base stacks:', 'stacks'),
)

for heading, attribute in dssr_sections:
    print(heading)
    # Only the first record is shown; the extra '\n' argument leaves a blank
    # line between sections. The attribute is looked up after the heading is
    # printed, exactly as when each section is written out by hand.
    print(getattr(model, attribute)[0], '\n')


DSSR base pairs:
{'ID': 1, 'NUCL1': ['0.U.12.', 'RES', 10], 'NUCL2': ['0.G.531.', 'RES', 529], 'PAIR': '0.U.12.-0.G.531.', 'BOND': 'U-G', 'TYPE': '-S', 'CLASS': ['--', 'tHS', 'tM-m'], 'CHAIN1': '0', 'CHAIN2': '0', 'INFO1': "-171.2(anti) ~C3'-endo lambda=8.4", 'INFO2': "-159.4(anti) ~C3'-endo lambda=97.9", 'DIST1': 10.93, 'DIST2': 9.76, 'DIST3': 10.33, 'TOR': -76.3, 'HBONDSNUM': 1, 'HBONDS': ['O4(carbonyl)-N2(amino)[2.99]'], 'PARAMS': '[-7.75   -3.20   0.40    -11.69  2.97    -4.39]', 'SHEAR': '-7.75', 'STRETCH': '-3.20', 'STAGGER': '0.40', 'BUCKLE': '-11.69', 'PROPELLER': '2.97', 'OPENING': '-4.39', 'STEM': None, 'OLDSTEM': None, 'FULLSTEM': 1, 'REVSTEM': None, 'LUSTEM': None, 'LINK': 1, 'HELIX': 1, 'NUCLMULT': None, 'STEP': '1.27 -1.68 2.98 -2.92 6.23 50.89'} 

DSSR multiplets:
{'ID': 1, 'SIZE': 3, 'NUCLS': [['0.G.32.', 'RES', 30], ['0.C.451.', 'RES', 449], ['0.G.456.', 'RES', 454]], 'SEQ': 'GCG', 'PLANARITY': 0.308} 

DSSR non-pairing interactions:
{'ID': 1, 'NUCL1': '0.U.10.', 'NUCL

In [3]:
# Show the first record of each of the remaining motif lists of the model

print('BIE/BWE motifs:')
first_biebwe = model.biebwe[0]
print(first_biebwe, '\n')

print('Helical stacking:')
first_stacking = model.helicalstacking[0]
print(first_stacking, '\n')

print('Dinucleotide platform:')
first_platform = model.nnplatform[0]
# The platform record refers to its base pair through 'BPID'; the value is
# shifted by one to index model.bpairs, and that pair's 'CLASS' is printed too
platform_bp = model.bpairs[first_platform['BPID'] - 1]
print(first_platform, platform_bp['CLASS'], '\n')

print('Internal loop motifs (TandemGA; UAA/GAN):')
first_intloop = model.intloopmotif[0]
print(first_intloop, '\n')

print('Tetraloops:')
first_tetraloop = model.tetraloop[0]
print(first_tetraloop, '\n')

BIE/BWE motifs:
{'TYPE': 'BIE', 'NUCLS': ['9.C.64.', '9.C.113.', '9.A.65.'], 'STACK1': ['2.5(0.2)', 'pp(><,inward)'], 'STACK2': ['2.9(2.0)', 'mm(<>,outward)']} 

Helical stacking:
{'BP1': ['0.G.33.', '0.C.450.'], 'BP2': ['0.C.34.', '0.G.448.'], 'STACKING': [['0.G.33.', '0.C.34.', '7.2(4.2)--pm(>>,forward)', '', '2'], ['0.C.450.', '0.G.448.', '2.8(0.2)--mp(<<,backward)', '', '5']]} 

Dinucleotide platform:
{'NUCL1': '0.G.44.', 'NUCL2': '0.A.45.', 'BPID': 37} ['--', 'cSH', 'cm+M'] 

Internal loop motifs (TandemGA; UAA/GAN):
{'TYPE': 'UAA/GAN', 'STRAND1': ['0.U.664.', '0.A.665.', '0.A.666.'], 'STRAND2': ['0.G.680.', '0.G.681.', '0.A.682.']} 

Tetraloops:
{'SEQ': 'U,C,A,C', 'NUCLS': ['0.U.253.', '0.C.254.', '0.A.255.', '0.C.256.']} 

