# Annotation of long-range nucleotide doublets

### Using a DSSR 2.0 output file (generated with the command from the DSSR.py script)

In [1]:
from urslib2 import RSS
import os

#Sort nucleotide in dssr-format by (chain,seqnumber) pair
def SortNucl(x):
    """Return the (chain, sequence number) sort key for a DSSR-format nucleotide ID.

    The ID is split on '.'; the first field is returned unchanged and the
    third field is converted to int, e.g. 'B.G.5.' -> ('B', 5).

    Raises IndexError if the ID has fewer than three dot-separated fields
    and ValueError if the third field is not an integer.
    """
    fields = x.split('.')
    return fields[0], int(fields[2])

# Root data folder
mmcif = 'data/'

# mmCIF model filename
cif = '6ugg.cif1'
# DSSR output filename
out = '6ugg.out1'

# Full paths: <data>/models/<cif> and <data>/out/<out>
pdbmodel = f'{mmcif}models/{cif}'
outmodel = f'{mmcif}out/{out}'

# Build the model object from the mmCIF file and the DSSR output file
model = RSS.SecStruct(pdbmodel, outmodel)

# Atom-atom pairs between RNA atoms at a threshold of 6.0 angstroms
atompairs = RSS.Atompairs(model, type1='R', type2='R', dist=6.0)

# Non-redundant chains
nr_chains = {"B"}

# Long-range doublets are collected here; each one is an unordered
# (frozenset) pair of nucleotide IDs, so duplicates collapse automatically
doublets = set()

# Filter atom-atom pairs down to nucleotide doublets
for pair in atompairs:
    nucl1 = pair['DSSR1']
    chain1 = nucl1.split('.')[0]
    nucl2 = pair['DSSR2']
    chain2 = nucl2.split('.')[0]

    # Skip contacts in which neither chain is non-redundant
    if chain1 not in nr_chains and chain2 not in nr_chains:
        continue

    # Skip contacts that are not long-range
    if model.NuclRelation(nucl1, nucl2) != 'LR':
        continue

    # Skip contacts between different bioassemblies
    chainbio = model.headers['CHAINBIO']
    if chainbio[chain1] != chainbio[chain2]:
        continue

    doublets.add(frozenset((nucl1, nucl2)))

# Order the two nucleotides inside each doublet, then order the doublets
# themselves by (first nucleotide, second nucleotide)
doublets = [sorted(d, key=SortNucl) for d in doublets]
doublets.sort(key=lambda d: (SortNucl(d[0]), SortNucl(d[1])))

# Print the long-range doublets, numbered from 1
for rank, doublet in enumerate(doublets, start=1):
    print(rank, *doublet)
            


1 B.G.5. B.A.13.
2 B.G.6. B.A.13.
3 B.G.6. B.G.14.
4 B.G.14. B.G.59.
5 B.G.14. B.U.60.
6 B.U.15. B.G.59.
7 B.U.15. B.U.60.
8 B.C.16. B.U.60.
9 B.C.16. B.C.61.
10 B.G.17. B.U.54.
11 B.G.17. B.U.55.
12 B.G.17. B.G.57.
13 B.G.17. B.A.58.
14 B.G.17. B.G.59.
15 B.G.17. B.U.60.
16 B.G.17. B.C.61.
17 B.G.18. B.C.56.
18 B.G.18. B.G.57.
19 B.G.18. B.G.59.
20 B.G.18. B.U.60.
21 B.U.19. B.U.60.
22 B.U.20. B.G.57.


### Using a list of secondary-structure-forming base pairs (alternative)

In [2]:
# We will use this list of Watson-Crick and wobble base pairs
!cat data/bps/6ugg.bps

A.G.0. A.C.72. 
A.G.1. A.C.71. 
A.A.2. A.U.70. 
A.G.3. A.U.69. 
A.C.4. A.G.68. 
A.G.5. A.C.67. 
A.G.6. A.C.66. 
A.G.9. A.U.25. 
A.U.10. A.A.24. 
A.U.11. A.A.23. 
A.C.12. A.G.22. 
A.G.18. A.C.56. 
A.C.27. A.G.43. 
A.C.28. A.G.42. 
A.U.29. A.A.41. 
A.G.30. A.C.40. 
A.C.31. A.G.39. 
A.G.49. A.U.65. 
A.C.50. A.G.64. 
A.G.51. A.C.63. 
A.G.52. A.C.62. 
A.G.53. A.C.61. 
B.G.0. B.C.72. 
B.G.1. B.C.71. 
B.A.2. B.U.70. 
B.G.3. B.U.69. 
B.C.4. B.G.68. 
B.G.5. B.C.67. 
B.G.6. B.C.66. 
B.G.9. B.U.25. 
B.U.10. B.A.24. 
B.U.11. B.A.23. 
B.C.12. B.G.22. 
B.G.18. B.C.56. 
B.C.27. B.G.43. 
B.C.28. B.G.42. 
B.U.29. B.A.41. 
B.G.30. B.C.40. 
B.C.31. B.G.39. 
B.G.49. B.U.65. 
B.C.50. B.G.64. 
B.G.51. B.C.63. 
B.G.52. B.C.62. 
B.G.53. B.C.61.



In [3]:
from urslib2 import RSS
import os

#Sort nucleotide in dssr-format by (chain,seqnumber) pair
def SortNucl(x):
    """Build a sort key (chain, sequence number) from a DSSR-format nucleotide ID.

    Example: 'B.U.60.' -> ('B', 60). The chain is the first dot-separated
    field; the sequence number is the third field converted to int.

    Raises IndexError when the ID has fewer than three dot-separated fields
    and ValueError when the third field is not an integer.
    """
    parts = x.split('.')
    chain, seqnumber = parts[0], parts[2]
    return chain, int(seqnumber)

# Root data folder
mmcif = 'data/'

# mmCIF model filename
cif = '6ugg.cif1'
# Base-pair list filename (the Watson-Crick + wobble pairs, not a DSSR output)
bps = '6ugg.bps'

# Full paths: <data>/models/<cif> and <data>/bps/<bps>
pdbmodel = f'{mmcif}models/{cif}'
bpslist = f'{mmcif}bps/{bps}'

# Build the model object from the mmCIF file and the base-pair list
model = RSS.SecStruct(pdbmodel, wcwblist=bpslist)

# Atom-atom pairs between RNA atoms at a threshold of 6.0 angstroms
atompairs = RSS.Atompairs(model, type1='R', type2='R', dist=6.0)

# Non-redundant chains
nr_chains = {"B"}

# Long-range doublets are collected here as unordered (frozenset) pairs
doublets = set()

# Filter atom-atom pairs; a pair is kept only when
#   1. at least one of its chains is non-redundant,
#   2. the two nucleotides are in a long-range ('LR') relation, and
#   3. both chains belong to the same bioassembly.
# The checks short-circuit in this order.
for pair in atompairs:
    nucl1 = pair['DSSR1']
    chain1 = nucl1.split('.')[0]
    nucl2 = pair['DSSR2']
    chain2 = nucl2.split('.')[0]
    if (
        (chain1 in nr_chains or chain2 in nr_chains)
        and model.NuclRelation(nucl1, nucl2) == 'LR'
        and model.headers['CHAINBIO'][chain1] == model.headers['CHAINBIO'][chain2]
    ):
        doublets.add(frozenset((nucl1, nucl2)))

# Sort the nucleotides within each doublet, then the doublets themselves
doublets = sorted(
    (sorted(d, key=SortNucl) for d in doublets),
    key=lambda d: (SortNucl(d[0]), SortNucl(d[1])),
)

# Print the long-range doublets, numbered from 1
for number, doublet in enumerate(doublets, start=1):
    print(number, *doublet)
            


1 B.G.5. B.A.13.
2 B.G.6. B.A.13.
3 B.G.6. B.G.14.
4 B.G.14. B.G.59.
5 B.G.14. B.U.60.
6 B.U.15. B.G.59.
7 B.U.15. B.U.60.
8 B.C.16. B.U.60.
9 B.C.16. B.C.61.
10 B.G.17. B.U.54.
11 B.G.17. B.U.55.
12 B.G.17. B.G.57.
13 B.G.17. B.A.58.
14 B.G.17. B.G.59.
15 B.G.17. B.U.60.
16 B.G.17. B.C.61.
17 B.G.18. B.C.56.
18 B.G.18. B.G.57.
19 B.G.18. B.G.59.
20 B.G.18. B.U.60.
21 B.U.19. B.U.60.
22 B.U.20. B.G.57.
