In [1]:
from nussinov import Nussinov

In [2]:
# Complement lookup for RNA bases: A<->U and G<->C.
# (Not referenced by any of the cells visible in this notebook.)
T = dict(zip('AGCU', 'UCGA'))

In [3]:
# Toy sequence. The spaces only group the bases visually and are stripped
# before the sequence is handed to the solver.
# Per the original note, the value returned by solve() is expected to be 10.
s = 'GCUCGGG UUCCC UAU UCA AGAGC'.replace(' ', '')
ns = Nussinov(s)
ns.solve(min_padding=0)

10

In [4]:
# Show the structure from the preceding solve() call. With prettify=True the
# output is a three-line layout: the bases, a rule, then one symbol per base.
print(ns.dot_parentheses(prettify=True))

G C U C G G G U U C C C U A U U C A A G A G C
---------------------------------------------
( ( ( ( ( ( ( - - ) ) ) ( ( ) ( - ) ) ) ) ) )


In [5]:
# Solve the same sequence again with an explicit tie-break order. The structure
# printed below differs from the default-order run while still containing 10
# pairs, so the order changes which structure is reported.
# NOTE(review): the meaning of the letters 'B', 'M', 'D', 'L' is defined in
# nussinov.py and is not visible here.
ns.solve(min_padding=0, tie_break_permute=['B', 'M', 'D', 'L'])
print(ns.dot_parentheses(prettify=True))

G C U C G G G U U C C C U A U U C A A G A G C
---------------------------------------------
( ) ( ( ) ( ( - - ) ) ( ( ) ( ( - ) ) ) ) ( )


In [9]:
# Evaluate the toy sequence under the different tie-break orders.
# The following cells show that `result` exposes .keys() and maps each key to a
# list of tie-break orders (4-tuples of 'L', 'D', 'M', 'B').
# NOTE(review): presumably each key is one distinct predicted structure —
# confirm against evaluate_tie_breaks in nussinov.py.
result = ns.evaluate_tie_breaks(min_padding=0, prettify=True)

In [10]:
# Number of distinct keys in `result` (2 for the toy sequence).
len(result.keys())

2

In [12]:
# Freeze the keys into a list so each outcome can be addressed by position.
possibilities = [*result.keys()]

# Tie-break orders grouped under the first key.
result[possibilities[0]]

[('L', 'D', 'M', 'B'),
 ('L', 'M', 'D', 'B'),
 ('L', 'M', 'B', 'D'),
 ('D', 'L', 'M', 'B'),
 ('D', 'M', 'L', 'B'),
 ('D', 'M', 'B', 'L'),
 ('M', 'L', 'D', 'B'),
 ('M', 'L', 'B', 'D'),
 ('M', 'D', 'L', 'B'),
 ('M', 'D', 'B', 'L'),
 ('M', 'B', 'L', 'D'),
 ('M', 'B', 'D', 'L')]

In [13]:
# Tie-break orders grouped under the second key.
result[possibilities[1]]

[('L', 'D', 'B', 'M'),
 ('L', 'B', 'D', 'M'),
 ('L', 'B', 'M', 'D'),
 ('D', 'L', 'B', 'M'),
 ('D', 'B', 'L', 'M'),
 ('D', 'B', 'M', 'L'),
 ('B', 'L', 'D', 'M'),
 ('B', 'L', 'M', 'D'),
 ('B', 'D', 'L', 'M'),
 ('B', 'D', 'M', 'L'),
 ('B', 'M', 'L', 'D'),
 ('B', 'M', 'D', 'L')]

There are clearly some patterns here. `B` and `M` are at odds: every ordering that ranks `M` ahead of `B` yields the first structure, and every ordering that ranks `B` ahead of `M` yields the second, while `L` and `D` are evenly split between the two options. We would like to investigate these patterns further. This includes the number of unique structures and their count distribution, the dynamics between the "dominating" options (most likely `B` and `M`) and the remaining options, and the effect of `min_padding`.

A real example is below.

In [21]:
# https://rnacentral.org/rna/URS00000DE3E2/9606
# Homo sapiens small nucleolar RNA, H/ACA box 73A (SNORA73A)
# Note: this rebinds `s`, replacing the toy sequence used earlier in the notebook.

s = 'GUCUUCUCAUUGAGCUCCUUUCUGUCUAUCAGUGGCAGUUUAUGGAUUCGCACGAGAAGAAGAGAGAAUUCACAGAACUAGCAUUAUUUUACCUUCUGUCUUUACAGAGGUAUAUUUAGCUGUAUUGUGAGACAUUC'

In [22]:
# Length of the sequence, i.e. the number of bases.
print(len(s))

137


In [56]:
# Fresh solver for the SNORA73A sequence; this rebinds `ns`, replacing the
# solver built for the toy sequence. The value displayed for solve() is 45.
# NOTE(review): min_padding=3 presumably enforces a minimum number of unpaired
# bases between paired positions — confirm in nussinov.py.
ns = Nussinov(s)
ns.solve(min_padding=3)

100%|██████████| 9316/9316.0 [00:00<00:00, 14658.21it/s]


45

In [38]:
# Evaluate the real sequence under the different tie-break orders; this rebinds
# `result`. The captured output shows one progress bar per solver run.
# prettify=False here, unlike the toy run, which used prettify=True.
result = ns.evaluate_tie_breaks(min_padding=3, prettify=False)

100%|██████████| 9316/9316.0 [00:00<00:00, 14499.13it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14369.60it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15104.67it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14614.11it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15210.76it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14271.38it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15255.86it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14272.23it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15163.27it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14174.88it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14414.86it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 13983.36it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15242.39it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 13979.28it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14964.93it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 14041.12it/s]
100%|██████████| 9316/9316.0 [00:00<00:00, 15275.46it/s]
100%|██████████| 9316/9316.0 [0

In [39]:
# Number of distinct keys in `result` (13 for the real sequence).
len(result.keys())

13

In [40]:
# Freeze the keys into a list so each outcome can be addressed by position.
possibilities = [*result.keys()]

In [41]:
# For each key, by position, print how many tie-break orders are grouped under it.
for i, p in enumerate(possibilities):
    print(i, len(result[p]))

0 1
1 2
2 2
3 1
4 4
5 1
6 1
7 2
8 1
9 1
10 2
11 3
12 3


In [42]:
# Key 4 has the largest group (4 tie-break orders).
result[possibilities[4]]

[('D', 'L', 'M', 'B'),
 ('D', 'L', 'B', 'M'),
 ('D', 'B', 'L', 'M'),
 ('D', 'B', 'M', 'L')]

In [43]:
# Key 11: one of the two groups of size 3.
result[possibilities[11]]

[('B', 'L', 'D', 'M'), ('B', 'D', 'L', 'M'), ('B', 'D', 'M', 'L')]

In [44]:
# Key 12: the other group of size 3.
result[possibilities[12]]

[('B', 'L', 'M', 'D'), ('B', 'M', 'L', 'D'), ('B', 'M', 'D', 'L')]

In [53]:
# Reference structure in dot-bracket notation, copied from the website
# (presumably the RNAcentral entry linked earlier — confirm).
official = "..((((((.....(((((...(((((.......))))).....)))...))..))))))..........((((((.................((((((......))))))..............))))))......."


In [54]:
# Count the opening brackets in the reference structure; in dot-bracket
# notation each '(' opens exactly one base pair.
count = official.count('(')

In [55]:
# Number of '(' characters in `official` (28), versus the 45 returned by
# solve() for the same sequence.
count

28