In [15]:
# NOTE(review): the wildcard import hides which names this notebook takes from
# src.utils; readSequences below is presumably one of them — confirm and
# consider importing it explicitly.
from src.utils import *

# Load the sequences from the FASTA file and echo them for inspection.
# The recorded output shows a list of RNA strings (alphabet A/C/G/U).
fpath = 'rnas.fa'
seqs = readSequences(fpath)
print(seqs)


['UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC', 'CAUCAAGACCCAGCUGAGUCACUGUCACUGCCUACCAAUCUCGACCGGACCUCGACCGGCUCGUCUGUGUUGCCAAUCGACUCGGCGUGGCGUCGGUCGUGGUAGAUAGGCGGUCAUGCAUACGAAUUUUCAGCUCUUGUUCUGGUGAC']


In [None]:
from collections import deque

import numpy as np
from rna_tools.SecondaryStructure import draw_ss
# https://rna-tools.readthedocs.io/en/latest/tools.html
# https://rna-tools.readthedocs.io/en/latest/install-dev.html#configuration


class Nussinov: # Secondary Structure Prediction
    """Nussinov dynamic-programming predictor for RNA secondary structure.

    The score matrix ``N`` holds in ``N[i, j]`` the best total pair score
    obtainable on the subsequence ``seq[i..j]`` using nested pairs only.
    ``traceback`` recovers one optimal structure from that matrix.
    """

    def __init__(self, seq, edges, minLoopLen=0):
        """Store the sequence and the pair-scoring table.

        Parameters
        ----------
        seq : str
            Sequence to fold.
        edges : dict
            Maps a ``(base, base)`` tuple to the score of that pair. A pair is
            looked up in both orientations, so ``('C', 'G')`` also covers G-C.
        minLoopLen : int, optional
            Minimum number of bases that must lie strictly between two paired
            positions. The default ``0`` keeps the original behaviour, in
            which directly adjacent bases may pair (``()`` in the dot-bracket
            string). Pass e.g. ``3`` to forbid such empty hairpin loops.
        """
        self.seq = seq
        self.edges = edges
        self.minLoopLen = minLoopLen
        self.N = None  # filled by findSolutionMat

    def tryBasePair(self, i, j):
        """Return the score for pairing positions ``i`` and ``j`` (0 if none).

        The pair is rejected when fewer than ``minLoopLen`` bases separate the
        two positions; otherwise the score is read from ``edges``, trying the
        ``(seq[i], seq[j])`` key first and the reversed key second.
        """
        # Guarded by truthiness so that minLoopLen == 0 is an exact no-op.
        if self.minLoopLen and abs(j - i) <= self.minLoopLen:
            return 0
        left, right = self.seq[i], self.seq[j]
        if (left, right) in self.edges:
            return self.edges[(left, right)]
        return self.edges.get((right, left), 0)

    def findSolutionMat(self, verbose=True):
        """Fill the score matrix and store it as ``self.N``.

        Parameters
        ----------
        verbose : bool, optional
            When true (the default, matching the original behaviour) the
            finished matrix is printed.
        """
        L = len(self.seq)
        N = np.zeros((L, L), dtype=np.int64)
        # Fill diagonal by diagonal so every shorter span is already final.
        for span in range(1, L):
            for i in range(L - span):
                j = i + span
                # For span == 1, N[i+1, j-1] reads the untouched lower
                # triangle, which is 0.
                best = max(
                    N[i + 1, j - 1] + self.tryBasePair(i, j),  # i pairs with j
                    N[i + 1, j],                               # i unpaired
                    N[i, j - 1],                               # j unpaired
                )
                if span >= 3:
                    # Bifurcation over split points k = i+1 .. j-2; the splits
                    # k = i and k = j-1 coincide with the two unpaired cases.
                    best = max(best, (N[i, i + 1:j - 1] + N[i + 2:j, j]).max())
                N[i, j] = best
        self.N = N
        if verbose:
            print(N)

    def traceback(self):
        """Recover one optimal structure from the score matrix.

        The matrix is computed on demand (without printing) if
        ``findSolutionMat`` has not been called yet.

        Returns
        -------
        basePairs : list of [int, int]
            Paired index pairs, in the breadth-first order in which the
            sub-intervals are visited.
        connections : str
            Dot-bracket string of the same length as the sequence.
        """
        if getattr(self, 'N', None) is None:
            self.findSolutionMat(verbose=False)
        N = self.N
        L = len(self.seq)
        basePairs = []
        marks = ['.'] * L
        # FIFO work list of intervals still to be resolved.
        pending = deque([(0, L - 1)])
        while pending:
            i, j = pending.popleft()
            if i >= j:
                continue
            dij = self.tryBasePair(i, j)
            if N[i + 1, j - 1] + dij == N[i, j]:
                # With dij == 0 this branch just drops both ends unpaired.
                if dij:
                    basePairs.append([i, j])
                    marks[i], marks[j] = '(', ')'
                pending.append((i + 1, j - 1))
            elif N[i + 1, j] == N[i, j]:
                pending.append((i + 1, j))
            elif N[i, j - 1] == N[i, j]:
                pending.append((i, j - 1))
            else:
                # Take the first split point that reproduces the optimum.
                for k in range(i + 1, j - 1):
                    if N[i, k] + N[k + 1, j] == N[i, j]:
                        pending.append((i, k))
                        pending.append((k + 1, j))
                        break

        return basePairs, ''.join(marks)

In [None]:
# Pair scores handed to Nussinov: A-U and C-G pairs each score 1.
edges = {('A', 'U'): 1, ('C', 'G'): 1}
# Fold every sequence in `seqs`, print the result and pass it to draw_ss.
for i, seq in enumerate(seqs):
    print('Sequence:', seq)
    nussinov = Nussinov(seq, edges)
    # findSolutionMat must run before traceback: it stores the matrix on the
    # instance (and prints it).
    nussinov.findSolutionMat()
    basePairs, connections = nussinov.traceback()
    print('Base Pairs:', basePairs)
    print('Connections:', connections)
    # Output files are numbered from 1.
    # NOTE(review): presumably draw_ss writes the image to pltFile and the
    # secondStructImgs/ directory must already exist — confirm.
    pltFile = f'secondStructImgs/struct_rna_{i+1}.png'
    draw_ss('rna', seq, connections, pltFile)

In [None]:
# Simple example
# Named exampleSeq rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
exampleSeq = 'GGAAACC'
nussinov = Nussinov(exampleSeq, edges)
nussinov.findSolutionMat()
basePairs, connections = nussinov.traceback()
print(basePairs)
print(connections)
draw_ss('rna secondary structure', exampleSeq, connections, 'example2.png')

[[0 0 0 0 0 1 2]
 [0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]
[[0, 6], [1, 5]]
((...))


In [None]:
# Complex lecture example
# Named exampleSeq rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
exampleSeq = 'AACGGAACCAACAUGGAUUCAUGCUUCGGCCCUGGUCGCG'
nussinov = Nussinov(exampleSeq, edges)
nussinov.findSolutionMat()
basePairs, connections = nussinov.traceback()
print(basePairs)
print(connections)
draw_ss('rna secondary structure', exampleSeq, connections, 'example3.png')

[[ 0  0  0 ... 16 16 17]
 [ 0  0  0 ... 16 16 17]
 [ 0  0  0 ... 15 16 16]
 ...
 [ 0  0  0 ...  0  1  1]
 [ 0  0  0 ...  0  0  1]
 [ 0  0  0 ...  0  0  0]]
[[1, 35], [36, 39], [2, 34], [37, 38], [3, 8], [4, 7], [9, 25], [26, 33], [10, 24], [27, 31], [11, 14], [15, 23], [28, 30], [12, 13], [16, 17], [19, 22], [20, 21]]
.((((..))(((())(().(()))))(((.)).)))(())


In [None]:
# Simpler lecture example
# Named exampleSeq rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
exampleSeq = 'ACUAAGCUUCGACGGGGU'
nussinov = Nussinov(exampleSeq, edges)
nussinov.findSolutionMat()
basePairs, connections = nussinov.traceback()
print(basePairs)
print(connections)
draw_ss('rna secondary structure', exampleSeq, connections, 'example.png')


[[0 0 1 1 1 1 1 2 3 3 4 4 4 5 5 5 5 6]
 [0 0 0 0 0 1 1 1 2 2 3 3 3 4 5 5 5 5]
 [0 0 0 0 0 0 0 1 2 2 3 3 3 4 4 4 4 5]
 [0 0 0 0 0 0 0 1 2 2 3 3 3 4 4 4 4 5]
 [0 0 0 0 0 0 0 1 1 1 2 2 2 3 3 3 3 4]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 2 3 3 3 3]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 2 3 3 3 3]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 3]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 3]
 [0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 3]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
[[0, 17], [1, 16], [3, 8], [4, 7], [9, 10], [12, 13]]
((.((..))().()..))


In [12]:
# Draw a hard-coded sequence / dot-bracket pair with draw_rna.
# NOTE(review): the recorded run of this cell raised
# "RuntimeError: Error occured while drawing RNA 10 9". The structure string
# contains "()" at indices 9/10 (a pair with no bases in between), which is
# presumably what the drawer rejects — confirm.
from draw_rna.ipynb_draw import draw_struct
draw_struct('ACUAAGCUUCGACGGGGU', '((.((..))().()..))')

Error occured while drawing RNA 10 9


RuntimeError: Error occured while drawing RNA 10 9