In [6]:
from src.utils import *

# Input file holding the RNA sequences (presumably FASTA, judging by the .fa
# extension -- TODO confirm).
fpath = 'rnas.fa'
# readSequences is not defined in this notebook; it is presumably provided by
# the star-import of src.utils. The recorded output shows a list of sequence
# strings; `seqs` is reused by the folding loop further down.
seqs = readSequences(fpath)
print(seqs)

['UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC', 'CAUCAAGACCCAGCUGAGUCACUGUCACUGCCUACCAAUCUCGACCGGACCUCGACCGGCUCGUCUGUGUUGCCAAUCGACUCGGCGUGGCGUCGGUCGUGGUAGAUAGGCGGUCAUGCAUACGAAUUUUCAGCUCUUGUUCUGGUGAC']


In [None]:
from collections import deque

import numpy as np
from rna_tools.SecondaryStructure import draw_ss
# https://rna-tools.readthedocs.io/en/latest/tools.html
# https://rna-tools.readthedocs.io/en/latest/install-dev.html#configuration


class Nussinov: # Secondary Structure Prediction
    """Weighted Nussinov-style dynamic programme for RNA secondary structure.

    The score matrix ``N`` is filled so that ``N[i, j]`` holds the best total
    pairing score achievable on the subsequence ``seq[i..j]``; ``traceback``
    then recovers one optimal set of nested base pairs.

    Parameters
    ----------
    seq : str
        The sequence to fold.
    edges : dict
        Maps a ``(base, base)`` tuple to the score of that pair. Lookups are
        order-insensitive, so ``('G', 'U')`` also covers ``('U', 'G')``.
        Scores are stored in an int64 matrix, so they should be integers.
    minLoop : int, optional
        Minimum number of unpaired positions a pair must enclose. The default
        of 0 keeps the original behaviour, where adjacent bases may pair.
    """

    def __init__(self, seq, edges, minLoop=0):
        self.seq = seq
        self.edges = edges
        self.minLoop = minLoop
        # Score matrix; populated by findSolutionMat().
        self.N = None

    def tryBasePair(self, i, j):
        """Return the score for pairing positions ``i`` and ``j`` (0 if none)."""
        # Positions closer than the minimum hairpin size can never pair.
        if self.minLoop > 0 and abs(j - i) <= self.minLoop:
            return 0
        first, second = self.seq[i], self.seq[j]
        if (first, second) in self.edges:
            return self.edges[(first, second)]
        if (second, first) in self.edges:
            return self.edges[(second, first)]
        return 0

    def findSolutionMat(self, verbose=True):
        """Fill the score matrix and store it on ``self.N``.

        Cells are filled by increasing span ``j - i``. Each cell takes the
        best of: pairing i with j, leaving i unpaired, leaving j unpaired, or
        splitting the interval into two independent halves (bifurcation).

        Parameters
        ----------
        verbose : bool, optional
            When True (the default, matching the original behaviour) the
            finished matrix is printed.
        """
        L = len(self.seq)
        N = np.zeros((L, L), dtype=np.int64)
        for span in range(1, L):
            for i in range(L - span):
                j = i + span
                # For span == 1 the N[i+1, j-1] read lands in the lower
                # triangle, which is never written and therefore stays 0.
                best = max(
                    N[i + 1, j - 1] + self.tryBasePair(i, j),
                    N[i + 1, j],
                    N[i, j - 1],
                )
                if span >= 3:
                    # Bifurcation over k = i+1 .. j-2: N[i, k] + N[k+1, j].
                    # The k = i and k = j-1 splits coincide with the two
                    # "unpaired" cases above, so they are not repeated.
                    best = max(best, np.max(N[i, i + 1:j - 1] + N[i + 2:j, j]))
                N[i, j] = best
        self.N = N
        if verbose:
            print(N)

    def traceback(self):
        """Recover one optimal structure from the score matrix.

        If the matrix has not been computed yet it is filled first (without
        printing). Sub-intervals are processed in first-in-first-out order,
        which determines the order of the returned pairs.

        Returns
        -------
        basePairs : list of [i, j]
            Paired positions with ``i < j``.
        connections : str
            Dot-bracket string of the same length as the sequence.
        """
        L = len(self.seq)
        N = getattr(self, 'N', None)
        if N is None:
            self.findSolutionMat(verbose=False)
            N = self.N

        basePairs = []
        # A mutable list of characters avoids rebuilding a string per pair.
        structure = ['.'] * L
        # deque gives O(1) pops from the front; list.pop(0) is O(n).
        pending = deque([(0, L - 1)])
        while pending:
            i, j = pending.popleft()
            if i >= j:
                continue
            dij = self.tryBasePair(i, j)
            if dij and N[i + 1, j - 1] + dij == N[i, j]:
                basePairs.append([i, j])
                structure[i] = '('
                structure[j] = ')'
                pending.append((i + 1, j - 1))
            elif N[i + 1, j] == N[i, j]:
                pending.append((i + 1, j))
            elif N[i, j - 1] == N[i, j]:
                pending.append((i, j - 1))
            else:
                # Only a bifurcation can explain the score; take the first
                # split point that reproduces it.
                for k in range(i + 1, j - 1):
                    if N[i, k] + N[k + 1, j] == N[i, j]:
                        pending.append((i, k))
                        pending.append((k + 1, j))
                        break

        return basePairs, ''.join(structure)

In [None]:
# Pair scores used by Nussinov.tryBasePair; the lookup is order-insensitive,
# so each pair only needs to be listed once.
edges = {('G', 'U'): 1, ('A', 'U'): 2, ('C', 'G'): 3}
# Fold every loaded sequence and draw its predicted structure.
for i, seq in enumerate(seqs):
    print('Sequence:', seq)
    nussinov = Nussinov(seq, edges)
    # Fills the score matrix (and prints it as a side effect).
    nussinov.findSolutionMat()
    basePairs, connections = nussinov.traceback()
    print('Base Pairs:', basePairs)
    print('Connections:', connections)
    # Output images are numbered from 1, hence i+1.
    pltFile = f'sndStrImgs/sndStrRNA-{i+1}.png'
    # NOTE(review): draw_ss comes from rna_tools; presumably it renders the
    # dot-bracket string to pltFile and needs sndStrImgs/ to exist -- confirm
    # against the rna-tools documentation.
    draw_ss('rna', seq, connections, pltFile)

Sequence: UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC
[[  0   2   2 ... 109 109 112]
 [  0   0   2 ... 109 109 111]
 [  0   0   0 ... 107 107 110]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]
Base Pairs: [[0, 1], [2, 5], [6, 109], [3, 4], [7, 37], [38, 108], [8, 36], [39, 107], [9, 35], [40, 106], [10, 34], [41, 105], [42, 104], [11, 14], [15, 33], [43, 103], [12, 13], [16, 32], [17, 31], [44, 45], [46, 102], [18, 30], [47, 101], [20, 29], [48, 95], [21, 28], [22, 27], [49, 50], [51, 94], [97, 98], [99, 100], [23, 26], [52, 93], [24, 25], [53, 69], [70, 92], [54, 68], [55, 67], [71, 78], [72, 77], [79, 80], [81, 91], [73, 76], [82, 90], [58, 59], [60, 66], [84, 85], [86, 87], [88, 89], [61, 62], [63, 64]]
Connections: ()(())((((((())((((.((((()))))))))))))((((((()(((()(((((..()(()().))))((((..)))()((.()()())))))).()())))))))))
Sequence: CAUCAAGACCCAGCUGAGUCACUGUCACUG

In [11]:
# Small worked example: fold a short sequence whose score matrix is small
# enough to inspect in full in the printed output.
edges = {('G', 'U'): 1, ('A', 'U'): 2, ('C', 'G'): 3}
seq = 'GGGAAUUU'
nussinov = Nussinov(seq, edges)
# Prints the 8x8 score matrix as a side effect.
nussinov.findSolutionMat()
basePairs, connections = nussinov.traceback()
print('Base Pairs:', basePairs)
print('Connections:', connections)

[[0 0 0 0 0 2 4 5]
 [0 0 0 0 0 2 4 5]
 [0 0 0 0 0 2 4 5]
 [0 0 0 0 0 2 4 4]
 [0 0 0 0 0 2 2 2]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
Base Pairs: [[0, 7], [3, 6], [4, 5]]
Connections: (..(()))
