In [1]:
from src.utils import *

# Load the input sequences that the later cells iterate over.
# The path is relative, so it presumably resolves against the kernel's
# working directory - TODO confirm.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# 'rnas.fa'; check where the file lives before relying on `seqs` downstream.
fpath = 'rnas.fa'
# readSequences is not defined in this cell; presumably it is provided by the
# star import from src.utils - verify.
seqs = readSequences(fpath)
print(seqs)

FileNotFoundError: [Errno 2] No such file or directory: 'rnas.fa'

In [None]:
from collections import deque

import numpy as np
from rna_tools.SecondaryStructure import draw_ss
# https://rna-tools.readthedocs.io/en/latest/tools.html
# https://rna-tools.readthedocs.io/en/latest/install-dev.html#configuration


class Nussinov: # Secondary Structure Prediction
    """Weighted base-pair maximisation by dynamic programming.

    The instance holds a sequence, a table of pair weights and a minimal
    index distance between paired positions. ``findSolutionMat`` fills the
    score matrix ``N`` and ``traceback`` recovers one optimal set of pairs
    together with its dot-bracket string.

    Parameters
    ----------
    seq : str
        Sequence to fold; indexed character by character.
    edges : dict
        Maps a 2-tuple of characters to the score of pairing them. The
        lookup is symmetric: ``(a, b)`` is tried first, then ``(b, a)``.
    minHairpinLen : int
        Positions ``i`` and ``j`` may only pair when
        ``j - i > minHairpinLen``. Values below 1 are replaced by 1 and a
        notice is printed.
    """

    def __init__(self, seq, edges, minHairpinLen):
        self.seq = seq
        self.edges = edges
        self.minLoopLen = minHairpinLen
        if minHairpinLen < 1:
            print('minimal Hairpin Loop length must be greater zero. Setting to 1.')
            self.minLoopLen = 1


    def tryBasePair(self, i, j):
        """Return the pairing score of positions ``i`` and ``j`` (0 if none).

        The ``edges`` table is consulted in both orientations, so an entry
        for ``('C', 'G')`` also scores a ``G``/``C`` pair.
        """
        pair = (self.seq[i], self.seq[j])
        if pair in self.edges:
            return self.edges[pair]
        return self.edges.get(pair[::-1], 0)


    def _fillMatrix(self):
        """Compute and return the score matrix without printing it.

        ``N[i, j]`` is the best total score achievable on ``seq[i..j]``.
        Cells are filled by increasing span ``l = j - i``; spans up to
        ``minLoopLen`` stay 0 because no pair is allowed there.
        """
        L = len(self.seq)
        m = self.minLoopLen
        N = np.zeros((L, L), dtype=np.int64)
        for l in range(m + 1, L):
            for i in range(L - l):
                j = i + l
                cases = [N[i+1, j-1] + self.tryBasePair(i, j), N[i+1, j], N[i, j-1]]
                if l >= 3 + 2*m:
                    # Bifurcation: split at k with i+m+1 <= k <= j-m-2 so that
                    # both halves are long enough to contain a pair. The two
                    # slices below line up N[i, k] with N[k+1, j] for every k.
                    cases.append(np.max(N[i, i+m+1:j-m-1] + N[i+m+2:j-m, j]))
                N[i, j] = np.max(cases)
        return N


    def findSolutionMat(self):
        """Fill the score matrix, store it in ``self.N`` and print it."""
        self.N = self._fillMatrix()
        print(self.N)


    def traceback(self):
        """Recover one optimal structure from the score matrix.

        If ``findSolutionMat`` has not been called yet, the matrix is
        computed here (silently) instead of failing with AttributeError.

        Returns
        -------
        basePairs : list of [i, j]
            Paired index positions, in the order they are discovered
            (sub-intervals are processed first-in, first-out).
        connections : str
            Dot-bracket string of the same length as the sequence.
        """
        N = getattr(self, 'N', None)
        if N is None:
            N = self.N = self._fillMatrix()

        L = len(self.seq)
        basePairs = []
        # Mutable character buffer; joined once at the end instead of
        # re-slicing the whole string for every pair.
        connections = ['.'] * L
        # FIFO queue of intervals still to be resolved.
        pending = deque([(0, L - 1)])
        while pending:
            i, j = pending.popleft()
            if i + self.minLoopLen >= j:
                # Interval too short to hold any pair.
                continue
            dij = self.tryBasePair(i, j)
            if N[i+1, j-1] + dij == N[i, j]:
                if dij:
                    basePairs.append([i, j])
                    connections[i] = '('
                    connections[j] = ')'
                pending.append((i+1, j-1))
            elif N[i+1, j] == N[i, j]:
                pending.append((i+1, j))
            elif N[i, j-1] == N[i, j]:
                pending.append((i, j-1))
            else:
                # Only a bifurcation can explain the score: take the first
                # split point that reproduces it.
                for k in range(i+1, j-1):
                    if N[i, k] + N[k+1, j] == N[i, j]:
                        pending.append((i, k))
                        pending.append((k+1, j))
                        break

        return basePairs, ''.join(connections)
    

# Small worked example: score table, toy sequence, then fill + traceback.
edges = {
    ('G', 'U'): 1,
    ('A', 'U'): 2,
    ('C', 'G'): 3,
}
seq = 'GGGAAUUU'

nussinov = Nussinov(seq, edges, minHairpinLen=3)
nussinov.findSolutionMat()  # also prints the score matrix
basePairs, connections = nussinov.traceback()

print('Base Pairs:', basePairs)
print('Connections:', connections)


[[0 0 0 0 0 1 2 2]
 [0 0 0 0 0 1 1 2]
 [0 0 0 0 0 0 1 2]
 [0 0 0 0 0 0 0 2]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
Base Pairs: [[0, 7], [1, 6]]
Connections: ((....))


In [None]:
# Run the predictor on every entry of `seqs` (loaded in an earlier cell),
# print the result and pass it to draw_ss together with an output path.
edges = {
    ('G', 'U'): 1,
    ('A', 'U'): 2,
    ('C', 'G'): 3,
}

for i, seq in enumerate(seqs):
    print('Sequence:', seq)

    nussinov = Nussinov(seq, edges, minHairpinLen=3)
    nussinov.findSolutionMat()
    basePairs, connections = nussinov.traceback()

    print('Base Pairs:', basePairs)
    print('Connections:', connections)

    # Output files are numbered from 1.
    pltFile = f'secondStructImgs/struct_rna_{i+1}.png'
    draw_ss('rna', seq, connections, pltFile)

Sequence: UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC
[[  0   0   0 ... 100 100 100]
 [  0   0   0 ... 100 100 100]
 [  0   0   0 ...  98  98  98]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]
Base Pairs: [[1, 108], [4, 105], [5, 104], [6, 103], [7, 101], [8, 100], [9, 74], [75, 99], [10, 73], [76, 98], [11, 72], [77, 97], [12, 71], [78, 96], [13, 70], [14, 69], [79, 87], [88, 95], [80, 86], [16, 67], [81, 85], [90, 94], [17, 66], [18, 65], [19, 64], [20, 63], [21, 62], [22, 61], [23, 60], [24, 59], [25, 58], [26, 57], [27, 56], [28, 55], [29, 54], [30, 53], [31, 52], [32, 51], [33, 50], [34, 49], [35, 48], [36, 47], [39, 44]]
Connections: .(..(((((((((((.(((((((((((((((((((((..(....)..))))))))))))))))))))).))))))(((((((...)))(.(...)))))))).)))..).
Sequence: CAUCAAGACCCAGCUGAGUCACUGUCACUGCCUACCAAUCUCGACCGGACCUCGACCGGCUCGUCUGUGUUGCCAAUCGACUCGGCGUGGCGUCGGUCGUGGUAGAUAGGC