In [4]:
from src.utils import *

fpath = '/home/chr/Uni/Master/5.Semester/AlgSeqII/Data/SndStrPredRNAs/rnas.fa'
seqs = readSequences(fpath)
print(seqs)

['UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC', 'CAUCAAGACCCAGCUGAGUCACUGUCACUGCCUACCAAUCUCGACCGGACCUCGACCGGCUCGUCUGUGUUGCCAAUCGACUCGGCGUGGCGUCGGUCGUGGUAGAUAGGCGGUCAUGCAUACGAAUUUUCAGCUCUUGUUCUGGUGAC']


In [5]:
import numpy as np
from rna_tools.SecondaryStructure import draw_ss
# https://rna-tools.readthedocs.io/en/latest/tools.html
# https://rna-tools.readthedocs.io/en/latest/install-dev.html#configuration


class Nussinov: # Secondary Structure Prediction
    def __init__(self, seq, edges, minHairpinLen):
        self.seq = seq
        self.edges = edges
        self.minLoopLen = minHairpinLen
        if minHairpinLen < 1:
            print('minimal Hairpin Loop length must be greater zero. Setting to 1.')
            self.minLoopLen = 1
        

    def tryBasePair(self, i, j):
        if (self.seq[i], self.seq[j]) in self.edges:
            return self.edges[(self.seq[i], self.seq[j])]
        elif (self.seq[j], self.seq[i]) in self.edges:
            return self.edges[(self.seq[j], self.seq[i])]
        else: return 0

    # wenn stack begonnen in begin stack mat. 1 eintragen
    # wenn stack erweitert dann in N+1
    def findSolutionMat(self):
        L = len(self.seq)
        N = np.zeros((L,L), dtype=np.int64)
        S = np.zeros_like(N) # encodes Stack Initialization
        for l in range(self.minLoopLen+1,L):
            for i in range(0,L-l):
                j = i + l
                case1 = N[i+1,j-1]
                if (S[i+1,j-1] or N[i+1,j-1]):
                    case1 += self.tryBasePair(i,j)
                cases = [case1, N[i+1,j], N[i,j-1]]
                if l >= 3+2*self.minLoopLen:
                    bifurcMax = np.max([N[i,k]+N[k+1,j] for k in range(i+self.minLoopLen+1, j-self.minLoopLen-1)])
                    cases.append(bifurcMax)
                N[i,j] = np.max(cases)
                if i+self.minLoopLen+1==j or N[i+1,j-1] == N[i+2,j-2]: #we have Å„o stack
                    S[i,j] = self.tryBasePair(i,j)
        self.N = N + np.ones_like(N) + S
        print(self.N)


    def traceback(self):
        L = len(self.seq)
        N = self.N
        basePairs = []
        connections = '.' * L
        stack = []
        stack.append((0,L-1))
        while len(stack) > 0:
            i,j = stack.pop(0)
            if i+self.minLoopLen < j:
                dij = self.tryBasePair(i,j)
                if N[i+1,j-1] + dij == N[i,j]:
                    if dij: 
                        basePairs.append([i,j])
                        connections = connections[:i] +'(' + connections[i+1:]
                        connections = connections[:j] +')' + connections[j+1:]
                    stack.append((i+1,j-1))
                elif N[i+1,j] == N[i,j]:
                    stack.append((i+1,j))
                elif N[i,j-1] == N[i,j]:
                    stack.append((i,j-1))
                else:
                    for k in range(i+1, j-1):
                        if N[i,k] + N[k+1,j] == N[i,j]:
                            stack.append((i,k))
                            stack.append((k+1,j))
                            break

        return basePairs, connections
    

edges = {('G', 'U'): 1, ('A', 'U'): 2, ('C', 'G'): 3}
seq = 'GGGAAUUU'
nussinov = Nussinov(seq, edges, minHairpinLen=3)
nussinov.findSolutionMat()
basePairs, connections = nussinov.traceback()
print('Base Pairs:', basePairs)
print('Connections:', connections)

[[1 1 1 1 1 2 3 3]
 [1 1 1 1 1 2 2 3]
 [1 1 1 1 1 1 2 2]
 [1 1 1 1 1 1 1 3]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]]
Base Pairs: [[0, 7], [1, 6]]
Connections: ((....))


In [6]:
edges = {('G', 'U'): 1, ('A', 'U'): 2, ('C', 'G'): 3}
for i, seq in enumerate(seqs):
    print('Sequence:', seq)
    nussinov = Nussinov(seq, edges, 3)
    nussinov.findSolutionMat()
    basePairs, connections = nussinov.traceback()
    print('Base Pairs:', basePairs)
    print('Connections:', connections)
    pltFile = f'secondStructImgs/struct_rna_{i+1}.png'
    draw_ss('rna', seq, connections, pltFile)

Sequence: UAUUAGGUUGGUGCACAAGUAAUUGCGGUUUUUGCCAAGAAAAGUAAUGGCAAAAACCGCAAUUACUUUUGCACCAGUGUAAUAAUUAGCAUCUUCCGCUAAUCUUUUUC
[[ 1  1  1 ... 94 94 94]
 [ 1  1  1 ... 96 96 94]
 [ 1  1  1 ... 92 92 92]
 ...
 [ 1  1  1 ...  1  1  1]
 [ 1  1  1 ...  1  1  1]
 [ 1  1  1 ...  1  1  1]]
Base Pairs: []
Connections: ..............................................................................................................


TypeError: chdir: path should be string, bytes, os.PathLike or integer, not NoneType