In [5]:
import queue
from collections import deque

In [11]:
def greedy_max_parings(s, T, return_pairings=False):
    """
    Greedily pair complementary letters of ``s`` in one left-to-right pass.

    For every letter a FIFO of still-unpaired indices is maintained. When a
    letter is read and the FIFO of its complement is non-empty, the oldest
    waiting index is removed and paired with the current index. Otherwise no
    match is possible yet, so the current index is added to the letter's own
    FIFO to wait for a later complement.

    Conceptually, the max base pairs for any string is
    min(count(A), count(U)) + min(count(C), count(G)). This algorithm finds
    the pairings greedily, without care for pseudoknots (pairs may cross).

    Parameters
    ----------
    s : str or other iterable of letters
        The sequence to pair up.
    T : dict
        Maps every letter to its complement. Every letter of ``s`` and every
        complement must be a key of ``T``.
    return_pairings : bool, optional
        If True, return the list of pairings instead of their count.

    Returns
    -------
    int or list of tuple
        The number of pairings, or (when ``return_pairings`` is True) a list
        of ``(i, j)`` tuples in the order the pairs were closed, where ``i``
        is the index of the later letter and ``j`` the index of the earlier
        letter it was matched with.

    Raises
    ------
    KeyError
        If a letter of ``s``, or the complement of one, is not a key of ``T``.
    """
    # One FIFO of unpaired indices per letter. A plain deque is enough here:
    # queue.Queue is a thread-synchronisation primitive and its locking is
    # pure overhead in this single-threaded loop.
    waiting = {letter: deque() for letter in T}
    pairings = []

    for i, letter in enumerate(s):
        partners = waiting[T[letter]]  # unpaired indices of the complement
        if partners:
            # Close a pair with the earliest still-unpaired complement.
            pairings.append((i, partners.popleft()))
        else:
            # Nothing to pair with yet; remember this index for later.
            waiting[letter].append(i)

    return pairings if return_pairings else len(pairings)


def max_pairings_possible(s, T):
    """
    Compute the max pairings possible for a string by counting letters.

    Each letter/complement couple contributes
    ``min(count(letter), count(complement))`` pairings. A letter that is its
    own complement (``T[l] == l``) can only pair with another copy of itself,
    so it contributes ``count(letter) // 2``.

    Should always equal greedy max pairings.

    Parameters
    ----------
    s : str or other iterable of letters
        The sequence to analyse.
    T : dict
        Maps every letter to its complement. Every letter of ``s`` and every
        complement must be a key of ``T``.

    Returns
    -------
    int
        The number of pairings implied by the letter counts.

    Raises
    ------
    KeyError
        If a letter of ``s``, or a complement listed in ``T``, is not a key
        of ``T``.
    """
    letter_to_count = {k: 0 for k in T}
    for l in s:
        letter_to_count[l] += 1

    seen = set()
    count = 0
    for l in T:
        if l in seen:
            continue

        # Process the letter and its match together so the couple is only
        # counted once.
        match = T[l]
        seen.add(l)
        seen.add(match)

        if match == l:
            # Self-complementary letter: two copies are used up per pairing.
            count += letter_to_count[l] // 2
        else:
            count += min(letter_to_count[l], letter_to_count[match])

    return count
        

In [12]:
# Complement table: each base maps to the base it pairs with (A-U, G-C).
T = {'A': 'U', 'G': 'C', 'C': 'G', 'U': 'A'}

In [13]:
# Clear example of something where Nussinov would get 1 but greedy gets 2:
# in 'ACUG' the A-U pair (indices 0, 2) and the C-G pair (indices 1, 3) cross
# each other, and the greedy pass does not care about such pseudoknots.
s = 'ACUG'
greedy_max_parings(s, T)

2

In [14]:
# Compare the greedy count with the count-based figure; the two numbers in
# the displayed tuple should be equal.
s = 'GCUCGGGUUCCCUAUUCAAGAGC'
greedy_max_parings(s, T), max_pairings_possible(s, T)

(10, 10)

In [16]:
# No U in this string, so the two A's cannot pair; only the 3 G / 3 C
# letters contribute pairings.
s = 'GCACGACG'
greedy_max_parings(s, T), max_pairings_possible(s, T)

(3, 3)

In [17]:
# Should have 7 maximum possible: 4 A / 4 U and 3 C / 4 G give
# min(4, 4) + min(3, 4) = 7. The spaces are only visual grouping and are
# stripped before the string is used.
s = 'A ACUG GAUC GGUUCA'.replace(' ', '')
greedy_max_parings(s, T), max_pairings_possible(s, T)

(7, 7)