# SSEQ: Subsequences and Spliced Motifs

In [111]:
# Path of the FASTA input file. The second assignment overrides the first,
# so only input_2.txt is actually used; swap/comment the lines to switch.
input_file = '/home/hanuman/docs/biomatics/rosalind/SSEQ/input_1.txt'
input_file = '/home/hanuman/docs/biomatics/rosalind/SSEQ/input_2.txt'

## FASTA parser

In [112]:
def FASTA_iterator(fasta_filename):
    """
    Lazily read a FASTA file, yielding one record at a time.

    Each yielded item is a tuple ``(identifier, sequence)`` where
    ``identifier`` is the header line without the leading '>' and with
    surrounding whitespace stripped, and ``sequence`` is the concatenation
    of the (stripped) lines that follow the header.

    A header that is immediately followed by another header (i.e. a record
    with an empty sequence) is not yielded, except when it is the last
    record of the file. An empty file yields nothing.
    """
    with open(fasta_filename, 'rt') as fasta:
        identifier = ''
        # Collect sequence lines and join once per record instead of
        # growing a string with repeated '+='.
        chunks = []
        for line in fasta:
            if line.startswith('>'):
                if chunks:
                    yield (identifier, ''.join(chunks))
                    chunks = []
                identifier = line[1:].strip()
            else:
                stripped = line.strip()
                # Skip blank lines so that ``chunks`` is non-empty exactly
                # when some sequence data has been read.
                if stripped:
                    chunks.append(stripped)
        # Emit the trailing record, but do not fabricate an empty
        # ('', '') record when the file contained nothing at all.
        if identifier or chunks:
            yield (identifier, ''.join(chunks))

## The following code performs an exhaustive search for all valid index sets (far beyond what the problem requires)

In [113]:
def subind(sub, seq):
    """
    For every symbol of ``sub`` list the 0-based positions where it
    occurs in ``seq``.

    Returns a list with one entry per symbol of ``sub`` (in order); each
    entry is the ascending list of indices ``i`` such that
    ``seq[i] == sub[j]``.
    """
    return [
        [pos for pos, symbol in enumerate(seq) if symbol == wanted]
        for wanted in sub
    ]

def sorted_selection_exhaustive(index_list):
    """
    Enumerate every strictly increasing selection of indices.

    ``index_list`` is a list of candidate-index lists (one per position,
    e.g. the output of ``subind``). The result is the list of all
    selections ``[i0, i1, ...]`` that pick one index from each candidate
    list in order, with ``i0 < i1 < ...``. Selections are returned in
    breadth-first order, i.e. ordered by the candidates' order at the
    first position, then the second, and so on.

    An empty ``index_list`` yields ``[[]]`` (the single empty selection).
    """
    # Extend all partial selections one position at a time. This yields
    # the same order as a FIFO queue but avoids the O(n) ``list.pop(0)``.
    partial = [[]]
    for candidates in index_list:
        partial = [
            chosen + [i]
            for chosen in partial
            for i in candidates
            if not chosen or i > chosen[-1]
        ]
    return partial

def sorted_selection_depth(index_list):
    """
    Depth-first search for a single strictly increasing index selection.

    ``index_list`` holds one list of candidate indices per position. The
    first complete selection reached by the search is returned; because
    the most recently pushed candidate is explored first, later candidates
    of each position are tried before earlier ones. Returns ``None`` when
    no strictly increasing selection exists.
    """
    target_length = len(index_list)
    stack = [[]]
    while stack:
        chosen = stack.pop()
        depth = len(chosen)
        if depth >= target_length:
            return chosen
        # Push every candidate that keeps the selection strictly increasing.
        stack.extend(
            chosen + [i]
            for i in index_list[depth]
            if not chosen or i > chosen[-1]
        )
    return None

## The following code is a fast greedy algorithm that finds just one valid set of indices

In [114]:
def submotif(sub, seq):
    """
    Greedily locate ``sub`` as a subsequence of ``seq``.

    Scans ``seq`` once from the left and matches each symbol of ``sub``
    at its earliest possible position. Returns the list of 0-based indices
    of the matched symbols.

    If ``sub`` is not a subsequence of ``seq`` the returned list is shorter
    than ``sub`` (it contains only the indices matched before ``seq`` was
    exhausted). An empty ``sub`` yields an empty list.
    """
    outcome = []
    m = len(sub)
    if m == 0:
        return outcome
    j = 0
    for i, v in enumerate(seq):
        if v == sub[j]:
            outcome.append(i)
            j += 1
            if j == m:
                # Whole motif matched: no need to scan the rest of seq.
                break
    return outcome

## Main

In [116]:
def spliced_motifs(input_file, function):
    """
    Read two FASTA records and print where the second occurs in the first.

    The first record of ``input_file`` is taken as the sequence and the
    second as the motif. ``function(sub, seq)`` must return a list of
    0-based integer indices; they are converted to 1-based positions and
    printed on one line, separated by single spaces.
    """
    records = FASTA_iterator(input_file)
    _, seq = next(records)
    _, sub = next(records)
    zero_based = function(sub, seq)
    print(' '.join(str(index + 1) for index in zero_based))

In [117]:
# Run the greedy search (submotif) on the selected input and print the
# resulting 1-based positions.
spliced_motifs(input_file, submotif)

1 6 7 12 16 26 41 42 43 44 46 53 64 68 69 72 84 85 87 89 94 114 118 121 122 124 126 128 132 133 135 136 139 141 142 146 153 157 159 164 168 187 188 195 196 204 211 213 217 218 219 222 227 228 229 234 239 247 248 250 257 262 266 268 269 270 280 282 284 286 290 292 296 301 304 319 331 334 336 337 338 341 344 349 351 352 368 371 372 375 378 382
