## Necessary Packages

In [1]:
import argparse
import re
import cairo

## Argparse function
- Placeholder for use in the final Python script

In [3]:
def get_arguments():
    '''() -> argparse.Namespace
    Build the command-line parser and return the parsed arguments.
    Two options are defined, both required and both strings:
    -f/--filename (the fasta file) and -m/--motifs (the motif file).'''
    cli = argparse.ArgumentParser(
        description="reads in fasta file and splicing regulation motifs, to visualize mapping")
    # (short flag, long flag, help text) for every required string option
    options = (
        ("-f", "--filename", "name of fasta file"),
        ("-m", "--motifs", "file containing motifs, 1 per line"),
    )
    for short_flag, long_flag, help_text in options:
        cli.add_argument(short_flag, long_flag, help=help_text,
                         required=True, type=str)
    return cli.parse_args()

## Core functions

In [1]:
def parse_fa(fa_file):
    '''(file) -> (dict, dict, dict)
    Parse a fasta file where introns appear in lowercase and exons appear in
    uppercase.

    Odd-numbered lines are treated as headers and even-numbered lines as
    sequence, so each record's sequence must sit on a single line.

    Returns (segments, introns, exons):
        segments: key = order in which the segment was found,
                  value = start coordinate
        introns:  key = start coordinate, value = [sequence, end coordinate]
        exons:    key = start coordinate, value = [sequence, end coordinate]

    Coordinates are 0-based offsets into the sequence line and the end
    coordinate is inclusive. Characters that are neither A-Z nor a-z belong
    to no segment and are skipped.

    Coordinates restart at 0 on every sequence line while the dictionaries
    are shared, so in a multi-record file a later record overwrites intron
    and exon entries that start at the same coordinate.'''
    segments = {}  # key will be seg, value will be start coordinate
    introns = {}   # key will be start coordinate, value will be seq, end coordinate
    exons = {}     # key will be start coordinate, value will be seq, end coordinate
    seg = 0        # counts segment placement for mapping
    # One match per maximal run of same-case letters. Scanning this way cannot
    # index past the end of the line and cannot stall on a non-letter.
    run = re.compile(r'[A-Z]+|[a-z]+')
    with open(fa_file) as fa:
        for ln, line in enumerate(fa, start=1):
            if ln % 2 == 1:  # header line, nothing to record
                continue
            line = line.rstrip('\r\n')
            for match in run.finditer(line):
                seq = match.group()
                start = match.start()
                fin = match.end() - 1  # inclusive end coordinate
                target = exons if seq.isupper() else introns
                target[start] = [seq, fin]
                segments[seg] = start
                seg += 1
    return segments, introns, exons

In [None]:
def id_motif(m_file, introns, exons):
    '''(file, dict, dict) -> dict
    Read the motif file (one motif per line) and prepare the motif mapping.

    For every motif a lookup table is built that maps the motif itself, and
    every variant with exactly one position replaced by one of the codes
    W, S, M, K, R, Y or N, back to the motif as written in the file.

    Returns a dictionary keyed by motif whose values are lists meant to hold
    start coordinates. The search itself is not implemented yet, so every
    list is returned empty, and the dictionary is empty when both introns
    and exons are empty.'''
    codes = 'WSMKRYN'  # single-position substitutions tried for each motif
    motif_dict = {}    # define acceptable versions of motifs
    motif_coords = {}  # dict for motif mapping
    # parse motif file
    with open(m_file) as motifs:
        for raw_line in motifs:
            # Drop the line ending so it is neither stored in the keys nor
            # treated as a substitutable position.
            motif = raw_line.strip()
            if not motif:  # blank line, not a motif
                continue
            motif_dict[motif] = motif
            for pos in range(len(motif)):
                for code in codes:
                    motif_dict[motif[:pos] + code + motif[pos + 1:]] = motif
    # Introns first, then exons: make sure every motif has a coordinate list.
    for seg_dict in (introns, exons):
        for _start in seg_dict:
            for motif in motif_dict.values():
                motif_coords.setdefault(motif, [])
                # TODO: search the segment sequence for each variant and
                # append the start coordinates to motif_coords[motif]
    return motif_coords

In [None]:
def draw_motifs(s_dict, m_dict, i_dict, e_dict):
    '''(dict, dict, dict, dict) -> None
    Placeholder for the drawing step; currently does nothing and returns None.

    Intended inputs are the dictionaries produced by parse_fa() and id_motif():
        s_dict: segments, key = order found, value = true start position
        m_dict: motifs,   key = motif, value = list of start positions
        i_dict: introns,  key = true start position, value = sequence, end position
        e_dict: exons,    key = true start position, value = sequence, end position
    The planned output is an SVG image of the gene showing introns, exons and
    the mapped motifs, drawn with pycairo.'''
    # drawing code still to be added
    return

# Main function to use core functions

In [1]:
def main():
    '''() -> None
    Run the pipeline: parse the fasta file, set up the motif mapping, then
    hand everything to the drawing step.

    NOTE(review): reads a module-level `args` (with .filename and .motifs)
    that is not assigned in this part of the file; presumably it is the
    result of get_arguments() - confirm it is set before main() is called.'''
    segments, introns, exons = parse_fa(args.filename)
    draw_motifs(segments, id_motif(args.motifs, introns, exons), introns, exons)

# Testing functions