In [150]:
import cairo
import numpy as np
import matplotlib.colors as mplc

In [248]:
def setup_surface(n):
    '''draws a surface for the number of fasta entries'''
    surface = cairo.SVGSurface("example.svg", 300, 200)
    context = cairo.Context(surface)
    context.scale(300, 200)
    
    global centers
    
    centers = []
    top = 1
    centers.append(top/(n+1))
    
    while top < n:
        top += 1
        centers.append(top/(n+1))
        
    return surface, context


def draw_exon(exon, center):
    '''draws exons from a string of coordinates "start:stop"'''
    r,g,b,a = mplc.to_rgba('black')
    context.set_source_rgba(r,g,b,a)
    exon = (int(exon.split(':')[0]), int(exon.split(':')[1]))
    context.rectangle(exon[0]/length,(center-0.02), exon[1]/length ,0.04)
    
    return context.stroke()


def draw_motif(motif, col, centers):
    '''draws motifs of the desired color on the sequence region'''
    
    # use mpl color function to get the color you want
    r,g,b,a = mplc.to_rgba(col)
    
    # set that color as the context
    context.set_source_rgba(r,g,b,a)
    
    # get the first coordinate of the motif
    motif = int(motif)
    
    x = motif/length
    y = center + 20/length
    y1 = center - 20/length
    
    # create line
    context.move_to(x,y)
    context.line_to(x,y1)
    
    # return stroke
    return context.stroke()

def draw_motifs(motif_list, col, centers):
    for m in motifs:
        draw_motif(m, col, centers)


def draw_seq_regions(lengths, centers):
    # sequence lines across figure
    for l,c in zip(lengths, centers):
        x, x2 = 0, (l/1000)
        y, y2 = c, c
        context.move_to(x, y)
        context.set_line_width(0.01)
        context.line_to(x2, y2)
    return context.stroke()


def draw_svg():
    
    '''draw the svg with the right centers and lengths'''
    
    # setup the surface and the context
    surface, context = setup_surface(3)
    
    # draw the scaled regions for each entry
    regions = draw_seq_regions(lengths, centers)
    
    for cen in centers:
        # draw the exon for each center 
        draw_exon(exons, centers[1])

        # draw the motifs for each center
        draw_motifs(motifs, 'r', centers[1])

    return regions, surface.finish()

motifs  = [('7', '130', '147', '217', '338', '379'), ('17', '414', '465'), ('37', '49', '117', '169', '196', '428')]

lengths = (857, 723, 432)

exons = ('200:300', '500:300', '70:100')

draw_svg()

from IPython.display import SVG, display
SVG('example.svg')

Error: the target surface has been finished

In [238]:
centers[1]

0.5

In [131]:
## Object oriented play time, let's implement this for motif_mark

class to_do:
    '''To do list'''
    def __init__(self, store, my_list):
        self.store = 'TJS'
        self.list = my_list
        
    def add_item_to_list(self,item):
        self.list.append(item)

my_list = ['apples', 'cookies', 'milk']

td = to_do('TJS', my_list)

print(td.list)

td.add_item_to_list('milk')

print(td.list)


['apples', 'cookies', 'milk']
['apples', 'cookies', 'milk', 'milk']


In [75]:
st = "asdfa;lsdf;lkajs;dlfjkhlkjCGCChsdfoiuwerASygcyDFBBTGCATGCBTugcuTGGTGAGGTTAAAGTGCTGTGCGTasdfhlkwerltgcatgasdflksdf"

In [242]:
# def find_exon(string):
#     coordinates = []
#     for i, char in enumerate(st):
#         if char.isupper():
#             coordinates.append(i)
#     return (min(coordinates),max(coordinates))

# find_exon(st)

# motif list
motif_list
def motif_patterns(motif_list):
    'returns list of regex searchs for any motif of IUPAC nucleotide codes'
    motif_match_list = []
    
    IUPAC_dict = {'y':'(C|T|U)',
                  'r':'(A|G)',
                  's':'(C|G)',
                  'w':'(A|T|U)',
                  'k':'(G|T|U)',
                  'm':'(C|A)',
                  'b':'(C|G|T|U)',
                  'd':'(A|G|T|U)',
                  'v':'(A|C|G)',
                  'h':'(A|C|T|U)'
                 }
    
    for tif in motif_list:
        for key in IUPAC_dict:
            m = re.finditer(r'(?i)'+key, tif)
            for i in m:
                tif = tif.replace(i.group(), IUPAC_dict[key])
    
        motif_match_list.append(tif)
        
    return motif_match_list
        

motif_patterns(motif_list)

           

import re

def motif_coords(motif):
    'returns string coordinates of motif match (case insensitive)'
    motif_coords_list = []
    m = re.finditer(r'(?i)'+motif, st)
    for i in m:
        motif_coords_list.append(str(i.start())+':'+str(i.end()))
        
    return motif_coords_list

motif_coords('(C|T|U)gc(C|T|U)')

['26:30', '59:63', '77:81']


In [155]:
def get_motif_list(motifile):
    '''return list of motifs from motif file'''
    with open(motifile, 'r') as mh:

        # list to hold motifs of interest
        motifs = []

        # loop over each line storing each motif in list
        for line in mh: 
            motifs.append(line.strip())

        return motifs
    
motif_list = get_motif_list('motifs.txt')

In [97]:
def get_seq_regions(fastafile):
    with open(fastafile, 'r') as fh:
        exon_dict = {}
        for line in fh:
            line = line.strip()
            if line[0] == '>':
                header = line
                seq = '' 
            elif line[0] != '>':  
                seq += line
                exon_dict[header] = seq

{'>ADD3 chr10:111891895-111892326': 'aatgtataattatggatatatgggataactgttagcatgctcagctcactgctgaagaatttatcatctctttgtatacaggcatttgatgtatgcactaacctccctaaaatcatatgctgctttgttttgttttgcatggcttttaactaaactcttatccaacagATGCTGAGCAGGAATTACTCTCAGATGACGCTTCATCTGTTTCACAAATTCAGTCTCAAACTCAGTCACCGCAAAATGTCCCTGAAAAATTAGAAGgtactcaatgtaatttcccacatagcattcactgagttagtcttgagtctgtccctctgtgttttgttttcacgtgaggaagttgaatacctcatcacagtaagttttccatattttacttatatctcccaataattacatattttatatcattaaaaatggggcgct',
 '>INSR chr19:7149896-7151209 (reverse complement)': 'aaaattctgccagacttggagaagtggctgagtcagttgtgatgtccacatgtagtcacgtttgacatcccagggccacctcagcaggccgtctctggggagaattttctctgatttcttccccttcccttgctggacccctgcacctgctggggaagatgtagctcactccgtctagcaagtgatgggagcgagtggtccagggtcaaagccagggtgcccttactcggacacatgtggcctccaagtgtcagagcccagtggtctgtctaatgaagttccctctgtcctcaaaggcgttggttttgtttccacagAAAAACCTCTTCAGGCACTGGTGCCGAGGACCCTAGgtatgactcacctgtgcgacccctggtgcctgctccgcgcagggccggcggcgtgccaggcagatgcctcggagaacccaggggtttctgtggctttttgcatgcggcgggcagctgtgct

In [4]:
li = ['a','b','r','t']
li2 = ''.join(li)
print(li2)

abrt
