plotting util and transcript changes

jeffhsu3 · Dec 3, 2015 · 6710143 · 6710143
1 parent 0858dc2
commit 6710143
Show file tree

Hide file tree

Showing 3 changed files with 211 additions and 97 deletions.
diff --git a/genda/plotting/plotting_utils.py b/genda/plotting/plotting_utils.py
@@ -198,7 +198,7 @@ def create_gene_path(gtf_iter, gene, x_scale):
     -------
     None
     """
-    pass
+    raise NotImplementedError
 
 
 def draw_arrows(xmin, xmax, y, ax, spacing=0.001):
@@ -231,3 +231,52 @@ def get_path_max_and_min(gene_dict):
             for k in j.vertices:
                 points.append(k[0])
     return(min(points), max(points))
+
+
+def plot_transcript_i(transcript, ax, y=0, height=2., 
+        intron_scale=0.10, scale=1):
+    '''
+    Arguments
+    =========
+    transcript - a list of exons (start, end, exon_number)
+    :TODO transcript could be an object.  
+    ax - matplotlib axes object
+
+    Returns:
+    =======
+    matplotlib.axis
+    new_scaled_ylim and xlim
+    '''
+    xmin, xmax = 1e12, 0
+    ymin, ymax = ax.get_ylim()
+    beg_exon = transcript[0]
+    path = make_rectangle(beg_exon[0], beg_exon[1], y, y+height)
+    patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
+    ax.add_patch(patch)
+    rx = transcript[0][1]
+    for i, exon in enumerate(transcript[1:]):
+        intron_l = (exon[0] - transcript[i - 1][1]) * intron_scale
+        exon_length = exon[1] - exon[0]
+        ns = rx + intron_l
+        ne = rx + exon_length
+        path = make_rectangle(ns, ne, y, y+height)
+        patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
+        ax.add_patch(patch)
+        rx = rx + intron_l + exon_length
+        if ns < xmin: xmin = ns
+        if ne > xmax: xmax = ne
+    return(ax, (beg_exon[0], xmax))
+
+
+
+def add_size_legends(ax, size_base=200):
+    sizes = [((size_base * i ) + 20) for i in np.linspace(0,0.5, num = 4)]
+    l1 = ax.scatter([],[], s=sizes[0])
+    l2 = ax.scatter([],[], s=sizes[1])
+    l3 = ax.scatter([],[], s=sizes[2])
+    l4 = ax.scatter([],[], s=sizes[3])
+    leg = ax.legend([l1, l2, l3, l4], 
+            labels = np.around(np.linspace(0, 0.5, num=4), decimals=2),
+            fontsize=14, loc=0,
+            frameon=True, title='maf', scatterpoints=1)
+    return(ax)
diff --git a/genda/transcripts/__init__.py b/genda/transcripts/__init__.py
@@ -1,97 +1,2 @@
-import numpy as np
-
-
-class SNPInfo(object):
-    """ Docstring
-
-    """
-
-    def __init__(self, position):
-        self.position = position
-
-    def __call__(self, position):
-        pass
-
-class Exon(object):
-    """ A region
-    """
-
-    def __init__(self, region, start, end):
-        self.region = region
-        try:
-            self.start = int(start)
-            self.end = int(end)
-        except ValueError:
-            print("Start and End positions need to be integers")
-
-class Transcript(object):
-    """ A collection of exons
-    """
-
-    def __init__(self, exons=[]):
-       self.exons = exons
-
-
-class Gene(object):
-    """ A collection of transcripts
-    """
-
-    def __init__(self, transcripts=[]):
-        self.transcripts = []
-
-    def _unique_junctions(self):
-        pass
-
-    def hmm(self):
-        pass
-
-def break_exons(exon1, exon2):
-    """ Returns a list of new exons
-
-    Breaks a interval into 2 intervals if it overlaps, upstream exon always
-    comes first.
-
-    """
-    if exon1.region == exon2.region:
-        if exon2.start > exon1.start and exon2.end < exon1.end:
-            # Case where exon1 is wholly enclosed in exon2
-            return([Exon(exon1.region, exon1.start, exon2.start),
-                    Exon(exon1.region, exon2.start, exon2.end),
-                    Exon(exon1.region, exon2.end, exon1.end)])
-        elif exon2.end < exon1.end:
-            return([Exon(exon1.region, exon1.start, exon2.start),
-                    Exon(exon1.region, exon2.start, exon1.end),
-                    Exon(exon1.region, exon1.end, exon2.end)])
-        else:
-            # Do nothing if there is no overlap
-            return(exon1, exon2)
-    else:
-        pass
-
-def unique_sets(set_list):
-    """ Returns a list of sets
-    Calculates the unique regions defined such that only the same elements
-    are bound to it.
-    """
-
-    # Since most exons are shared, remove the most common elements first
-    common_set = reduce( lambda x, y: x & y, list_of_sets)
-    rs = [i - common_set for i in list_of_sets]
-    out_sets = []
-    all_unique = True
-    n = len(set_list)
-    for i in set_list:
-        intersection = [t & i for t in set_list]
-        # uniqufy the intersections
-        u_s = set(intersection)
-        for t in u_s:
-            pass
-
-
-        set_list = [i]
-    while len(rs) > 0:
-        intersection = [t & i for i in set_list]
-        out_sets.append(i)
-
-    return(out_sets)
+from .transcript_utils import *
 
diff --git a/genda/transcripts/transcripts_utils.py b/genda/transcripts/transcripts_utils.py
@@ -0,0 +1,160 @@
+
+
+class SNPInfo(object):
+    """ Docstring
+
+    """
+
+    def __init__(self, position):
+        self.position = position
+
+    def __call__(self, position):
+        pass
+
+
+class Exon(object):
+    """ A region
+    """
+
+    def __init__(self, region, start, end):
+        self.region = region
+        try:
+            self.start = int(start)
+            self.end = int(end)
+        except ValueError:
+            print("Start and End positions need to be integers")
+
+
+class Transcript(object):
+    """ A collection of exons
+    """
+
+    def __init__(self, exons=[]):
+       self.exons = exons
+
+
+class Gene(object):
+    """ A collection of transcripts
+    """
+
+    def __init__(self, transcripts=[]):
+        self.transcripts = []
+
+    def _unique_junctions(self):
+        pass
+
+    def hmm(self):
+        pass
+
+
+def break_exons(exon1, exon2):
+    """ Returns a list of new exons
+
+    Breaks a interval into 2 intervals if it overlaps, upstream exon always
+    comes first.
+
+    """
+    if exon1.region == exon2.region:
+        if exon2.start > exon1.start and exon2.end < exon1.end:
+            # Case where exon1 is wholly enclosed in exon2
+            return([Exon(exon1.region, exon1.start, exon2.start),
+                    Exon(exon1.region, exon2.start, exon2.end),
+                    Exon(exon1.region, exon2.end, exon1.end)])
+        elif exon2.end < exon1.end:
+            return([Exon(exon1.region, exon1.start, exon2.start),
+                    Exon(exon1.region, exon2.start, exon1.end),
+                    Exon(exon1.region, exon1.end, exon2.end)])
+        else:
+            # Do nothing if there is no overlap
+            return(exon1, exon2)
+    else:
+        pass
+
+
+def unique_sets(set_list):
+    """ Returns a list of sets
+    Calculates the unique regions defined such that only the same elements
+    are bound to it.
+    """
+
+    # Since most exons are shared, remove the most common elements first
+    common_set = reduce( lambda x, y: x & y, list_of_sets)
+    rs = [i - common_set for i in list_of_sets]
+    out_sets = []
+    all_unique = True
+    n = len(set_list)
+    for i in set_list:
+        intersection = [t & i for t in set_list]
+        # uniqufy the intersections
+        u_s = set(intersection)
+        for t in u_s:
+            pass
+
+
+        set_list = [i]
+    while len(rs) > 0:
+        intersection = [t & i for i in set_list]
+        out_sets.append(i)
+
+    return(out_sets)
+
+
+def compare_two_transcripts(trans1, trans2, transcript_dict):
+    """
+    Returns the splice differences between two transcripts.
+    Note this ignores TSS and just looks for splice junctions
+
+    Parameters
+    ----------
+    :TODO refactor this
+    :TODO still not finished
+    """
+
+    t1 = transcript_dict[trans1]
+    t2 = transcript_dict[trans2]
+
+    starts1 = [i[0] for i in t1]
+    starts2 = [i[0] for i in t2]
+    # Assume sorted
+    reverse = False
+    if min(starts1) >= min(starts2):
+        s1 = t2 
+        s2 = t1
+        reverse = True
+    else:
+        s1 = t1
+        s2 = t2
+    # Assume sorted order, ordering in reverse by construction
+    if reverse:
+        torder = (trans2, trans1)
+    else:
+        torder = (trans1, trans2)
+    oc = 0
+    #start_matches = False
+    exclusive_juncs = []
+    matching_exons = []
+    skipped_exons = []
+    for i in s1:
+        if (i[0] < s2[0][0]) and (i[1] <= s2[0][0]): 
+            pass
+        elif (i[0] < s2[oc][0]) and (i[1] > s2[oc][0]):
+            # 5' difference
+            print('5 primt diff')
+            sdiff = i[0] - s2[oc][0]
+            ediff = i[1] - s2[oc][1]
+            print(s2[oc])
+            exclusive_juncs.append((i[0], s2[oc][0], 
+                sdiff, ediff, (i[2], s2[oc][2])))
+            oc += 1
+        elif (i[0] > s2[oc][0]) and (i[0] < s2[oc][1]):
+            print('3 prime diff')
+            print(i[0] - s2[oc][0])
+            print(i)
+            print(s2[oc])
+            #:TODO FIX this
+            oc += 1
+        elif (i[0] == s2[oc][0]) and (i[1] == s2[oc][1]):
+            matching_exons.append((i[2], s2[oc][2]))
+            oc += 1
+            pass
+    return(exclusive_juncs, torder)