more transcript modifications and updated readme

jeffhsu3 · Dec 31, 2015 · ebb7f99 · ebb7f99
1 parent 0d02f38
commit ebb7f99
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 What is genda?
 ==============
+genda is still very much in development and should not be used by others
+
 genda is a python package that allows the user to import many varieties of 
 genetic data and genda will parse the data, converting it to pandas 
 dataframes. This standardization of data makes it incredibley simple to 
@@ -22,14 +24,16 @@ Allelic Expression Imbalance
 
 Documentation
 =============
-Documentation, along with tutorials, can be found `here <http://pyseq.rtfd.org>`_.
+Documentation, along with tutorials, can be found [here](http://pyseq.rtfd.org).
 
 External Library Requirments:
 =============================
-    - `pandas <http://pandas.pydata.org/>`_
-    - `matplotlib <http://matplotlib.org/>`_
-    - `scipy <http://www.scipy.org/>`_
-    - `cython <http://www.cython.org/>`_
-    - `numpy <http://http://www.numpy.org/`_
-
-
+    - [numpy] (http://http://www.numpy.org/)
+    - [pandas] (http://pandas.pydata.org/)
+    - [matplotlib] (http://matplotlib.org/)
+    - [scipy] (http://www.scipy.org/)
+    - [cython] (http://www.cython.org/)
+    - [bx-python] (https://bitbucket.org/james_taylor/bx-python/wiki/Home)
+    - [statsmodels] (http://statsmodels.sourceforge.net/)
+    - [scikit-learn] (http://scikit-learn.org/stable/)
+    - [pysam] (https://code.google.com/p/pysam/)
diff --git a/genda/plotting/plotting_utils.py b/genda/plotting/plotting_utils.py
@@ -76,6 +76,11 @@ def make_rectangle(start, end, y1, y2):
     return (Path(verts, codes))
 
 
+def plot_box_(transcript, ax):
+
+    path = make_rectangle()
+    return(path)
+
 def plot_transcript(transcript_id, paths, ax, y=0, height=2., 
         full_gene=False):
     """ Plot a transcript
@@ -84,27 +89,38 @@ def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
     xmax = None
     ps = []
     ymin, ymax = ax.get_ylim()
-    for i in paths[transcript_id]:
-        i.vertices[:, 1] = np.asarray([y, y + height, y + height, y, y])
-        if not xmin or not xmax:
+    try:
+        for i in paths[transcript_id]:
+            i.vertices[:, 1] = np.asarray([y, y + height, y + height, y, y])
             if not xmin or not xmax:
-                xmin = np.min(i.vertices[:, 0])
-                xmax = np.max(i.vertices[:, 0])
-            else:
-                if xmin > np.min(i.vertices[:, 0]):
+                if not xmin or not xmax:
                     xmin = np.min(i.vertices[:, 0])
-                elif xmax < np.max(i.vertices[:, 0]):
                     xmax = np.max(i.vertices[:, 0])
-                else: pass
-        ps.append(patches.PathPatch(i, facecolor='darkgray', lw=0))
-    for patch in ps:
-        ax.add_patch(patch)
+                else:
+                    if xmin > np.min(i.vertices[:, 0]):
+                        xmin = np.min(i.vertices[:, 0])
+                    elif xmax < np.max(i.vertices[:, 0]):
+                        xmax = np.max(i.vertices[:, 0])
+                    else: pass
+            ps.append(patches.PathPatch(i, facecolor='darkgray', lw=0))
+        for patch in ps:
+            ax.add_patch(patch)
+    except AttributeError:
+        for i, exon in enumerate(paths[transcript_id]):
+            try:
+                path = make_rectangle(i[0], i[1], y, y+height)
+            except IndexError:
+                path = make_rectangle(i.start, i.end, y, y+height)
+            patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
+            ax.add_patch(patch)
     # hlines doesn't in this function but outside it?
     #draw_arrows(xmin, xmax, y+height/2.0, ax)
     #ax.hlines(y+height/2.0, xmin, xmax, colors='darkgray', lw=2)
     return(ax)
 
 
+
+
 def add_gene_bounderies(ax, gene_annot, gene_name, x_scale):
     """ Add gene bounderies to an axis
     """
@@ -233,12 +249,13 @@ def get_path_max_and_min(gene_dict):
     return(min(points), max(points))
 
 
-def plot_transcript_i(transcript, ax, y=0, height=2., 
+def plot_transcript_(transcript, ax, y=0, height=2., 
         intron_scale=0.10, scale=1):
     '''
     Arguments
     =========
-    transcript - a list of exons (start, end, exon_number)
+    transcript - a list of exons (start, end, exon_number) or
+    a genda.transcripts.Transcript object
     ax - matplotlib.axes
     y - where to place the base of the transcript on the y-axis
     height - height of the boxes of the transcript
@@ -273,10 +290,17 @@ def plot_transcript_i(transcript, ax, y=0, height=2.,
     return(ax, (beg_exon[0], xmax))
 
 
-def intron_scaling(transcripts):
+def _calculate_intron_scaling(transcript):
+    """
+    """
+    raise NotImplemented
+
+
+def intron_scaling(ax):
     """ Scale all features shrinking introns
     """
     pass
+    return(ax)
 
 
 def add_size_legends(ax, size_base=200):

diff --git a/genda/transcripts/transcripts_utils.py b/genda/transcripts/transcripts_utils.py
@@ -1,16 +1,13 @@
+from itertools import tee, izip
+# Requires bx python
 from bx.intervals.intersection import Intersecter, Interval
 
 
-class SNPInfo(object):
-    """ Docstring
 
-    """
-
-    def __init__(self, position):
-        self.position = position
-
-    def __call__(self, position):
-        pass
+def pairwise(iterable):
+    a, b = tee(iterable)
+    next(b, None)
+    return(izip(a,b))
 
 
 class Exon(object):
@@ -26,14 +23,18 @@ def __init__(self, region, start, end):
             print("Start and End positions need to be integers")
 
 
+
 class Transcript(object):
     """ A collection of exons
     """
 
+
     def __init__(self, exons=[]):
        self.exons = exons
 
 
+
+
 class Gene(object):
     """ A collection of transcripts
     """
@@ -48,6 +49,7 @@ def hmm(self):
         pass
 
 
+
 def break_exons(exon1, exon2):
     """ Returns a list of new exons
 
@@ -147,13 +149,16 @@ def compare_two_transcripts(trans1, trans2, transcript_dict):
     start_of_exons = None
     s1.sort(key=lambda x: x[2])
     s2.sort(key=lambda x: x[2])
+    max_exon_1 = max([i[2] for i in s1])
+    max_exon_2 = max([i[2] for i in s2])
     for start, end, exon_n in s2:
         start = int(start)
         end = int(end)
         overlap = tree.find(int(start), int(end))
         if len(overlap) == 0:
             if start_of_exons:
-                skipped_exons.append((start, end, (None, exon_n), (0,end-start)))
+                skipped_exons.append((start, end, 
+                    (None, exon_n), (0,end-start)))
             else: pass
         elif len(overlap) == 1:
             if start_of_exons: pass
@@ -164,26 +169,43 @@ def compare_two_transcripts(trans1, trans2, transcript_dict):
             else:
                 sstart = min(start, overlap[0].start)
                 ssend = max(end, overlap[0].end)
-                exclusive_juncs.append((sstart , ssend,
-                                        (overlap[0].value['anno'], exon_n), 
-                                        (overlap[0].start - start, 
-                                        overlap[0].end - end) 
-                                        ))
+                # Ignore 5' or 3' differences
+                if (exon_n == max_exon_2 and
+                        overlap[0].value['anno'] == max_exon_1):
+                    pass
+                else:
+                    exclusive_juncs.append(
+                            (sstart, ssend,
+                            (overlap[0].value['anno'], exon_n), 
+                            (overlap[0].start - start, overlap[0].end - end) 
+                            )
+                    )
         else:
             if start_of_exons:
                 pass
             else: start_of_exons = overlap[0].value['anno']
     # Checking for skipped exons of t1 missing in t2
     hit_exon = [i[2][0] for i in exclusive_juncs] 
     hit_exon.extend([i[2][0] for i in matching_exons])
-
     for start, end, exon_n in s1:
         if exon_n <= start_of_exons:
             pass
         elif exon_n in hit_exon:
             pass
         else:
             skipped_exons.append((start, end, (exon_n, None), (end-start)) ) 
-
-
     return(exclusive_juncs, torder, matching_exons, skipped_exons)
+
+
+def pairwise_transcript_comparison(transcript_dict):
+    """
+    """
+    skipped_exons_out = []
+    for key1, key2 in pairwise(transcript_dict.keys()):
+        exclusive_juncs, to, me, skipped_exons = compare_two_transcripts(
+                key1, key2, transcript_dict)
+        if len(skipped_exons) >=1:
+            skipped_exons_out.append((key1, key2))
+    return(skipped_exons_out)
+
+
diff --git a/tests/transcriptTests.py b/tests/transcriptTests.py
@@ -54,8 +54,6 @@ def setUp(self):
     def test_compare_transcripts_skipped_exon(self):
         exclusive_juncs, torder, matching_exons, skipped_exons =\
                 compare_two_transcripts('t1', 't2', self.transcript_dict)
-        print(torder)
-        print(exclusive_juncs)
         self.assertEqual([(50, 60, (None, 2), (0, 10)),], skipped_exons)