Skip to content

Commit

Permalink
more transcript modifications and updated readme
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffhsu3 committed Dec 31, 2015
1 parent 0d02f38 commit ebb7f99
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 43 deletions.
20 changes: 12 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
What is genda?
==============
genda is still very much in development and should not be used by others

genda is a python package that allows the user to import many varieties of
genetic data and genda will parse the data, converting it to pandas
dataframes. This standardization of data makes it incredibley simple to
Expand All @@ -22,14 +24,16 @@ Allelic Expression Imbalance

Documentation
=============
Documentation, along with tutorials, can be found `here <http://pyseq.rtfd.org>`_.
Documentation, along with tutorials, can be found [here](http://pyseq.rtfd.org).

External Library Requirments:
=============================
- `pandas <http://pandas.pydata.org/>`_
- `matplotlib <http://matplotlib.org/>`_
- `scipy <http://www.scipy.org/>`_
- `cython <http://www.cython.org/>`_
- `numpy <http://http://www.numpy.org/`_


- [numpy] (http://http://www.numpy.org/)
- [pandas] (http://pandas.pydata.org/)
- [matplotlib] (http://matplotlib.org/)
- [scipy] (http://www.scipy.org/)
- [cython] (http://www.cython.org/)
- [bx-python] (https://bitbucket.org/james_taylor/bx-python/wiki/Home)
- [statsmodels] (http://statsmodels.sourceforge.net/)
- [scikit-learn] (http://scikit-learn.org/stable/)
- [pysam] (https://code.google.com/p/pysam/)
54 changes: 39 additions & 15 deletions genda/plotting/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ def make_rectangle(start, end, y1, y2):
return (Path(verts, codes))


def plot_box_(transcript, ax):

path = make_rectangle()
return(path)

def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
full_gene=False):
""" Plot a transcript
Expand All @@ -84,27 +89,38 @@ def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
xmax = None
ps = []
ymin, ymax = ax.get_ylim()
for i in paths[transcript_id]:
i.vertices[:, 1] = np.asarray([y, y + height, y + height, y, y])
if not xmin or not xmax:
try:
for i in paths[transcript_id]:
i.vertices[:, 1] = np.asarray([y, y + height, y + height, y, y])
if not xmin or not xmax:
xmin = np.min(i.vertices[:, 0])
xmax = np.max(i.vertices[:, 0])
else:
if xmin > np.min(i.vertices[:, 0]):
if not xmin or not xmax:
xmin = np.min(i.vertices[:, 0])
elif xmax < np.max(i.vertices[:, 0]):
xmax = np.max(i.vertices[:, 0])
else: pass
ps.append(patches.PathPatch(i, facecolor='darkgray', lw=0))
for patch in ps:
ax.add_patch(patch)
else:
if xmin > np.min(i.vertices[:, 0]):
xmin = np.min(i.vertices[:, 0])
elif xmax < np.max(i.vertices[:, 0]):
xmax = np.max(i.vertices[:, 0])
else: pass
ps.append(patches.PathPatch(i, facecolor='darkgray', lw=0))
for patch in ps:
ax.add_patch(patch)
except AttributeError:
for i, exon in enumerate(paths[transcript_id]):
try:
path = make_rectangle(i[0], i[1], y, y+height)
except IndexError:
path = make_rectangle(i.start, i.end, y, y+height)
patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
ax.add_patch(patch)
# hlines doesn't in this function but outside it?
#draw_arrows(xmin, xmax, y+height/2.0, ax)
#ax.hlines(y+height/2.0, xmin, xmax, colors='darkgray', lw=2)
return(ax)




def add_gene_bounderies(ax, gene_annot, gene_name, x_scale):
""" Add gene bounderies to an axis
"""
Expand Down Expand Up @@ -233,12 +249,13 @@ def get_path_max_and_min(gene_dict):
return(min(points), max(points))


def plot_transcript_i(transcript, ax, y=0, height=2.,
def plot_transcript_(transcript, ax, y=0, height=2.,
intron_scale=0.10, scale=1):
'''
Arguments
=========
transcript - a list of exons (start, end, exon_number)
transcript - a list of exons (start, end, exon_number) or
a genda.transcripts.Transcript object
ax - matplotlib.axes
y - where to place the base of the transcript on the y-axis
height - height of the boxes of the transcript
Expand Down Expand Up @@ -273,10 +290,17 @@ def plot_transcript_i(transcript, ax, y=0, height=2.,
return(ax, (beg_exon[0], xmax))


def intron_scaling(transcripts):
def _calculate_intron_scaling(transcript):
"""
"""
raise NotImplemented


def intron_scaling(ax):
""" Scale all features shrinking introns
"""
pass
return(ax)


def add_size_legends(ax, size_base=200):
Expand Down
58 changes: 40 additions & 18 deletions genda/transcripts/transcripts_utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
from itertools import tee, izip
# Requires bx python
from bx.intervals.intersection import Intersecter, Interval


class SNPInfo(object):
""" Docstring

"""

def __init__(self, position):
self.position = position

def __call__(self, position):
pass
def pairwise(iterable):
a, b = tee(iterable)
next(b, None)
return(izip(a,b))


class Exon(object):
Expand All @@ -26,14 +23,18 @@ def __init__(self, region, start, end):
print("Start and End positions need to be integers")



class Transcript(object):
""" A collection of exons
"""


def __init__(self, exons=[]):
self.exons = exons




class Gene(object):
""" A collection of transcripts
"""
Expand All @@ -48,6 +49,7 @@ def hmm(self):
pass



def break_exons(exon1, exon2):
""" Returns a list of new exons
Expand Down Expand Up @@ -147,13 +149,16 @@ def compare_two_transcripts(trans1, trans2, transcript_dict):
start_of_exons = None
s1.sort(key=lambda x: x[2])
s2.sort(key=lambda x: x[2])
max_exon_1 = max([i[2] for i in s1])
max_exon_2 = max([i[2] for i in s2])
for start, end, exon_n in s2:
start = int(start)
end = int(end)
overlap = tree.find(int(start), int(end))
if len(overlap) == 0:
if start_of_exons:
skipped_exons.append((start, end, (None, exon_n), (0,end-start)))
skipped_exons.append((start, end,
(None, exon_n), (0,end-start)))
else: pass
elif len(overlap) == 1:
if start_of_exons: pass
Expand All @@ -164,26 +169,43 @@ def compare_two_transcripts(trans1, trans2, transcript_dict):
else:
sstart = min(start, overlap[0].start)
ssend = max(end, overlap[0].end)
exclusive_juncs.append((sstart , ssend,
(overlap[0].value['anno'], exon_n),
(overlap[0].start - start,
overlap[0].end - end)
))
# Ignore 5' or 3' differences
if (exon_n == max_exon_2 and
overlap[0].value['anno'] == max_exon_1):
pass
else:
exclusive_juncs.append(
(sstart, ssend,
(overlap[0].value['anno'], exon_n),
(overlap[0].start - start, overlap[0].end - end)
)
)
else:
if start_of_exons:
pass
else: start_of_exons = overlap[0].value['anno']
# Checking for skipped exons of t1 missing in t2
hit_exon = [i[2][0] for i in exclusive_juncs]
hit_exon.extend([i[2][0] for i in matching_exons])

for start, end, exon_n in s1:
if exon_n <= start_of_exons:
pass
elif exon_n in hit_exon:
pass
else:
skipped_exons.append((start, end, (exon_n, None), (end-start)) )


return(exclusive_juncs, torder, matching_exons, skipped_exons)


def pairwise_transcript_comparison(transcript_dict):
"""
"""
skipped_exons_out = []
for key1, key2 in pairwise(transcript_dict.keys()):
exclusive_juncs, to, me, skipped_exons = compare_two_transcripts(
key1, key2, transcript_dict)
if len(skipped_exons) >=1:
skipped_exons_out.append((key1, key2))
return(skipped_exons_out)


2 changes: 0 additions & 2 deletions tests/transcriptTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ def setUp(self):
def test_compare_transcripts_skipped_exon(self):
exclusive_juncs, torder, matching_exons, skipped_exons =\
compare_two_transcripts('t1', 't2', self.transcript_dict)
print(torder)
print(exclusive_juncs)
self.assertEqual([(50, 60, (None, 2), (0, 10)),], skipped_exons)


Expand Down

0 comments on commit ebb7f99

Please sign in to comment.