Skip to content

Commit

Permalink
transcript utils
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffhsu3 committed Dec 17, 2015
1 parent 6710143 commit 0d02f38
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 37 deletions.
12 changes: 11 additions & 1 deletion genda/plotting/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,14 @@ def plot_transcript_i(transcript, ax, y=0, height=2.,
Arguments
=========
transcript - a list of exons (start, end, exon_number)
:TODO transcript could be an object.
ax - matplotlib.axes
y - where to place the base of the transcript on the y-axis
height - height of the boxes of the transcript
ax - matplotlib axes object
:TODO transcript could be an object.
:TODO write tests for this
Returns:
=======
matplotlib.axis
Expand All @@ -268,6 +273,11 @@ def plot_transcript_i(transcript, ax, y=0, height=2.,
return(ax, (beg_exon[0], xmax))


def intron_scaling(transcripts):
""" Scale all features shrinking introns
"""
pass


def add_size_legends(ax, size_base=200):
sizes = [((size_base * i ) + 20) for i in np.linspace(0,0.5, num = 4)]
Expand Down
2 changes: 1 addition & 1 deletion genda/transcripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .transcript_utils import *
from .transcripts_utils import *

91 changes: 60 additions & 31 deletions genda/transcripts/transcripts_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from bx.intervals.intersection import Intersecter, Interval


class SNPInfo(object):
Expand Down Expand Up @@ -99,23 +100,30 @@ def unique_sets(set_list):
return(out_sets)





def compare_two_transcripts(trans1, trans2, transcript_dict):
"""
Returns the splice differences between two transcripts.
Note this ignores TSS and just looks for splice junctions
Parameters
----------
:TODO refactor this
:TODO still not finished
trans1 - string of transcript of interest
trans2 - string of second transcript of interest
transcript_dict - a dictionary of transcript names with
values being a list of exons
Returns:
5' upstream exons -
3' downstram exons -
"""

t1 = transcript_dict[trans1]
t2 = transcript_dict[trans2]

tree = Intersecter()
starts1 = [i[0] for i in t1]
starts2 = [i[0] for i in t2]
# Assume sorted
reverse = False
if min(starts1) >= min(starts2):
s1 = t2
Expand All @@ -124,37 +132,58 @@ def compare_two_transcripts(trans1, trans2, transcript_dict):
else:
s1 = t1
s2 = t2
# Assume sorted order, ordering in reverse by construction
if reverse:
torder = (trans2, trans1)
else:
torder = (trans1, trans2)
oc = 0
#start_matches = False
exclusive_juncs = []

for i in s1:
tree.add_interval(Interval(int(i[0]), int(i[1]),
value={'anno':i[2]}))
matching_exons = []
exclusive_juncs = []
skipped_exons = []
for i in s1:
if (i[0] < s2[0][0]) and (i[1] <= s2[0][0]):
# Perform the query
start_of_exons = None
s1.sort(key=lambda x: x[2])
s2.sort(key=lambda x: x[2])
for start, end, exon_n in s2:
start = int(start)
end = int(end)
overlap = tree.find(int(start), int(end))
if len(overlap) == 0:
if start_of_exons:
skipped_exons.append((start, end, (None, exon_n), (0,end-start)))
else: pass
elif len(overlap) == 1:
if start_of_exons: pass
else: start_of_exons = overlap[0].value['anno']
if start == overlap[0].start and end == overlap[0].end:
matching_exons.append((start, end, (overlap[0].value['anno'],
exon_n), (0, 0)))
else:
sstart = min(start, overlap[0].start)
ssend = max(end, overlap[0].end)
exclusive_juncs.append((sstart , ssend,
(overlap[0].value['anno'], exon_n),
(overlap[0].start - start,
overlap[0].end - end)
))
else:
if start_of_exons:
pass
else: start_of_exons = overlap[0].value['anno']
# Checking for skipped exons of t1 missing in t2
hit_exon = [i[2][0] for i in exclusive_juncs]
hit_exon.extend([i[2][0] for i in matching_exons])

for start, end, exon_n in s1:
if exon_n <= start_of_exons:
pass
elif (i[0] < s2[oc][0]) and (i[1] > s2[oc][0]):
# 5' difference
print('5 primt diff')
sdiff = i[0] - s2[oc][0]
ediff = i[1] - s2[oc][1]
print(s2[oc])
exclusive_juncs.append((i[0], s2[oc][0],
sdiff, ediff, (i[2], s2[oc][2])))
oc += 1
elif (i[0] > s2[oc][0]) and (i[0] < s2[oc][1]):
print('3 prime diff')
print(i[0] - s2[oc][0])
print(i)
print(s2[oc])
#:TODO FIX this
oc += 1
elif (i[0] == s2[oc][0]) and (i[1] == s2[oc][1]):
matching_exons.append((i[2], s2[oc][2]))
oc += 1
elif exon_n in hit_exon:
pass
return(exclusive_juncs, torder)
else:
skipped_exons.append((start, end, (exon_n, None), (end-start)) )


return(exclusive_juncs, torder, matching_exons, skipped_exons)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ Cython==0.19.1
argparse==1.2.1
numpy==1.7.1
pandas==0.11.0
pysam==0.7.4
pysam==0.8.4
python-dateutil==2.1
28 changes: 25 additions & 3 deletions tests/transcriptTests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from genda.transcripts import Exon, Gene, Transcript, unique_sets
from genda.transcripts import (Exon, Gene, Transcript, unique_sets,
compare_two_transcripts)
import unittest

class TestGeneBreaks(unittest.TestCase):
Expand Down Expand Up @@ -32,11 +33,32 @@ def setUp(self):
self.set_b = set(['a', 'b'])
self.set_c = set(['a', 'b', 'd'])


def find_sets(self):
self.assertEqual(unique_sets([self.set_a, self.set_b, self.set_c]),
[set(['a', 'b']), set(['c']), set(['d'])])


if __name__ == '__main__':
unittests.main()

class TestCompareTwoTranscripts(unittest.TestCase):
"""
"""
def setUp(self):
self.transcript_dict = {
't1' : [(10, 20, 1),
(50, 60, 2),
(70, 90, 3)],
't2' : [(10, 20, 1), (70, 90, 2)]
}

def test_compare_transcripts_skipped_exon(self):
exclusive_juncs, torder, matching_exons, skipped_exons =\
compare_two_transcripts('t1', 't2', self.transcript_dict)
print(torder)
print(exclusive_juncs)
self.assertEqual([(50, 60, (None, 2), (0, 10)),], skipped_exons)



if __name__ == '__main__':
unittest.main()

0 comments on commit 0d02f38

Please sign in to comment.