Skip to content

Commit

Permalink
plotting util and transcript changes
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffhsu3 committed Dec 3, 2015
1 parent 0858dc2 commit 6710143
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 97 deletions.
51 changes: 50 additions & 1 deletion genda/plotting/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def create_gene_path(gtf_iter, gene, x_scale):
-------
None
"""
pass
raise NotImplementedError


def draw_arrows(xmin, xmax, y, ax, spacing=0.001):
Expand Down Expand Up @@ -231,3 +231,52 @@ def get_path_max_and_min(gene_dict):
for k in j.vertices:
points.append(k[0])
return(min(points), max(points))


def plot_transcript_i(transcript, ax, y=0, height=2.,
intron_scale=0.10, scale=1):
'''
Arguments
=========
transcript - a list of exons (start, end, exon_number)
:TODO transcript could be an object.
ax - matplotlib axes object
Returns:
=======
matplotlib.axis
new_scaled_ylim and xlim
'''
xmin, xmax = 1e12, 0
ymin, ymax = ax.get_ylim()
beg_exon = transcript[0]
path = make_rectangle(beg_exon[0], beg_exon[1], y, y+height)
patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
ax.add_patch(patch)
rx = transcript[0][1]
for i, exon in enumerate(transcript[1:]):
intron_l = (exon[0] - transcript[i - 1][1]) * intron_scale
exon_length = exon[1] - exon[0]
ns = rx + intron_l
ne = rx + exon_length
path = make_rectangle(ns, ne, y, y+height)
patch = patches.PathPatch(path, facecolor='grey', alpha=0.6)
ax.add_patch(patch)
rx = rx + intron_l + exon_length
if ns < xmin: xmin = ns
if ne > xmax: xmax = ne
return(ax, (beg_exon[0], xmax))



def add_size_legends(ax, size_base=200):
sizes = [((size_base * i ) + 20) for i in np.linspace(0,0.5, num = 4)]
l1 = ax.scatter([],[], s=sizes[0])
l2 = ax.scatter([],[], s=sizes[1])
l3 = ax.scatter([],[], s=sizes[2])
l4 = ax.scatter([],[], s=sizes[3])
leg = ax.legend([l1, l2, l3, l4],
labels = np.around(np.linspace(0, 0.5, num=4), decimals=2),
fontsize=14, loc=0,
frameon=True, title='maf', scatterpoints=1)
return(ax)
97 changes: 1 addition & 96 deletions genda/transcripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,97 +1,2 @@
import numpy as np


class SNPInfo(object):
""" Docstring
"""

def __init__(self, position):
self.position = position

def __call__(self, position):
pass

class Exon(object):
""" A region
"""

def __init__(self, region, start, end):
self.region = region
try:
self.start = int(start)
self.end = int(end)
except ValueError:
print("Start and End positions need to be integers")

class Transcript(object):
""" A collection of exons
"""

def __init__(self, exons=[]):
self.exons = exons


class Gene(object):
""" A collection of transcripts
"""

def __init__(self, transcripts=[]):
self.transcripts = []

def _unique_junctions(self):
pass

def hmm(self):
pass

def break_exons(exon1, exon2):
""" Returns a list of new exons
Breaks a interval into 2 intervals if it overlaps, upstream exon always
comes first.
"""
if exon1.region == exon2.region:
if exon2.start > exon1.start and exon2.end < exon1.end:
# Case where exon1 is wholly enclosed in exon2
return([Exon(exon1.region, exon1.start, exon2.start),
Exon(exon1.region, exon2.start, exon2.end),
Exon(exon1.region, exon2.end, exon1.end)])
elif exon2.end < exon1.end:
return([Exon(exon1.region, exon1.start, exon2.start),
Exon(exon1.region, exon2.start, exon1.end),
Exon(exon1.region, exon1.end, exon2.end)])
else:
# Do nothing if there is no overlap
return(exon1, exon2)
else:
pass

def unique_sets(set_list):
""" Returns a list of sets
Calculates the unique regions defined such that only the same elements
are bound to it.
"""

# Since most exons are shared, remove the most common elements first
common_set = reduce( lambda x, y: x & y, list_of_sets)
rs = [i - common_set for i in list_of_sets]
out_sets = []
all_unique = True
n = len(set_list)
for i in set_list:
intersection = [t & i for t in set_list]
# uniqufy the intersections
u_s = set(intersection)
for t in u_s:
pass


set_list = [i]
while len(rs) > 0:
intersection = [t & i for i in set_list]
out_sets.append(i)

return(out_sets)
from .transcript_utils import *

160 changes: 160 additions & 0 deletions genda/transcripts/transcripts_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@


class SNPInfo(object):
""" Docstring
"""

def __init__(self, position):
self.position = position

def __call__(self, position):
pass


class Exon(object):
""" A region
"""

def __init__(self, region, start, end):
self.region = region
try:
self.start = int(start)
self.end = int(end)
except ValueError:
print("Start and End positions need to be integers")


class Transcript(object):
""" A collection of exons
"""

def __init__(self, exons=[]):
self.exons = exons


class Gene(object):
""" A collection of transcripts
"""

def __init__(self, transcripts=[]):
self.transcripts = []

def _unique_junctions(self):
pass

def hmm(self):
pass


def break_exons(exon1, exon2):
""" Returns a list of new exons
Breaks a interval into 2 intervals if it overlaps, upstream exon always
comes first.
"""
if exon1.region == exon2.region:
if exon2.start > exon1.start and exon2.end < exon1.end:
# Case where exon1 is wholly enclosed in exon2
return([Exon(exon1.region, exon1.start, exon2.start),
Exon(exon1.region, exon2.start, exon2.end),
Exon(exon1.region, exon2.end, exon1.end)])
elif exon2.end < exon1.end:
return([Exon(exon1.region, exon1.start, exon2.start),
Exon(exon1.region, exon2.start, exon1.end),
Exon(exon1.region, exon1.end, exon2.end)])
else:
# Do nothing if there is no overlap
return(exon1, exon2)
else:
pass


def unique_sets(set_list):
""" Returns a list of sets
Calculates the unique regions defined such that only the same elements
are bound to it.
"""

# Since most exons are shared, remove the most common elements first
common_set = reduce( lambda x, y: x & y, list_of_sets)
rs = [i - common_set for i in list_of_sets]
out_sets = []
all_unique = True
n = len(set_list)
for i in set_list:
intersection = [t & i for t in set_list]
# uniqufy the intersections
u_s = set(intersection)
for t in u_s:
pass


set_list = [i]
while len(rs) > 0:
intersection = [t & i for i in set_list]
out_sets.append(i)

return(out_sets)


def compare_two_transcripts(trans1, trans2, transcript_dict):
"""
Returns the splice differences between two transcripts.
Note this ignores TSS and just looks for splice junctions
Parameters
----------
:TODO refactor this
:TODO still not finished
"""

t1 = transcript_dict[trans1]
t2 = transcript_dict[trans2]

starts1 = [i[0] for i in t1]
starts2 = [i[0] for i in t2]
# Assume sorted
reverse = False
if min(starts1) >= min(starts2):
s1 = t2
s2 = t1
reverse = True
else:
s1 = t1
s2 = t2
# Assume sorted order, ordering in reverse by construction
if reverse:
torder = (trans2, trans1)
else:
torder = (trans1, trans2)
oc = 0
#start_matches = False
exclusive_juncs = []
matching_exons = []
skipped_exons = []
for i in s1:
if (i[0] < s2[0][0]) and (i[1] <= s2[0][0]):
pass
elif (i[0] < s2[oc][0]) and (i[1] > s2[oc][0]):
# 5' difference
print('5 primt diff')
sdiff = i[0] - s2[oc][0]
ediff = i[1] - s2[oc][1]
print(s2[oc])
exclusive_juncs.append((i[0], s2[oc][0],
sdiff, ediff, (i[2], s2[oc][2])))
oc += 1
elif (i[0] > s2[oc][0]) and (i[0] < s2[oc][1]):
print('3 prime diff')
print(i[0] - s2[oc][0])
print(i)
print(s2[oc])
#:TODO FIX this
oc += 1
elif (i[0] == s2[oc][0]) and (i[1] == s2[oc][1]):
matching_exons.append((i[2], s2[oc][2]))
oc += 1
pass
return(exclusive_juncs, torder)

0 comments on commit 6710143

Please sign in to comment.