Skip to content

Commit

Permalink
added genome plot basics
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffhsu3 committed Jan 16, 2015
1 parent ad246a6 commit 54ddc0c
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 10 deletions.
24 changes: 20 additions & 4 deletions genda/eQTL/multiple_eQTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,42 @@
import numpy as np
import statsmodels.api as sm


def ceQTL(counts, dos, cov_mat, rsid):
""" Convinence function to calculate the single SNP comparison
"""
acov_mat = cov_mat.copy(deep=True)
acov_mat['soi'] = dos.ix[rsid, acov_mat.index]
res = sm.OLS(counts, acov_mat).fit()
return(res)

def calculate_top_cond(counts, dos, cov_mat, rsid1,
return_rsid1=False):
"""
""" Need to mask rsid1 due to perfect multicolinneraity
"""
acov_mat = cov_mat.copy(deep=True)
acov_mat[rsid1] = dos.ix[rsid1, acov_mat.index]

def _calc_model(geno, counts, cov_mat):
t = cov_mat.copy()
t = cov_mat.copy(deep=True)
t['ts'] = geno
return(sm.OLS(counts, t).fit().pvalues['ts'])

def _calc_model_two(geno, counts, cov_mat, rsid1):
t = cov_mat.copy()
t = cov_mat.copy(deep=True)
t['ts'] = geno
fit = sm.OLS(counts, t).fit()
return(pd.Series({rsid1:fit.pvalues['rsid1'],
return(pd.Series({'aic': fit.aic,
'rsid2': fit.pvalues['ts']}))

'''
pvalues = dos.apply(_calc_model, axis=1, args=(counts,
acov_mat))
'''
pvalues = dos.apply(_calc_model_two, axis=1, args=(counts,
acov_mat, rsid1))
# Mask collinearity
pvalues.ix[rsid1, 'rsid2'] = 1
return(pvalues)


Expand Down
3 changes: 1 addition & 2 deletions genda/eQTL/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,13 @@ def plot_eQTL(meQTL, gene_name, annotation, dosage, ax=None,
### Actual scatter #############################
im = ax.scatter(pos, adj_pv, s=dosage_maf, c = color1)
#:TODO make the arrow into a funciton

arrow_start = ylim - max(adj_pv/6.0)/2
arrow_length = max(adj_pv/6.0/2) - max(adj_pv/6.0/2)/8
ax.arrow(snpx, arrow_start, 0, - arrow_length,
head_width=0.01, head_length=0.1, fc='k',
ec='k')
ax.text(snpx-0.05 , arrow_start + max(adj_pv/6.0)/5.0,
snp, style='italic', fontsize=16)
snp, style='italic')
ax.set_ylabel(r'$-log_{10}$ eQTL p-value')
ax.set_xlabel(r'Position (Mb)')
if should_not_plot(gene_annot):
Expand Down
3 changes: 2 additions & 1 deletion genda/plotting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from matplotlib.path import Path
from genda.formats.dosages import grab_gene_location
from .plotting_utils import *
#from genda.plotting.plotting_utils import create_path


def make_rectangle(start, end, y1, y2):
Expand All @@ -33,6 +32,8 @@ def make_rectangle(start, end, y1, y2):

def create_path(gtf_iterator, gene, height=0.5, x_scale=1):
""" Create a path for a given transcript
:TODO change name
"""
transcripts = defaultdict(list)
t_rxe = re.compile('transcript_id "(ENST[0-9]+)"')
Expand Down
110 changes: 107 additions & 3 deletions genda/plotting/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def make_rectangle(start, end, y1, y2):

def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
full_gene=False):
"""
""" Plot a transcript
"""
xmin = None
xmax = None
Expand All @@ -63,7 +63,9 @@ def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
ps.append(patches.PathPatch(i, facecolor='darkgray', lw=0))
for patch in ps:
ax.add_patch(patch)
ax.hlines(y+height/2., xmin, xmax, colors='darkgray', lw=2)
# hlines doesn't work without returning axis
draw_arrows(xmin, xmax, y+height/2.0, ax)
ax.hlines(y+height/2.0, xmin, xmax, colors='darkgray', lw=2)


def add_gene_bounderies(ax, gene_annot, gene_name, x_scale):
Expand All @@ -80,7 +82,7 @@ def add_gene_bounderies(ax, gene_annot, gene_name, x_scale):

def add_snp_arrow(adj_pv, x, snp, ax):
"""Adds an arrow with snp text to plot
The best snp should be some sort of object
:TODO The best snp should be some sort of object
"""
print(type(ax.get_ylim()))
print(type(adj_pv))
Expand All @@ -91,3 +93,105 @@ def add_snp_arrow(adj_pv, x, snp, ax):
ax.text(x - 0.05, arrow_start + adj_pv/6.0/5.0,
snp, style='italic')
return(ax)


def plot_read2(read, y_start, height=1, ymax=30, scale=1e6):
"""
"""
gout = {'patches': [],
'hlines': [],
}
cont = 0
for i, j in read.cigar:
if i == 0:
path = make_rectangle(read.pos/scale + cont/scale,
read.pos/scale + cont/scale + j/scale,
ymax - y_start,
ymax - y_start - float(height))
patch = patches.PathPatch(path, facecolor='grey',
alpha = 0.4)
gout['patches'].append(patch)
cont += j
elif i == 3:
gout['hlines'].append((ymax-y_start - float(height)/2.0,
read.pos/scale + cont/scale,
read.pos/scale + cont/scale + j/scale))
return(gout)



def plot_read(read, y_start, ax, height=1, ymax=30, scale=1e6):
""" Plot a sequencing read
"""
cont = 0
for i, j in read.cigar:
if i == 0:
path = make_rectangle(read.pos/scale + cont/scale,
read.pos/scale + cont/scale + j/scale,
ymax - y_start,
ymax - y_start - float(height))
patch = patches.PathPatch(path, facecolor='grey',
alpha = 0.4)
ax.add_patch(patch)
cont += j
elif i == 3:
ax.hlines(ymax - y_start - float(height)/2.0,
read.pos/scale + cont/scale,
read.pos/scale + cont/scale + j/scale,
colors='grey')
cont += j


def coverage_hist(read, hist_array, start):
ii = read.pos - start
cont = 0
for i, j in read.cigar:
if i == 0:
hist_array[ii + cont:ii+cont+j] +=1
cont += j
elif i==3:
pass


def create_gene_path(gtf_iter, gene, x_scale):
"""
Returns
-------
None
"""
pass


def draw_arrows(xmin, xmax, y, ax, spacing=0.001):
# :TODO make spacing dynamic based on axis limits
x_range = np.arange(xmin, xmax, spacing)[1:-1]
for i in x_range:
draw_arrow(i, y, ax)


def draw_arrow(x, y, ax):
# :TODO x_adjust and y_adjust scale based on axis limits
x_adjust = 0.0003
y_adjust = 0.05
ax.plot([x, x+x_adjust], [y, y+y_adjust], 'k-', color='darkgrey')
ax.plot([x, x+x_adjust], [y, y-y_adjust], 'k-', color='darkgrey')


def get_path_max_and_min(gene_dict):
""" Return min, max of a path
"""
points = []
try:
for i in gene_dict.values():
for j in i:
for k in j.vertices:
points.append(k[0])
except AttributeError:
# Case where only 1 transcript is chosen
for j in gene_dict:
for k in j.vertices:
points.append(k[0])
return(min(points), max(points))


0 comments on commit 54ddc0c

Please sign in to comment.