Skip to content

Commit

Permalink
Added gene utils to formats, a convenience module for obtaining infor…
Browse files Browse the repository at this point in the history
…mation about genes
  • Loading branch information
jeffhsu3 committed Jul 13, 2015
1 parent 68f8481 commit 1678a52
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 34 deletions.
22 changes: 12 additions & 10 deletions genda/eQTL/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ def __init__(self, chrom, gene, rsID = None):
self.rsID = rsID


def subset_meQTL(meQTL, gene_name):
"""
"""
try:
index = meQTL.gene == gene_name
subset = meQTL.ix[index, :]
except AttributeError:
# Case where eQTL matrix is already 1 gene
subset = meQTL
return(subset)


def plot_dosage_by_rsID(gene_reference, dos, cov_mat, counts,
title = None, ax = None,
additional_covar=None,
Expand Down Expand Up @@ -104,16 +116,6 @@ def plot_dosage_by_rsID(gene_reference, dos, cov_mat, counts,
return(fig, test)


def subset_meQTL(meQTL, gene_name):
"""
"""
try:
index = meQTL.gene == gene_name
subset = meQTL.ix[index, :]
except AttributeError:
# Case where eQTL matrix is already 1 gene
subset = meQTL
return(subset)



Expand Down
6 changes: 6 additions & 0 deletions genda/formats/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
""" Formats submodule contains classes and functions
to parse various formats into pandas dataframes as
well as lookup utilities to various formats
"""

from .gene_utils import *
19 changes: 0 additions & 19 deletions genda/formats/dosages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pandas as pd
import pysam
import pickle as pkl
import statsmodels.api as sm

class Dosage(object):
Expand Down Expand Up @@ -65,20 +64,6 @@ def get_dosages_by_range(chrm, start, end, gene_name, annotation_file,
return Dosage(out_dos, annot, gene_name)


def grab_gene_location(hgnc, cis_buffer=0, ensid = False):
""" :TODO maybe query ensembl for this
:TODO ensid matching
"""
ann_file = "/proj/genetics/Projects/shared/Subject_Sources/" +\
"External/Ensembl/OutputData/EnsemblAnnotationAllHumanGenes.bed.gz"
with gzip.open(ann_file) as annot:
for line in annot:
if hgnc in line:
line = line.split("\t")
return(line[0], int(line[1]) - cis_buffer,
int(line[2]) + cis_buffer, str(line[4]))



def generate_dosage_mapping(dosage_file, mapping_file = None, interval=50):
"""
Expand Down Expand Up @@ -106,10 +91,6 @@ def eQTL_func(snps, cov, expression):
model = sm.OLS(expression, cov)
return(model.fit().pvalues['snps'])

def two_eQTL_func(snps, cov, expression, snp2):
"""
"""
pass

class eQTL(object):
""" Python class for completing eQTLs. Does lazy loading of all large
Expand Down
44 changes: 44 additions & 0 deletions genda/formats/gene_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
"""

import json
import requests
import gzip


def get_symbol_ensembl(ensembl):
"""Get symbol from ensembl ID from
ensembl REST API
"""

server = "http://rest.ensembl.org"
ext = "/lookup/id/{0}?expand=1"
r = requests.get(server + ext.format(str(ensembl)),
headers={ "Content-Type" : "application/json"})
if not r.ok:
return('NA')
else:
decoded = r.json()
try:
return(decoded['display_name'])
except KeyError:
return('NA')


def grab_gene_location(id, ann_file = None, cis_buffer=0):
""" Grab the gene boundries as a tuple buffered by
cis_buffer
returns (chr, pos0, pos1)
"""

if ann_file:
with gzip.open(ann_file) as annot:
for line in annot:
if id in line:
line = line.split("\t")
return(line[0], int(line[1]) - cis_buffer,
int(line[2]) + cis_buffer, str(line[4]))
else:
raise NotImplementedError

7 changes: 2 additions & 5 deletions genda/plotting/plotting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ def plot_transcript(transcript_id, paths, ax, y=0, height=2.,
def add_gene_bounderies(ax, gene_annot, gene_name, x_scale):
""" Add gene bounderies to an axis
"""
gene_bounds = grab_gene_location(gene_name, cis_buffer=0)
gene_bounds = grab_gene_location(gene_name,
ann_file = gene_annot, cis_buffer=0)
path_a = make_rectangle(float(gene_bounds[1])/x_scale,
float(gene_bounds[2])/x_scale,
0, 200)
Expand Down Expand Up @@ -231,7 +232,3 @@ def get_path_max_and_min(gene_dict):
points.append(k[0])
return(min(points), max(points))


class read_plots(object):
def __init__(self):
pass

0 comments on commit 1678a52

Please sign in to comment.