Skip to content

Commit

Permalink
Added protein coding biotype tests (#166)
Browse files (browse the repository at this point in the history)
* added unit tests for Transcript.biotype and Gene.biotype

* version bump
  • Loading branch information
iskandr committed Sep 19, 2016
1 parent 2b83f78 commit 98b61b2
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pyensembl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from .transcript import Transcript

__version__ = '1.0.0'
__version__ = '1.0.1'

def cached_release(release, species="human"):
"""
Expand Down
32 changes: 20 additions & 12 deletions test/test_gene_objects.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
from __future__ import absolute_import

from nose.tools import eq_

from .common import test_ensembl_releases
from .data import TP53_gene_id

@test_ensembl_releases()
def test_TP53_gene_object_by_id(ensembl):
def test_TP53_gene_object_by_id(genome):
# when we look up TP53 by its gene ID, we should get the
# correct gene back
gene = ensembl.gene_by_id(TP53_gene_id)
gene = genome.gene_by_id(TP53_gene_id)
assert gene.name == "TP53", \
"Incorrect gene name %s for gene ID %s in %s" % (
gene.name, gene.id, ensembl)
gene.name, gene.id, genome)
assert gene.contig == "17", \
"Incorrect gene contig %s for gene ID %s in %s" % (
gene.contig, gene.id, ensembl)
gene.contig, gene.id, genome)

@test_ensembl_releases()
def test_TP53_gene_object_by_name(ensembl):
genes = ensembl.genes_by_name("TP53")
def test_TP53_gene_object_by_name(genome):
genes = genome.genes_by_name("TP53")
# we should only have one TP53 gene (there aren't any copies)
assert len(genes) == 1, \
"Expected only one gene with name TP53, got %s" % (genes,)
Expand All @@ -26,17 +28,23 @@ def test_TP53_gene_object_by_name(ensembl):
"Expected gene to have ID %s, got %s" % (TP53_gene_id, genes[0].id)

@test_ensembl_releases()
def test_equal_genes(ensembl):
gene1 = ensembl.genes_by_name("TP53")[0]
def test_equal_genes(genome):
gene1 = genome.genes_by_name("TP53")[0]
# get an identical gene
gene2 = ensembl.gene_by_id(gene1.id)
gene2 = genome.gene_by_id(gene1.id)

assert hash(gene1) == hash(gene2)
assert gene1 == gene2

@test_ensembl_releases()
def test_not_equal_genes(release):
gene1 = release.genes_by_name("MUC1")[0]
gene2 = release.genes_by_name("BRCA1")[0]
def test_not_equal_genes(genome):
gene1 = genome.genes_by_name("MUC1")[0]
gene2 = genome.genes_by_name("BRCA1")[0]
assert hash(gene1) != hash(gene2)
assert gene1 != gene2

@test_ensembl_releases()
def test_BRCA1_protein_coding_biotype(genome):
gene = genome.genes_by_name("BRCA1")[0]
assert gene.is_protein_coding
eq_(gene.biotype, "protein_coding")
22 changes: 14 additions & 8 deletions test/test_transcript_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def test_transcript_exons():
# TODO: Add gene_id patching to gtf_parsing, add ensembl54 to the list
# below
@test_ensembl_releases(75, 77)
def test_sequence_parts(ensembl):
def test_sequence_parts(genome):
# Ensure that the UTRs and coding sequence can be
# combined to make the full transcript.
transcript = ensembl.transcript_by_id(FOXP3_001_transcript_id)
transcript = genome.transcript_by_id(FOXP3_001_transcript_id)

# The combined lengths of the upstream untranslated region,
# coding sequence, and downstream untranslated region
Expand Down Expand Up @@ -149,17 +149,17 @@ def test_transcript_cds_CTNNIP1_004():
eq_(cds, CTNNBIP1_004_CDS)

@test_ensembl_releases()
def test_equal_transcripts(ensembl):
t1 = ensembl.transcripts_by_name("TP53-001")[0]
def test_equal_transcripts(genome):
t1 = genome.transcripts_by_name("TP53-001")[0]
# get an identical transcript
t2 = ensembl.transcript_by_id(t1.id)
t2 = genome.transcript_by_id(t1.id)
eq_(t1, t2)
eq_(hash(t1), hash(t2))

@test_ensembl_releases()
def test_not_equal_transcripts(release):
t1 = release.transcripts_by_name("MUC1-001")[0]
t2 = release.transcripts_by_name("BRCA1-001")[0]
def test_not_equal_transcripts(genome):
t1 = genome.transcripts_by_name("MUC1-001")[0]
t2 = genome.transcripts_by_name("BRCA1-001")[0]
assert_not_equal(t1, t2)

def test_protein_id():
Expand All @@ -174,3 +174,9 @@ def test_transcript_gene_should_match_parent_gene():
gene = ensembl77.gene_by_id(TP53_gene_id)
for transcript in gene.transcripts:
eq_(transcript.gene, gene)

@test_ensembl_releases()
def test_BRCA1_001_has_protein_coding_biotype(genome):
transcript = genome.transcripts_by_name("BRCA1-001")[0]
assert transcript.is_protein_coding
eq_(transcript.biotype, "protein_coding")

0 comments on commit 98b61b2

Please sign in to comment.