-
Notifications
You must be signed in to change notification settings - Fork 26
Associate EnsemblRelease with each Variant object #20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bf1964f
888701f
b0616e7
761459b
2fd74ad
4307a25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,19 @@ | ||
| from varcode import load_variants, VariantCollection, Variant | ||
| from nose.tools import eq_ | ||
| from pyensembl import EnsemblRelease | ||
| from varcode import load_maf, VariantCollection, Variant | ||
|
|
||
| def test_maf(): | ||
| variant_collection_from_maf = load_variants("data/tcga_ov.head.maf") | ||
| eq_(variant_collection_from_maf.reference_name, "GRCh37") | ||
| ensembl = EnsemblRelease(75) | ||
| variant_collection_from_maf = load_maf("data/tcga_ov.head.maf") | ||
| expected_variants = [ | ||
| Variant(1, 1650797, "A", "G"), | ||
| Variant(1, 231401797, "A", "C"), | ||
| Variant(1, 23836447, "C", "A"), | ||
| Variant(11,124617502, "C", "G"), | ||
| Variant(1, 1650797, "A", "G", ensembl), | ||
| Variant(1, 231401797, "A", "C", ensembl), | ||
| Variant(1, 23836447, "C", "A", ensembl), | ||
| Variant(11,124617502, "C", "G", ensembl), | ||
| ] | ||
| eq_(len(variant_collection_from_maf), len(expected_variants)) | ||
| for v1, v2 in zip(expected_variants, variant_collection_from_maf): | ||
| eq_(v1, v2) | ||
| for v_expect, v_maf in zip(expected_variants, variant_collection_from_maf): | ||
| eq_(v_expect, v_maf) | ||
| gene_name = v_maf.info['Hugo_Symbol'] | ||
| assert any(gene.name == gene_name for gene in v_maf.genes()), \ | ||
| "Expected gene name %s but got %s" % (gene_name, v_maf.genes()) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,33 +12,28 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from varcode import load_variants | ||
| from varcode import load_vcf | ||
|
|
||
| VCF_FILENAME = "data/somatic_hg19_14muts.vcf" | ||
|
|
||
| def test_vcf_reference_name(): | ||
| variants = load_variants(VCF_FILENAME) | ||
| # the raw reference name can be a file path to the hg19 FASTA file | ||
| assert variants.reference_path and "hg19" in variants.reference_path, \ | ||
| "Expected hg19 reference, got %s" % (variants.reference_path,) | ||
| variants = load_vcf(VCF_FILENAME) | ||
| # after normalization, hg19 should be remapped to GRCh37 | ||
| assert variants.reference_name == "GRCh37" | ||
| assert variants.reference_names() == { "GRCh37" } | ||
|
|
||
| def test_vcf_number_entries(): | ||
| # there are 14 mutations listed in the VCF, make sure they are all parsed | ||
| variants = load_variants(VCF_FILENAME) | ||
| variants = load_vcf(VCF_FILENAME) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Regarding the assert one line below: not a big deal, and no need to change this in this PR, but I'd suggest using something like the numpy testing library (numpy.testing) to avoid having to write out assertion failure strings like this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can use |
||
| assert len(variants) == 14, \ | ||
| "Expected 14 mutations, got %d" % (len(variants),) | ||
|
|
||
| def _check_effect_gene_name(effect): | ||
| variant = effect.variant | ||
| def _check_variant_gene_name(variant): | ||
| expected_gene_names = variant.info['GE'] | ||
| gene_names = [gene.name for gene in effect.genes] | ||
| assert expected_gene_names == gene_names, \ | ||
| assert variant.gene_names() == expected_gene_names, \ | ||
| "Expected gene name %s for variant %s, got %s" % ( | ||
| expected_gene_name, variant, gene_names) | ||
| expected_gene_name, variant, variant.gene_names()) | ||
|
|
||
| def test_vcf_gene_names(): | ||
| variants = load_variants(VCF_FILENAME) | ||
| for effect in variants.variant_effects(): | ||
| yield (_check_effect_gene_name, effect) | ||
| variants = load_vcf(VCF_FILENAME) | ||
| for variant in variants: | ||
| yield (_check_variant_gene_name, variant) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd suggest renaming this default_ensembl
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you say more about why that's a better name?
On Feb 20, 2015 11:35 AM, "timodonnell" notifications@github.com wrote:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's used only as a default parameter, right? Otherwise it looks to me like we're declaring that this module uses a particular Ensembl release.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah wait, sorry, never mind. Somehow I didn't realize this was in a test method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍