Skip to content

Commit

Permalink
Don't assume case-sensitivity for known gene confirmation status (status strings are lowercased when loaded and compared against lowercase values)
Browse files: browse the repository at this point in the history
  • Loading branch information
Jeremy McRae committed Sep 21, 2016
1 parent 57c045f commit 5fb9c20
Show file tree
Hide file tree
Showing 10 changed files with 41 additions and 41 deletions.
4 changes: 2 additions & 2 deletions clinicalfilter/inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,10 @@ def passes_ddg2p_filter(self, variant):
self.log_string = "non-reported DDG2P CNV"
gene_type = self.known_gene["status"]

if "Both DD and IF" in gene_type:
if "both dd and if" in gene_type:
self.log_string = "Both DD and IF DDG2P gene"
return True
elif {"Confirmed DD Gene", "Probable DD gene"} & gene_type == set():
elif {"confirmed dd gene", "probable dd gene"} & gene_type == set():
return False

for inh in self.known_gene["inh"]:
Expand Down
4 changes: 2 additions & 2 deletions clinicalfilter/load_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def parse_gene_line(line, header):

gene = {}
gene['inh'] = {inheritance: set([mechanism])}
gene["status"] = set([status])
gene["status"] = set([status.lower()])
gene["start"] = int(line[header["start"]])
gene["end"] = int(line[header["stop"]])
gene["chrom"] = line[header["chr"]]
Expand Down Expand Up @@ -96,7 +96,7 @@ def open_known_genes(path):
return None

# only include genes with sufficient DDG2P status
allowed = set(["Confirmed DD Gene", "Probable DD gene", "Both DD and IF"])
allowed = set(["confirmed dd gene", "probable dd gene", "both dd and if"])

known = {}
with io.open(path, "r", encoding="latin_1") as handle:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name = "clinical-filter",
version = "0.4.2",
version = "0.4.6",
author = "Jeremy McRae",
author_email = "jeremy.mcrae@sanger.ac.uk",
description="Clinical filtering for trios.",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_allosomal_inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def setUp(self):
self.variants = [var]

# make sure we've got known genes data
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["Confirmed DD Gene"]}
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["confirmed dd gene"]}

self.inh = Allosomal(self.variants, self.trio, self.known_gene, "TEST")
self.inh.is_lof = var.child.is_lof()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_autosomal_inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def setUp(self):
self.variants = [var]

# make sure we've got known genes data
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["Confirmed DD Gene"]}
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["confirmed dd gene"]}

self.inh = Autosomal(self.variants, self.trio, self.known_gene, "TEST")
self.inh.is_lof = var.child.is_lof()
Expand Down
16 changes: 8 additions & 8 deletions tests/test_cnv_inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def setUp(self):
self.variant = self.create_variant(child_gender)

# make sure we've got known genes data
self.known_gene = {"inh": {"Monoallelic": {"Loss of function"}}, "status": {"Confirmed DD Gene"}, "start": 5000, "end": 6000}
self.known_gene = {"inh": {"Monoallelic": {"Loss of function"}}, "status": {"confirmed dd gene"}, "start": 5000, "end": 6000}

syndrome_regions = {("1", "1000", "2000"): 1}

Expand Down Expand Up @@ -314,7 +314,7 @@ def test_passes_ddg2p_filter(self):

gene_inh = {"inh": {"Monoallelic": \
{"Increased gene dosage"}}, "status": \
{"Confirmed DD Gene"}, "start": 5000, "end": 6000}
{"confirmed dd gene"}, "start": 5000, "end": 6000}

gene = "TEST"
inh = "Monoallelic"
Expand All @@ -335,12 +335,12 @@ def test_passes_ddg2p_filter(self):
# check if the variant passes if the confirmed type is "Both DD and IF",
# even if the variant wouldn't otherwise pass
self.inh.gene = "TEST"
self.inh.known_gene["status"] = {"Both DD and IF"}
self.inh.known_gene["status"] = {"both dd and if"}
self.inh.known_gene["inh"][inh] = {"Loss of function"}
self.assertTrue(self.inh.passes_ddg2p_filter(cnv))

# fail on genes that don't have a robust confirmed status
self.inh.known_gene["status"] = {"Possible DD Gene"}
self.inh.known_gene["status"] = {"possible dd gene"}
self.assertFalse(self.inh.passes_ddg2p_filter(cnv))

def test_passes_gene_inheritance_surrounding_disruptive_dup(self):
Expand All @@ -349,7 +349,7 @@ def test_passes_gene_inheritance_surrounding_disruptive_dup(self):

gene_inh = {"inh": {"Monoallelic": \
{"Loss of function"}}, "status": \
{"Confirmed DD Gene"}, "start": 5000, "end": 6000}
{"confirmed dd gene"}, "start": 5000, "end": 6000}

# make a gene that is loss of function, with a monoallelic inheritance
self.inh.known_gene = gene_inh
Expand Down Expand Up @@ -395,7 +395,7 @@ def test_check_passes_intragenic_dup(self):
gene_inh = {"inh": {"Monoallelic": \
{"Loss of Function"}, "X-linked dominant": \
{"Loss of Function"}}, "status": \
{"Confirmed DD Gene"}, "start": 5000, "end": 6000}
{"confirmed dd gene"}, "start": 5000, "end": 6000}

self.inh.known_gene = gene_inh

Expand Down Expand Up @@ -497,7 +497,7 @@ def test_check_compound_inheritance(self):

gene_inh = {"inh": {"Biallelic": \
{"Increased gene dosage"}}, "status": \
{"Confirmed DD Gene"}, "start": 5000, "end": 6000}
{"confirmed dd gene"}, "start": 5000, "end": 6000}

self.inh.known_gene = gene_inh
cnv = self.create_variant("female")
Expand Down Expand Up @@ -535,7 +535,7 @@ def test_check_compound_inheritance_hemizygous(self):

gene_inh = {"inh": {"Hemizygous": \
{"Increased gene dosage"}}, "status": \
{"Confirmed DD Gene"}, "start": 5000, "end": 6000}
{"confirmed dd gene"}, "start": 5000, "end": 6000}

self.inh.known_gene = gene_inh
cnv = self.create_variant("female")
Expand Down
10 changes: 5 additions & 5 deletions tests/test_inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def setUp(self):
self.variants.append(self.create_variant(sex))

# make sure we've got known genes data
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["Confirmed DD Gene"]}
self.known_gene = {"inh": ["Monoallelic"], "confirmed_status": ["confirmed dd gene"]}

self.inh = Autosomal(self.variants, self.trio, self.known_gene, "TEST")

Expand Down Expand Up @@ -136,7 +136,7 @@ def test_get_candidate_variants_monoallelic(self):
""" test that get_candidate_variants() works for a monoallelic variant
"""

inh = {"inh": ["Monoallelic"], "confirmed_status": ["Confirmed DD Gene"]}
inh = {"inh": ["Monoallelic"], "confirmed_status": ["confirmed dd gene"]}
var = self.create_variant(position='150', cq='stop_gained',
geno=['0/1', '0/0', '0/0'])
self.inh = Autosomal([var], self.trio, inh, "TEST")
Expand All @@ -156,7 +156,7 @@ def test_get_candidate_variants_imprinted(self):
"""

# check a variant where the imprinting route should work
inh = {"inh": ["Imprinted"], "confirmed_status": ["Confirmed DD Gene"]}
inh = {"inh": ["Imprinted"], "confirmed_status": ["confirmed dd gene"]}
var = self.create_variant(position='150', cq='stop_gained',
geno=['0/1', '0/1', '0/0'])
self.inh = Autosomal([var], self.trio, inh, "TEST")
Expand Down Expand Up @@ -204,7 +204,7 @@ def test_get_candidate_variants_compound_het(self):
""" test that get_candidate_variants() works for biallelic variants
"""

inh = {"inh": ["Biallelic"], "confirmed_status": ["Confirmed DD Gene"]}
inh = {"inh": ["Biallelic"], "confirmed_status": ["confirmed dd gene"]}
var1 = self.create_variant(position='150', cq='stop_gained',
geno=['0/1', '0/1', '0/0'])
var2 = self.create_variant(position='151', cq='stop_gained',
Expand Down Expand Up @@ -275,7 +275,7 @@ def test_check_compound_hets(self):
# set the inheritance type, the compound het type ("compound_het"
# for autosomal variants, and start autosomal inheritance)
# known_genes = "Biallelic"
known_gene = {"inh": ["Biallelic"], "confirmed_status": ["Confirmed DD Gene"]}
known_gene = {"inh": ["Biallelic"], "confirmed_status": ["confirmed dd gene"]}
self.inh = Autosomal([var1, var2, var3], self.trio, known_gene, "TEST")

variants = [(), ()]
Expand Down
38 changes: 19 additions & 19 deletions tests/test_load_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def test_parse_gene_line(self):
header = {'gene': 0, 'chr': 1, 'start': 2, 'stop': 3, 'type': 4,
'mode': 5, 'mech': 6}

line = ['TEST', 'chr1', '1000', '2000', 'Confirmed DD Gene',
line = ['TEST', 'chr1', '1000', '2000', 'confirmed dd gene',
'Biallelic', 'Loss-of-function']

self.assertEqual(parse_gene_line(line, header), ('TEST', {
'chrom': 'chr1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Biallelic': set(['Loss-of-function'])}}))

def test_parse_gene_line_both_mechanism(self):
Expand All @@ -83,12 +83,12 @@ def test_parse_gene_line_both_mechanism(self):

header = {'gene': 0, 'chr': 1, 'start': 2, 'stop': 3, 'type': 4,
'mode': 5, 'mech': 6}
line = ['TEST', 'chr1', '1000', '2000', 'Confirmed DD Gene',
line = ['TEST', 'chr1', '1000', '2000', 'confirmed dd gene',
'Both', 'Loss-of-function']

self.assertEqual(parse_gene_line(line, header), ('TEST', {
'chrom': 'chr1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Biallelic': set(['Loss-of-function']),
'Monoallelic': set(['Loss-of-function']),
'Both': set(['Loss-of-function'])}}))
Expand All @@ -98,7 +98,7 @@ def test_open_known_genes(self):
'''

header = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']
line = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']

self.temp.write(('\t'.join(header) + '\n').encode('utf8'))
Expand All @@ -107,7 +107,7 @@ def test_open_known_genes(self):

self.assertEqual(open_known_genes(self.temp.name),
{'TEST': {'chrom': '1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function'])}}
})

Expand All @@ -116,9 +116,9 @@ def test_open_known_genes_multimodes(self):
'''

header = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']
line1 = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line1 = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']
line2 = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line2 = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Biallelic', 'Loss-of-function']

self.temp.write(('\t'.join(header) + '\n').encode('utf8'))
Expand All @@ -128,7 +128,7 @@ def test_open_known_genes_multimodes(self):

self.assertEqual(open_known_genes(self.temp.name),
{'TEST': {'chrom': '1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function']),
'Biallelic': set(['Loss-of-function'])}}
})
Expand All @@ -138,9 +138,9 @@ def test_open_known_genes_multimechs(self):
'''

header = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']
line1 = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line1 = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']
line2 = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line2 = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Monoallelic', 'Activating']

self.temp.write(('\t'.join(header) + '\n').encode('utf8'))
Expand All @@ -150,7 +150,7 @@ def test_open_known_genes_multimechs(self):

self.assertEqual(open_known_genes(self.temp.name),
{'TEST': {'chrom': '1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function', 'Activating'])}}
})

Expand All @@ -159,9 +159,9 @@ def test_open_known_genes_multigenes(self):
'''

header = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']
line1 = ['TEST', '1', '1000', '2000', 'Confirmed DD Gene',
line1 = ['TEST', '1', '1000', '2000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']
line2 = ['TEST2', '1', '3000', '4000', 'Confirmed DD Gene',
line2 = ['TEST2', '1', '3000', '4000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']

self.temp.write(('\t'.join(header) + '\n').encode('utf8'))
Expand All @@ -171,10 +171,10 @@ def test_open_known_genes_multigenes(self):

self.assertEqual(open_known_genes(self.temp.name),
{'TEST': {'chrom': '1', 'start': 1000, 'end': 2000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function'])}},
'TEST2': {'chrom': '1', 'start': 3000, 'end': 4000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function'])}}
})

Expand All @@ -183,9 +183,9 @@ def test_open_known_genes_wrong_status(self):
'''

header = ['gene', 'chr', 'start', 'stop', 'type', 'mode', 'mech']
line1 = ['TEST', '1', '1000', '2000', 'Possible DD Gene',
line1 = ['TEST', '1', '1000', '2000', 'possible dd gene',
'Monoallelic', 'Loss-of-function']
line2 = ['TEST2', '1', '3000', '4000', 'Confirmed DD Gene',
line2 = ['TEST2', '1', '3000', '4000', 'confirmed dd gene',
'Monoallelic', 'Loss-of-function']

self.temp.write(('\t'.join(header) + '\n').encode('utf8'))
Expand All @@ -195,7 +195,7 @@ def test_open_known_genes_wrong_status(self):

self.assertEqual(open_known_genes(self.temp.name),
{'TEST2': {'chrom': '1', 'start': 3000, 'end': 4000,
'status': set(['Confirmed DD Gene']),
'status': set(['confirmed dd gene']),
'inh': {'Monoallelic': set(['Loss-of-function'])}}
})

Expand Down
2 changes: 1 addition & 1 deletion tests/test_load_vcfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def setUp(self):
"EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"]
self.known_genes = {"ATRX": {"inheritance": {"Hemizygous": \
{"Loss of function"}}, "start": 1, "chrom": "1", \
"confirmed_status": {"Confirmed DD Gene"}, "end": 20000000}}
"confirmed_status": {"confirmed dd gene"}, "end": 20000000}}

self.vcf_loader = LoadVCFs(total_trios, maf_tags, self.known_genes, set(), None, None, )

Expand Down
2 changes: 1 addition & 1 deletion tests/test_variant_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def setUp(self):
# here are the default filtering criteria, as loaded into python
known = {"ATRX": {"inheritance": {"Hemizygous": \
{"Loss of function"}}, "start": "10000000", "chrom": "1", \
"confirmed_status": {"Confirmed DD Gene"}, "end": "20000000"}}
"confirmed_status": {"confirmed dd gene"}, "end": "20000000"}}

self.pops = ["AFR_AF", "AMR_AF", "ASN_AF", "DDD_AF", "EAS_AF",
"ESP_AF", "EUR_AF", "MAX_AF", "SAS_AF", "UK10K_cohort_AF"]
Expand Down

0 comments on commit 5fb9c20

Please sign in to comment.