Permalink
Browse files

Add error bias filter

  • Loading branch information...
1 parent 880ce55 commit 8a79f574ebf750bb515543b9fd566edd4b3764cd James Casbon committed Jun 25, 2012
Showing with 98 additions and 1 deletion.
  1. +12 −0 docs/FILTERS.rst
  2. +1 −0 setup.py
  3. +85 −1 vcf/filters.py
View
@@ -9,6 +9,18 @@ operation that PyVCF offers an extensible script. ``vcf_filter.py`` does
the work of reading input, updating the metadata and filtering the records.
+Existing Filters
+----------------
+
+.. autoclass:: vcf.filters.SiteQuality
+
+.. autoclass:: vcf.filters.VariantGenotypeQuality
+
+.. autoclass:: vcf.filters.ErrorBiasFilter
+
+
+
+
Adding a filter
---------------
View
@@ -44,6 +44,7 @@
'vcf.filters': [
'site_quality = vcf.filters:SiteQuality',
'vgq = vcf.filters:VariantGenotypeQuality',
+ 'eb = vcf.filters:ErrorBiasFilter',
]
},
url='https://github.com/jamescasbon/PyVCF',
View
@@ -1,3 +1,7 @@
+try:
+ from rpy2 import robjects
+except:
+ robjects = None
class Base(object):
@@ -22,13 +26,13 @@ def __call__(self):
""" filter a site, return not None if the site should be filtered """
raise NotImplementedError('Filters must implement this method')
-
def filter_name(self):
""" return the name to put in the VCF header, default is ``name`` + ``threshold`` """
return '%s%s' % (self.name, self.threshold)
class SiteQuality(Base):
+ """ Filter low quailty sites """
description = 'Filter sites by quality'
name = 'sq'
@@ -47,6 +51,10 @@ def __call__(self, record):
class VariantGenotypeQuality(Base):
+ """ Filters sites with low quality variants. i.e. it is possible
+ to have a high site quality with many low quality calls. This
+ filter demands at least one call be above a threshold quality.
+ """
description = 'Demand a minimum quality associated with a non reference call'
name = 'mgq'
@@ -66,3 +74,79 @@ def __call__(self, record):
return vgq
+class ErrorBiasFilter(Base):
+ """ Some sequencing technologies, notably pyrosequencing, produce mutation
+ hotspots where there is a constant level of noise, producing some reference
+ and some heterozygote calls.
+
+ This filter computes a Bayes Factor for each site by comparing
+ the binomial likelihood of the observed allelic depths under:
+
+ * A model with constant error equal to the MAF
+ * A model where each sample is the ploidy reported by the caller
+
+ The test value is the log of the bayes factor. Higher values
+ are more likely to be errors.
+
+ Note: this filter requires rpy2
+ """
+
+ description = 'Filter sites with a constant level of mutation across all samples'
+ name = 'eb'
+
+ @classmethod
+ def customize_parser(self, parser):
+ parser.add_argument('--errlr', type=int, default=-10,
+ help='Filter sites above this error log odds ratio')
+
+ def __init__(self, args):
+ self.threshold = args.errlr
+ if robjects is None:
+ raise Exception('Please install rpy2')
+ self.ll_test = robjects.r('''
+ function(ra, aa, gt, diag=F) {
+ ra_sum = sum(ra)
+ aa_sum = sum(aa)
+ ab = aa_sum / (ra_sum + aa_sum)
+ gtp = 0.5 + (0.48*(gt-1))
+
+ error_likelihood = log(dbinom(aa, ra+aa, ab))
+ gt_likelihood = log(dbinom(aa, ra+aa, gtp))
+
+ if (diag) {
+ print(ra)
+ print(aa)
+ print(gtp)
+ print(ab)
+ print(error_likelihood)
+ print(gt_likelihood)
+ }
+ error_likelihood = sum(error_likelihood)
+ gt_likelihood = sum(gt_likelihood)
+ c(error_likelihood - gt_likelihood, ab)
+ }
+ ''')
+
+ def __call__(self, record):
+ if record.is_monomorphic:
+ return None
+ passed, tv, ab = self.bias_test(record.samples)
+ if tv > self.threshold:
+ return tv
+
+ def bias_test(self, calls):
+ calls = [x for x in calls if x.called]
+ #TODO: single genotype assumption
+ try:
+ # freebayes
+ ra = robjects.IntVector([x['RO'][0] for x in calls])
+ aa = robjects.IntVector([x['AO'][0] for x in calls])
+ except KeyError:
+ # GATK
+ ra = robjects.IntVector([x['AD'][0] for x in calls])
+ aa = robjects.IntVector([x['AD'][1] for x in calls])
+
+ gt = robjects.IntVector([x.gt_type for x in calls])
+ test_val, ab = self.ll_test(ra, aa, gt)
+
+ return test_val < 0, test_val, ab

0 comments on commit 8a79f57

Please sign in to comment.