Skip to content
This repository has been archived by the owner on Jan 18, 2020. It is now read-only.

Commit

Permalink
Add code to check genome section in MANIFEST before loading sample
Browse files Browse the repository at this point in the history
This includes tests to cover the different cases where the genome
section of the MANIFEST is invalid.

Signed-off-by: Don Naegely <naegelyd@gmail.com>
  • Loading branch information
naegelyd committed Oct 15, 2014
1 parent 8ba5d5e commit ecb230a
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 8 deletions.
85 changes: 85 additions & 0 deletions tests/cases/sample_load_process/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,91 @@ def test_pipeline(self):
system = System.get(Sample.objects.all()[0])
self.assertEqual(len(system), 3)

@override_settings(REQUIRED_GENOME_VERSION='hg19')
def test_missing_genome_section(self):
    # With hg19 required, the expectations below cover batch1 only: three
    # samples, each with the same result count.
    per_sample = [
        {'batch': 'batch1', 'sample': name, 'count': 1963}
        for name in ('NA12891', 'NA12892', 'NA12878')
    ]
    expected_counts = {
        'batches': 1,
        'results_per_sample': per_sample,
        'samples': 3,
        'samples_per_batch': [(1, 3)],
        'results': sum(entry['count'] for entry in per_sample),
    }

    # Queueing immediately validates and creates the samples.
    management.call_command('samples', 'queue')

    # Drain the queues synchronously: variants first, then effects (which
    # run on the 'default' queue).
    queue_workers = [get_worker(name) for name in ('variants', 'default')]
    for queue_worker in queue_workers:
        queue_worker.work(burst=True)

    # The MANIFEST for Batch 2 lists no genome version, so results should
    # exist only for the samples in Batch 1.
    self.assertEqual(Result.objects.count(), expected_counts['results'])

    # Only the expected number of batches should have been published.
    published_batches = Batch.objects.filter(published=True)
    self.assertEqual(published_batches.count(), expected_counts['batches'])

    # Every expected sample is published and has the expected count.
    for expected in expected_counts['results_per_sample']:
        sample = Sample.objects.get(
            name=expected['sample'],
            batch__name=expected['batch'],
        )
        self.assertTrue(sample.published)
        self.assertEqual(sample.count, expected['count'])

    # Every expected batch is published and has the expected count.
    for batch_pk, batch_count in expected_counts['samples_per_batch']:
        batch = Batch.objects.get(pk=batch_pk)
        self.assertTrue(batch.published)
        self.assertEqual(batch.count, batch_count)

@override_settings(REQUIRED_GENOME_VERSION='hg18')
def test_wrong_genome_version(self):
    # Queueing immediately validates and creates the samples.
    management.call_command('samples', 'queue')

    # Drain the queues synchronously: variants first, then effects (which
    # run on the 'default' queue).
    queue_workers = [get_worker(name) for name in ('variants', 'default')]
    for queue_worker in queue_workers:
        queue_worker.work(burst=True)

    # A genome version is required but matches none of the versions
    # specified in the MANIFESTs, so no data should have been loaded.
    for model in (Variant, Sample, Cohort, Batch, Project):
        self.assertEqual(model.objects.count(), 0)


class SnpeffReloadTest(QueueTestCase):
def test(self):
Expand Down
4 changes: 4 additions & 0 deletions tests/samples/batch1/locus_1/MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ version = 0
[vcf]
file = locus_1.vcf
md5 = f67a6913ee83345657a6e790c6f5feee

[genome]
name = Genome Reference Consortium GRCh37
version = hg19
30 changes: 22 additions & 8 deletions vdw/samples/pipeline/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sts
import time
import vcf
from django.conf import settings
from django.db import connections, transaction
from sts.contextmanagers import transition
from vdw.pipeline import checks, job, ManifestReader
Expand Down Expand Up @@ -51,26 +52,39 @@ def check_sample_section(manifest):
def load_samples(manifest_path, database, **kwargs):
manifest = ManifestReader(manifest_path)

# Ensure the sample is marked to be loaded..
# If a specific version of the genome is required then make sure the
# MANIFEST lists that version or abort the load process.
genome_version = getattr(settings, 'REQUIRED_GENOME_VERSION', None)
if genome_version:
genome_info = manifest.section('genome')

if not genome_info:
log.info('Genome version "{0}" is required but genome section is '
'not included in MANIFEST({1}). Skipping sample.'
.format(genome_version, manifest_path))
return

if genome_version != genome_info['version']:
log.info(
'Genome version "{0}" is required but version "{1}" was found '
'in the MANIFEST({2}). Skipping sample.'
.format(genome_version, genome_info['version'], manifest_path))
return

# Ensure the sample is marked to be loaded.
if not manifest.marked_for_load():
log.info('Sample not marked for load', extra={
'manifest_path': manifest_path,
})
return

# Ensure the sample section is valid..
# Ensure the sample section is valid.
if not check_sample_section(manifest):
log.info('Manifest sample section is not valid', extra={
'manifest_path': manifest_path,
})
return

# [sample]
# project = PCGC
# batch = OTHER
# sample = 1-03131
# version = 1

sample_info = manifest.section('sample')
vcf_info = manifest.section('vcf')

Expand Down

0 comments on commit ecb230a

Please sign in to comment.