Skip to content

Commit

Permalink
Merge 6ce76e2 into a7fa8ea
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-cowart committed Mar 14, 2024
2 parents a7fa8ea + 6ce76e2 commit 2350f03
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 97 deletions.
68 changes: 13 additions & 55 deletions sequence_processing_pipeline/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,7 @@ def _configure_profile(self):
profile_paths.append(some_path)

# There must be at least one valid profile for the Pipeline to
# continue operation. There must also be a default profile, described
# below.
# continue operation.
if not profile_paths:
raise ValueError(f"'{profile_dir}' doesn't contain profile files")

Expand All @@ -293,72 +292,31 @@ def _configure_profile(self):

# the 'profile' attribute must have a dictionary as its value.
# all profiles must contain 'instrument_type' and 'assay_type',
# unless instrument_type == 'default', in which case the
# profile defines the defaults across all instrument-types and
# assay-types.
if 'instrument_type' not in contents['profile']:
raise ValueError("'instrument_type' is not an attribute "
f"in '{profile_path}'.profile")

if 'assay_type' not in contents['profile']:
if contents['profile']['instrument_type'] != 'default':
raise ValueError("'assay_type' is not an attribute "
f"in '{profile_path}'.profile")
raise ValueError("'assay_type' is not an attribute "
f"in '{profile_path}'.profile")

profiles.append(contents)

# The default profile provides 'fall-through' configuration settings
# for all items. This allows the user to not have to redefine settings
# for all items for all instrument and assay combinations.

# the final profile is created by taking the default profile and using
# it as a base. If a profile matching the run-directory's instrument
# and assay types is found, settings from that profile will overwrite
# the base-profile settings as appropriate.
base_profile = None
selected_profile = None

# iterate through all the profiles, searching for a default
# profile and the first profile w/matching instrument and assay types.
# if a matching profile isn't found, that's okay, but if a default
# profile isn't found, then raise an Error.

for profile in profiles:
p_i_type = profile['profile']['instrument_type']
if p_i_type == 'default':
base_profile = profile
else:
p_a_type = profile['profile']['assay_type']
i_type = profile['profile']['instrument_type']
a_type = profile['profile']['assay_type']

# if both items have been found, it's safe to break early.
if base_profile is not None and selected_profile is not None:
break
if i_type == instr_type and a_type == assay_type:
selected_profile = profile
break

if selected_profile is None:
raise ValueError(f"a matching profile ({instr_type}, {assay_type}"
") was not found. Please notify an administrator")

if p_i_type == instr_type and p_a_type == assay_type:
selected_profile = profile

if base_profile is None:
raise ValueError("a 'default' profile was not found")

if selected_profile:
# overwrite the configuration values in the base-profile with those
# in the matching profile as appropriate.
for attribute in selected_profile['profile']['configuration']:
value = selected_profile['profile']['configuration'][attribute]
base_profile['profile']['configuration'][attribute] = value

# overwrite default info w/selected profile (if one was found)
# so that complete profile can be written to working directory
# as a log.
base_profile['profile']['instrument_type'] = instr_type
base_profile['profile']['assay_type'] = assay_type

# load the default first to create a default entry for everything.
# then overwrite the defaults as they appear once you've identified
# the correct (instrument-type, assay-type) pair.
# set this to a new self.config_profile variable and modify the tests
# and code accordingly.
self.config_profile = base_profile
self.config_profile = selected_profile

def _search_for_run_dir(self):
# this method will catch a run directory as well as its products
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
{
"profile": {
"instrument_type": "default",
"instrument_type": "MiSeq",
"assay_type": "TruSeq HT",
"configuration": {
"bcl2fastq": {
"nodes": 1,
"nprocs": 16,
"nodes": 2,
"nprocs": 62,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"wallclock_time_in_minutes": 1022,
"modules_to_load": [
"bcl2fastq_2.20.0.422"
"bcl2fastq_2.20.0.222"
],
"executable_path": "bcl2fastq",
"per_process_memory_limit": "10gb"
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
Expand Down Expand Up @@ -46,10 +47,10 @@
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 1,
"cpus_per_task": 8,
"nodes": 2,
"cpus_per_task": 32,
"queue": "qiita",
"wallclock_time_in_minutes": 240,
"wallclock_time_in_minutes": 2028,
"minimap2_databases": "/scratch/databases/minimap2",
"modules_to_load": [
"fastp_0.20.1",
Expand All @@ -64,27 +65,27 @@
"known_adapters_path": "fastp_known_adapters_formatted.fna",
"bucket_size": 8,
"length_limit": 100,
"cores_per_task": 4
"cores_per_task": 2
},
"seqpro": {
"seqpro_path": "seqpro",
"modules_to_load": []
},
"fastqc": {
"nodes": 1,
"nprocs": 16,
"nodes": 2,
"nprocs": 62,
"queue": "qiita",
"nthreads": 16,
"wallclock_time_in_minutes": 60,
"nthreads": 62,
"wallclock_time_in_minutes": 220,
"modules_to_load": [
"fastqc_0.11.5"
],
"fastqc_executable_path": "fastqc",
"multiqc_executable_path": "multiqc",
"multiqc_config_file_path": "sequence_processing_pipeline/multiqc-bclconvert-config.yaml",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
"job_pool_size": 120,
"job_max_array_length": 2000
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 2,
"cpus_per_task": 32,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 4,
"cpus_per_task": 32,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 4,
"cpus_per_task": 32,
Expand Down
34 changes: 8 additions & 26 deletions sequence_processing_pipeline/tests/test_Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sequence_processing_pipeline.Pipeline import Pipeline, InstrumentUtils
import unittest
from os import makedirs, walk
from os.path import abspath, basename, join
from os.path import abspath, basename, join, exists
from functools import partial
import re
from shutil import copy
Expand Down Expand Up @@ -90,7 +90,9 @@ def delete_rtacomplete_file(self):

def delete_more_files(self):
for file_path in self.delete_these:
os.remove(file_path)
if exists(file_path):
# if file no longer exists, that's okay.
os.remove(file_path)

def _make_mapping_file(self, output_file_path):
cols = ('sample_name', 'barcode', 'library_construction_protocol',
Expand Down Expand Up @@ -309,31 +311,11 @@ def test_creation(self):
self.output_file_path,
self.qiita_id, Pipeline.METAGENOMIC_PTYPE)

# test Error returned when 'assay_type' does not exist in default
# profile. Error should not be returned in this case as default
# shouldn't have an assay_type.
with open(bad_json_file, 'w') as f:
f.write('{ "profile": { "instrument_type": "default", '
'"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": '
'16, "queue": "qiita", "wallclock_time_in_minutes": 216, '
'"modules_to_load": [ "bcl2fastq_2.20.0.422" ], '
'"executable_path": "bcl2fastq", '
'"per_process_memory_limit": "10gb" } } } }')

pipeline = Pipeline(self.good_config_file, self.good_run_id,
self.good_sample_sheet_path, None,
self.output_file_path, self.qiita_id,
Pipeline.METAGENOMIC_PTYPE)

self.assertIsNotNone(pipeline)

# test Error returned when a non-default profile is missing assay_type
bad_json_file = self.path('configuration_profiles', 'bad.json')
self.delete_these.append(bad_json_file)

another_bad_json_file = self.path('configuration_profiles',
'more_bad.json')
self.delete_these.append(another_bad_json_file)

with open(another_bad_json_file, 'w') as f:
with open(bad_json_file, 'w') as f:
f.write('{ "profile": { "instrument_type": "MiSeq", '
'"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": '
'16, "queue": "qiita", "wallclock_time_in_minutes": 216, '
Expand All @@ -345,7 +327,7 @@ def test_creation(self):
"attribute in 'sequence_"
"processing_pipeline/tests/"
"data/configuration_profiles/"
"more_bad.json'"):
"bad.json'"):
Pipeline(self.good_config_file,
self.good_run_id,
self.good_sample_sheet_path, None,
Expand Down

0 comments on commit 2350f03

Please sign in to comment.