Removed default profile functionality #135

Merged 3 commits on Mar 14, 2024
68 changes: 13 additions & 55 deletions sequence_processing_pipeline/Pipeline.py
@@ -271,8 +271,7 @@ def _configure_profile(self):
profile_paths.append(some_path)

# There must be at least one valid profile for the Pipeline to
# continue operation. There must also be a default profile, described
# below.
# continue operation.
if not profile_paths:
raise ValueError(f"'{profile_dir}' doesn't contain profile files")

@@ -293,72 +292,31 @@ def _configure_profile(self):

# the 'profile' attribute must have a dictionary as its value.
# all profiles must contain 'instrument_type' and 'assay_type',
# unless instrument_type == 'default', in which case the
# profile defines the defaults across all instrument-types and
# assay-types.
if 'instrument_type' not in contents['profile']:
raise ValueError("'instrument_type' is not an attribute "
f"in '{profile_path}'.profile")

if 'assay_type' not in contents['profile']:
if contents['profile']['instrument_type'] != 'default':
raise ValueError("'assay_type' is not an attribute "
f"in '{profile_path}'.profile")
raise ValueError("'assay_type' is not an attribute "
f"in '{profile_path}'.profile")

profiles.append(contents)

# The default profile provides 'fall-through' configuration settings
# for all items. This allows the user to not have to redefine settings
# for all items for all instrument and assay combinations.

# the final profile is created by taking the default profile and using
# it as a base. If a profile matching the run-directory's instrument
# and assay types is found, settings from that profile will overwrite
# the base-profile settings as appropriate.
base_profile = None
selected_profile = None

# iterate through all the profiles, searching for a default
# profile and the first profile w/matching instrument and assay types.
# if a matching profile isn't found, that's okay, but if a default
# profile isn't found, then raise an Error.

for profile in profiles:
p_i_type = profile['profile']['instrument_type']
if p_i_type == 'default':
base_profile = profile
else:
p_a_type = profile['profile']['assay_type']
i_type = profile['profile']['instrument_type']
a_type = profile['profile']['assay_type']

# if both items have been found, it's safe to break early.
if base_profile is not None and selected_profile is not None:
break
if i_type == instr_type and a_type == assay_type:
selected_profile = profile
break

if selected_profile is None:
raise ValueError(f"a matching profile ({instr_type}, {assay_type}"
") was not found. Please notify an administrator")

if p_i_type == instr_type and p_a_type == assay_type:
selected_profile = profile

if base_profile is None:
raise ValueError("a 'default' profile was not found")

if selected_profile:
# overwrite the configuration values in the base-profile with those
# in the matching profile as appropriate.
for attribute in selected_profile['profile']['configuration']:
value = selected_profile['profile']['configuration'][attribute]
base_profile['profile']['configuration'][attribute] = value

# overwrite default info w/selected profile (if one was found)
# so that complete profile can be written to working directory
# as a log.
base_profile['profile']['instrument_type'] = instr_type
base_profile['profile']['assay_type'] = assay_type

# load the default first to create a default entry for everything.
# then overwrite the defaults as they appear once you've identified
# the correct (instrument-type, assay-type) pair.
# set this to a new self.config_profile variable and modify the tests
# and code accordingly.
self.config_profile = base_profile
self.config_profile = selected_profile

def _search_for_run_dir(self):
# this method will catch a run directory as well as its products
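For context, the selection logic after this change reduces to a straight match on the run directory's instrument and assay types, with no base profile to fall back on. A minimal sketch, assuming `profiles` is the list of parsed profile dicts built earlier in `_configure_profile` (the standalone helper name here is hypothetical; the real code does this inline):

    # Illustrative sketch only; attribute names and error text follow the diff above.
    def _select_profile(profiles, instr_type, assay_type):
        for profile in profiles:
            p = profile['profile']
            if p['instrument_type'] == instr_type and p['assay_type'] == assay_type:
                # the first exact match on (instrument_type, assay_type) wins
                return profile

        # with the 'default' fall-through removed, no match is now a hard error
        raise ValueError(f"a matching profile ({instr_type}, {assay_type}"
                         ") was not found. Please notify an administrator")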
Changes to a configuration profile JSON file (filename not captured in this view):
@@ -1,17 +1,18 @@
{
"profile": {
"instrument_type": "default",
"instrument_type": "MiSeq",
"assay_type": "TruSeq HT",
"configuration": {
"bcl2fastq": {
"nodes": 1,
"nprocs": 16,
"nodes": 2,
"nprocs": 62,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"wallclock_time_in_minutes": 1022,
"modules_to_load": [
"bcl2fastq_2.20.0.422"
"bcl2fastq_2.20.0.222"
],
"executable_path": "bcl2fastq",
"per_process_memory_limit": "10gb"
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
@@ -46,10 +47,10 @@
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 1,
"cpus_per_task": 8,
"nodes": 2,
"cpus_per_task": 32,
"queue": "qiita",
"wallclock_time_in_minutes": 240,
"wallclock_time_in_minutes": 2028,
"minimap2_databases": "/scratch/databases/minimap2",
"modules_to_load": [
"fastp_0.20.1",
@@ -64,27 +65,27 @@
"known_adapters_path": "fastp_known_adapters_formatted.fna",
"bucket_size": 8,
"length_limit": 100,
"cores_per_task": 4
"cores_per_task": 2
},
"seqpro": {
"seqpro_path": "seqpro",
"modules_to_load": []
},
"fastqc": {
"nodes": 1,
"nprocs": 16,
"nodes": 2,
"nprocs": 62,
"queue": "qiita",
"nthreads": 16,
"wallclock_time_in_minutes": 60,
"nthreads": 62,
"wallclock_time_in_minutes": 220,
"modules_to_load": [
"fastqc_0.11.5"
],
"fastqc_executable_path": "fastqc",
"multiqc_executable_path": "multiqc",
"multiqc_config_file_path": "sequence_processing_pipeline/multiqc-bclconvert-config.yaml",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
"job_pool_size": 120,
"job_max_array_length": 2000
}
}
}
Changes to another configuration profile JSON file (filename not captured in this view):
@@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 2,
"cpus_per_task": 32,
Changes to another configuration profile JSON file (filename not captured in this view):
@@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 4,
"cpus_per_task": 32,
Changes to another configuration profile JSON file (filename not captured in this view):
@@ -14,6 +14,38 @@
"executable_path": "bcl2fastq",
"per_process_memory_limit": "100gb"
},
"bcl-convert": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 216,
"modules_to_load": [
"bclconvert_3.7.5"
],
"executable_path": "bcl-convert",
"per_process_memory_limit": "10gb"
},
"qc": {
"nodes": 1,
"nprocs": 16,
"queue": "qiita",
"wallclock_time_in_minutes": 60,
"minimap2_databases": [
"/databases/minimap2/human-phix-db.mmi"
],
"kraken2_database": "/databases/minimap2/hp_kraken-db.mmi",
"modules_to_load": [
"fastp_0.20.1",
"samtools_1.12",
"minimap2_2.18"
],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
},
"nu-qc": {
"nodes": 4,
"cpus_per_task": 32,
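Because the 'default' profile no longer supplies fall-through values, each profile file touched above has to carry every job section it needs on its own, which is presumably why the same bcl-convert and qc blocks are added to each file. A hedged sketch of the per-file checks implied by the Pipeline.py diff, written as a standalone function for illustration (the pipeline performs these checks inline while iterating over the profile directory):

    import json

    def load_profile(profile_path):
        """Load one configuration profile and enforce the attributes that are
        now mandatory for every profile, since no 'default' fall-through exists."""
        with open(profile_path) as f:
            contents = json.load(f)

        # error messages mirror those raised in Pipeline._configure_profile
        if 'instrument_type' not in contents['profile']:
            raise ValueError("'instrument_type' is not an attribute "
                             f"in '{profile_path}'.profile")

        if 'assay_type' not in contents['profile']:
            raise ValueError("'assay_type' is not an attribute "
                             f"in '{profile_path}'.profile")

        return contents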
34 changes: 8 additions & 26 deletions sequence_processing_pipeline/tests/test_Pipeline.py
@@ -4,7 +4,7 @@
from sequence_processing_pipeline.Pipeline import Pipeline, InstrumentUtils
import unittest
from os import makedirs, walk
from os.path import abspath, basename, join
from os.path import abspath, basename, join, exists
from functools import partial
import re
from shutil import copy
@@ -90,7 +90,9 @@ def delete_rtacomplete_file(self):

def delete_more_files(self):
for file_path in self.delete_these:
os.remove(file_path)
if exists(file_path):
# if file no longer exists, that's okay.
os.remove(file_path)

def _make_mapping_file(self, output_file_path):
cols = ('sample_name', 'barcode', 'library_construction_protocol',
@@ -309,31 +311,11 @@ def test_creation(self):
self.output_file_path,
self.qiita_id, Pipeline.METAGENOMIC_PTYPE)

# test Error returned when 'assay_type' does not exist in default
# profile. Error should not be returned in this case as default
# shouldn't have an assay_type.
with open(bad_json_file, 'w') as f:
f.write('{ "profile": { "instrument_type": "default", '
'"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": '
'16, "queue": "qiita", "wallclock_time_in_minutes": 216, '
'"modules_to_load": [ "bcl2fastq_2.20.0.422" ], '
'"executable_path": "bcl2fastq", '
'"per_process_memory_limit": "10gb" } } } }')

pipeline = Pipeline(self.good_config_file, self.good_run_id,
self.good_sample_sheet_path, None,
self.output_file_path, self.qiita_id,
Pipeline.METAGENOMIC_PTYPE)

self.assertIsNotNone(pipeline)

# test Error returned when a non-default profile is missing assay_type
bad_json_file = self.path('configuration_profiles', 'bad.json')
self.delete_these.append(bad_json_file)

another_bad_json_file = self.path('configuration_profiles',
'more_bad.json')
self.delete_these.append(another_bad_json_file)

with open(another_bad_json_file, 'w') as f:
with open(bad_json_file, 'w') as f:
f.write('{ "profile": { "instrument_type": "MiSeq", '
'"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": '
'16, "queue": "qiita", "wallclock_time_in_minutes": 216, '
@@ -345,7 +327,7 @@ def test_creation(self):
"attribute in 'sequence_"
"processing_pipeline/tests/"
"data/configuration_profiles/"
"more_bad.json'"):
"bad.json'"):
Pipeline(self.good_config_file,
self.good_run_id,
self.good_sample_sheet_path, None,
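The updated test exercises exactly that failure path: it writes a bad.json profile whose instrument_type is MiSeq but which omits assay_type, then asserts that constructing a Pipeline raises. A trimmed sketch of the pattern as it would sit inside the existing test method, with the fixture attributes from setUp (self.good_config_file, self.qiita_id, etc.) and the shortened JSON body assumed for illustration:

    # Sketch of the updated assertion inside test_creation; fixture values and
    # the abbreviated profile contents are assumptions, not the verbatim test.
    with open(bad_json_file, 'w') as f:
        f.write('{ "profile": { "instrument_type": "MiSeq", '
                '"configuration": { } } }')  # note: no "assay_type" attribute

    with self.assertRaisesRegex(ValueError,
                                "'assay_type' is not an attribute in"):
        Pipeline(self.good_config_file, self.good_run_id,
                 self.good_sample_sheet_path, None,
                 self.output_file_path, self.qiita_id,
                 Pipeline.METAGENOMIC_PTYPE)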