Skip to content

Commit

Permalink
Merge pull request #107 from antgonza/changes-from-qiita-rc-10.2023
Browse files Browse the repository at this point in the history
changes for replicates
  • Loading branch information
charles-cowart committed Oct 12, 2023
2 parents ae0ec58 + b148e55 commit 1715f0c
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 66 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

# test output
sequence_processing_pipeline/tests/data/output_dir/
16 changes: 11 additions & 5 deletions sequence_processing_pipeline/GenPrepFileJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class GenPrepFileJob(Job):
def __init__(self, run_dir, convert_job_path, qc_job_path, output_path,
input_file_path, seqpro_path, projects, modules_to_load,
qiita_job_id, is_amplicon=False, has_replicates=False):
qiita_job_id, is_amplicon=False):

super().__init__(run_dir,
output_path,
Expand All @@ -29,7 +29,7 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path,
self.is_amplicon = is_amplicon
self.prep_file_paths = None
self.commands = []
self.has_replicates = has_replicates
self.has_replicates = False
self.replicate_count = 0

# make the 'root' of your run_directory
Expand Down Expand Up @@ -82,11 +82,15 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path,
# parse_prep extended to support parsing pre-prep files as well.
fp = parse_prep(self.input_file_path)
if pre_prep_needs_demuxing(fp):
self.has_replicates = True

# overwrite default setting
file_paths = self._write_to_file(demux_pre_prep(fp))
else:
fp = KLSampleSheet(self.input_file_path)
if sheet_needs_demuxing(fp):
self.has_replicates = True

# overwrite default setting
file_paths = self._write_to_file(demux_sample_sheet(fp))

Expand All @@ -108,18 +112,20 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path,
def _write_to_file(self, demuxed):
'''
Saves the new plate-replicate-specific sample-sheet or pre-prep file
w/a unique name.
w/a unique name. Assume demuxed is a list of DataFrames originating
from a single sample-sheet or pre-prep file.
:param demuxed:
:return:
'''
results = []
for count, replicate in enumerate(demuxed, 1):
if self.is_amplicon:
fp = join(self.output_path, f"sheet_{count}.txt")
replicate['sample_name'] = replicate['orig_name']
fp = join(self.output_path, f"replicate_sheet_{count}.txt")
replicate.to_csv(fp, sep='\t', index=False, header=True)
results.append(fp)
else:
fp = join(self.output_path, f"sheet_{count}.csv")
fp = join(self.output_path, f"replicate_sheet_{count}.csv")
with open(fp, 'w') as f:
replicate.write(f)
results.append(fp)
Expand Down
12 changes: 6 additions & 6 deletions sequence_processing_pipeline/Job.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None):
stdout=PIPE, stderr=PIPE)

if callback is not None:
callback(id=proc.pid, status='RUNNING')
callback(jid=proc.pid, status='RUNNING')

# Communicate pulls all stdout/stderr from the PIPEs
# This call blocks until the command is done
Expand All @@ -169,7 +169,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None):

if return_code not in acceptable_return_codes:
if callback is not None:
callback(id=proc.pid, status='ERROR')
callback(jid=proc.pid, status='ERROR')
msg = (
'Execute command-line statement failure:\n'
f'Command: {cmd}\n'
Expand All @@ -180,7 +180,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None):
raise PipelineError(message=msg)

if callback is not None:
callback(id=proc.pid, status='COMPLETED')
callback(jid=proc.pid, status='COMPLETED')

return {'stdout': stdout, 'stderr': stderr, 'return_code': return_code}

Expand Down Expand Up @@ -257,7 +257,7 @@ def submit_job(self, script_path, job_parameters=None,
job_info['exit_status'] = f'{estatuses}'

if callback is not None:
callback(id=job_id, status=f'{states}')
callback(jid=job_id, status=f'{states}')

logging.debug("Job info: %s" % job_info)

Expand All @@ -271,7 +271,7 @@ def submit_job(self, script_path, job_parameters=None,
if job_info['job_id'] is not None:
# job was once in the queue
if callback is not None:
callback(id=job_id, status=job_info['job_state'])
callback(jid=job_id, status=job_info['job_state'])

if set(states) == {'COMPLETED'}:
if 'exit_status' in job_info:
Expand All @@ -292,7 +292,7 @@ def submit_job(self, script_path, job_parameters=None,
else:
# job was never in the queue - return an error.
if callback is not None:
callback(id=job_id, status='ERROR')
callback(jid=job_id, status='ERROR')

raise PipelineError("job %s never appeared in the queue." % job_id)

Expand Down
31 changes: 16 additions & 15 deletions sequence_processing_pipeline/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,20 @@ class Pipeline:
None, 32.5, -117.25, 'control blank', 'metagenome', 256318,
None, 'adaptation', 'TRUE', 'UCSD', 'FALSE']

mapping_file_columns = {'barcode', 'library_construction_protocol',
'mastermix_lot', 'sample_plate',
'center_project_name', 'instrument_model',
'tm1000_8_tool', 'well_id', 'tm50_8_tool',
'well_description', 'run_prefix', 'run_date',
'center_name', 'tm300_8_tool', 'extraction_robot',
'experiment_design_description', 'platform',
'water_lot', 'project_name', 'pcr_primers',
'sequencing_meth', 'plating', 'orig_name',
'linker', 'runid', 'target_subfragment', 'primer',
'primer_plate', 'sample_name', 'run_center',
'primer_date', 'target_gene', 'processing_robot',
'extractionkit_lot', 'qiita_prep_id'}
mapping_file_columns = {'sample_name', 'barcode', 'center_name',
'center_project_name',
'experiment_design_description',
'instrument_model',
'library_construction_protocol',
'platform', 'run_center', 'run_date', 'run_prefix',
'runid', 'sample_plate', 'sequencing_meth',
'linker', 'primer', 'primer_plate', 'well_id_384',
'plating', 'extractionkit_lot', 'extraction_robot',
'tm1000_8_tool', 'primer_date', 'mastermix_lot',
'water_lot', 'processing_robot', 'tm300_8_tool',
'tm50_8_tool', 'project_name', 'orig_name',
'well_description', 'pcr_primers', 'target_gene',
'tm10_8_tool', 'target_subfragment', 'well_id_96'}

METAGENOMIC_PTYPE = 'Metagenomic'
METATRANSCRIPTOMIC_PTYPE = 'Metatranscriptomic'
Expand Down Expand Up @@ -209,7 +210,7 @@ def run(self, callback=None):
"""
Run all jobs added to Pipeline in the order they were added.
:param callback: Optional function to call and update status with.
:param callback(id=): a string identifying the current running process.
:param callback(jid=): string identifying the current running process.
:param callback(status=): a string message or description.
:return:
"""
Expand Down Expand Up @@ -344,7 +345,7 @@ def _validate_mapping_file(self, mapping_file_path):
missing_columns = Pipeline.mapping_file_columns - obs
if missing_columns:
raise PipelineError("Mapping-file is missing columns: "
"%s" % ', '.join(missing_columns))
"%s" % ', '.join(sorted(missing_columns)))

# if an observed column is unexpected, that is a warning.
unexpected_columns = obs - Pipeline.mapping_file_columns
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id
Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96 well_id_384 tm10_8_tool
11.1.21.RK.FH TATGCCAGAGAT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4
11.1.21.RK.LH ATCTAGTGGCAA UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4
11.1.21.RK.RH TTCCTTAGTAGT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.RH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.RH_B4 B4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample_name barcode center_name Barcode center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id
sample_name barcode center_name Barcode center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96
11.1.21.RK.FH TATGCCAGAGAT UCSDMI TATGCCAGAGAT Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4
11.1.21.RK.LH ATCTAGTGGCAA UCSDMI ATCTAGTGGCAA Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4
11.1.21.RK.FH TATGCCAGAGAT UCSDMI TATGCCAGAGAT Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4
Expand Down Expand Up @@ -384,4 +384,4 @@ BLANK242.9D CGTCGCAGCCTT UCSDMI CGTCGCAGCCTT Knight_ABTX Samples from ABTX NA NA
BLANK242.9E AGCGATTCCTCG UCSDMI AGCGATTCCTCG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V221" GT 1331807 BLANK242.9E FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9E_E9 E9
BLANK242.9F CGTGAGGACCAG UCSDMI CGTGAGGACCAG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V233" GT 1331807 BLANK242.9F FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9F_F9 F9
BLANK242.9G AGAGACGCGTAG UCSDMI AGAGACGCGTAG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V245" GT 1331807 BLANK242.9G FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9G_G9 G9
BLANK242.9H GGACCTGGATGG UCSDMI GGACCTGGATGG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V257" GT 1331807 BLANK242.9H FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9H_H9 H9
BLANK242.9H GGACCTGGATGG UCSDMI GGACCTGGATGG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V257" GT 1331807 BLANK242.9H FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9H_H9 H9
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample_name barcode center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool water_lot well_description well_id
sample_name barcode center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool water_lot well_description well_id_96
11.1.21.RK.FH TATGCCAGAGAT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4
11.1.21.RK.LH ATCTAGTGGCAA UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4
11.1.21.RK.RH TTCCTTAGTAGT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.RH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.RH_B4 B4
Expand Down
Loading

0 comments on commit 1715f0c

Please sign in to comment.