From c2233ceb770d20e4eb70b85a0294943fd3ae62c6 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 11 Oct 2023 11:37:04 -0600 Subject: [PATCH 1/5] changes for replicates --- .../GenPrepFileJob.py | 15 +++++++--- sequence_processing_pipeline/Job.py | 12 ++++---- sequence_processing_pipeline/Pipeline.py | 29 ++++++++++--------- .../tests/test_GenPrepFileJob.py | 18 ++++++++---- .../tests/test_Job.py | 4 +-- 5 files changed, 46 insertions(+), 32 deletions(-) diff --git a/sequence_processing_pipeline/GenPrepFileJob.py b/sequence_processing_pipeline/GenPrepFileJob.py index 63dc056b..e5b58c90 100644 --- a/sequence_processing_pipeline/GenPrepFileJob.py +++ b/sequence_processing_pipeline/GenPrepFileJob.py @@ -13,7 +13,7 @@ class GenPrepFileJob(Job): def __init__(self, run_dir, convert_job_path, qc_job_path, output_path, input_file_path, seqpro_path, projects, modules_to_load, - qiita_job_id, is_amplicon=False, has_replicates=False): + qiita_job_id, is_amplicon=False): super().__init__(run_dir, output_path, @@ -29,7 +29,7 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path, self.is_amplicon = is_amplicon self.prep_file_paths = None self.commands = [] - self.has_replicates = has_replicates + self.has_replicates = False self.replicate_count = 0 # make the 'root' of your run_directory @@ -82,11 +82,15 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path, # parse_prep extended to support parsing pre-prep files as well. 
fp = parse_prep(self.input_file_path) if pre_prep_needs_demuxing(fp): + self.has_replicates = True + # overwrite default setting file_paths = self._write_to_file(demux_pre_prep(fp)) else: fp = KLSampleSheet(self.input_file_path) if sheet_needs_demuxing(fp): + self.has_replicates = True + # overwrite default setting file_paths = self._write_to_file(demux_sample_sheet(fp)) @@ -108,18 +112,21 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path, def _write_to_file(self, demuxed): ''' Saves the new plate-replicate-specific sample-sheet or pre-prep file - w/a unique name. + w/a unique name. Assume demuxed is a list of DataFrames originating + from a single sample-sheet or pre-prep file. :param demuxed: :return: ''' results = [] for count, replicate in enumerate(demuxed, 1): if self.is_amplicon: + # hack to make demuxed pre-prep files comply with the + # columns expected for a regular muxed file. fp = join(self.output_path, f"sheet_{count}.txt") replicate.to_csv(fp, sep='\t', index=False, header=True) results.append(fp) else: - fp = join(self.output_path, f"sheet_{count}.csv") + fp = join(self.output_path, f"replicate_sheet_{count}.csv") with open(fp, 'w') as f: replicate.write(f) results.append(fp) diff --git a/sequence_processing_pipeline/Job.py b/sequence_processing_pipeline/Job.py index e8afafac..e91e8beb 100644 --- a/sequence_processing_pipeline/Job.py +++ b/sequence_processing_pipeline/Job.py @@ -154,7 +154,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None): stdout=PIPE, stderr=PIPE) if callback is not None: - callback(id=proc.pid, status='RUNNING') + callback(jid=proc.pid, status='RUNNING') # Communicate pulls all stdout/stderr from the PIPEs # This call blocks until the command is done @@ -169,7 +169,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None): if return_code not in acceptable_return_codes: if callback is not None: - callback(id=proc.pid, status='ERROR') + callback(jid=proc.pid, status='ERROR') 
msg = ( 'Execute command-line statement failure:\n' f'Command: {cmd}\n' @@ -180,7 +180,7 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None): raise PipelineError(message=msg) if callback is not None: - callback(id=proc.pid, status='COMPLETED') + callback(jid=proc.pid, status='COMPLETED') return {'stdout': stdout, 'stderr': stderr, 'return_code': return_code} @@ -257,7 +257,7 @@ def submit_job(self, script_path, job_parameters=None, job_info['exit_status'] = f'{estatuses}' if callback is not None: - callback(id=job_id, status=f'{states}') + callback(jid=job_id, status=f'{states}') logging.debug("Job info: %s" % job_info) @@ -271,7 +271,7 @@ def submit_job(self, script_path, job_parameters=None, if job_info['job_id'] is not None: # job was once in the queue if callback is not None: - callback(id=job_id, status=job_info['job_state']) + callback(jid=job_id, status=job_info['job_state']) if set(states) == {'COMPLETED'}: if 'exit_status' in job_info: @@ -292,7 +292,7 @@ def submit_job(self, script_path, job_parameters=None, else: # job was never in the queue - return an error. if callback is not None: - callback(id=job_id, status='ERROR') + callback(jid=job_id, status='ERROR') raise PipelineError("job %s never appeared in the queue." 
% job_id) diff --git a/sequence_processing_pipeline/Pipeline.py b/sequence_processing_pipeline/Pipeline.py index f9bd8f69..15be734c 100644 --- a/sequence_processing_pipeline/Pipeline.py +++ b/sequence_processing_pipeline/Pipeline.py @@ -34,19 +34,20 @@ class Pipeline: None, 32.5, -117.25, 'control blank', 'metagenome', 256318, None, 'adaptation', 'TRUE', 'UCSD', 'FALSE'] - mapping_file_columns = {'barcode', 'library_construction_protocol', - 'mastermix_lot', 'sample_plate', - 'center_project_name', 'instrument_model', - 'tm1000_8_tool', 'well_id', 'tm50_8_tool', - 'well_description', 'run_prefix', 'run_date', - 'center_name', 'tm300_8_tool', 'extraction_robot', - 'experiment_design_description', 'platform', - 'water_lot', 'project_name', 'pcr_primers', - 'sequencing_meth', 'plating', 'orig_name', - 'linker', 'runid', 'target_subfragment', 'primer', - 'primer_plate', 'sample_name', 'run_center', - 'primer_date', 'target_gene', 'processing_robot', - 'extractionkit_lot', 'qiita_prep_id'} + mapping_file_columns = {'sample_name', 'barcode', 'center_name', + 'center_project_name', + 'experiment_design_description', + 'instrument_model', + 'library_construction_protocol', + 'platform', 'run_center', 'run_date', 'run_prefix', + 'runid', 'sample_plate', 'sequencing_meth', + 'linker', 'primer', 'primer_plate', 'well_id_384', + 'plating', 'extractionkit_lot', 'extraction_robot', + 'tm1000_8_tool', 'primer_date', 'mastermix_lot', + 'water_lot', 'processing_robot', 'tm300_8_tool', + 'tm50_8_tool', 'project_name', 'orig_name', + 'well_description', 'pcr_primers', 'target_gene', + 'tm10_8_tool', 'target_subfragment', 'well_id_96'} METAGENOMIC_PTYPE = 'Metagenomic' METATRANSCRIPTOMIC_PTYPE = 'Metatranscriptomic' @@ -209,7 +210,7 @@ def run(self, callback=None): """ Run all jobs added to Pipeline in the order they were added. :param callback: Optional function to call and upstate status with. - :param callback(id=): a string identifying the current running process. 
+ :param callback(jid=): string identifying the current running process. :param callback(status=): a string message or description. :return: """ diff --git a/sequence_processing_pipeline/tests/test_GenPrepFileJob.py b/sequence_processing_pipeline/tests/test_GenPrepFileJob.py index 6e36ee56..d0b766c1 100644 --- a/sequence_processing_pipeline/tests/test_GenPrepFileJob.py +++ b/sequence_processing_pipeline/tests/test_GenPrepFileJob.py @@ -157,7 +157,8 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST'), ('"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_1.csv"'), + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_1.csv"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], ['seqpro', '--verbose', @@ -165,7 +166,8 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST'), ('"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_2.csv"'), + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_2.csv"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], ['seqpro', '--verbose', @@ -173,7 +175,8 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST', '"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_3.csv"', + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_3.csv"', 'sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles']] @@ -204,7 +207,8 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST'), 
('"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_1.txt"'), + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_1.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], ['seqpro', '--verbose', @@ -212,7 +216,8 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST'), ('"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_2.txt"'), + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_2.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], ['seqpro', '--verbose', @@ -220,7 +225,8 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' '_TEST'), ('"sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/sheet_3.txt"'), + '65721149c1e8/OutputPath/GenPrepFileJob/' + 'replicate_sheet_3.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')]] diff --git a/sequence_processing_pipeline/tests/test_Job.py b/sequence_processing_pipeline/tests/test_Job.py index 0f68da7c..b01e651c 100644 --- a/sequence_processing_pipeline/tests/test_Job.py +++ b/sequence_processing_pipeline/tests/test_Job.py @@ -40,8 +40,8 @@ def test_system_call(self): callback_results = [] - def my_callback(id=None, status=None): - callback_results.append((id, status)) + def my_callback(jid=None, status=None): + callback_results.append((jid, status)) obs = job._system_call('ls ' + join(package_root, 'tests', 'bin'), callback=my_callback) From 9544d5da4309fe384f514871c54b60a886b0b8e5 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 11 Oct 2023 14:51:22 -0600 Subject: [PATCH 2/5] well_id_96 changes --- 
.../tests/data/good-mapping-file.txt | 2 +- .../tests/data/mf-duplicate-sample.txt | 4 +- .../tests/data/mf-missing-column.txt | 2 +- .../tests/data/pre_prep_w_replicates.csv | 40 +++++++++---------- .../tests/test_Pipeline.py | 3 +- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/sequence_processing_pipeline/tests/data/good-mapping-file.txt b/sequence_processing_pipeline/tests/data/good-mapping-file.txt index 7027e83d..6adc941e 100644 --- a/sequence_processing_pipeline/tests/data/good-mapping-file.txt +++ b/sequence_processing_pipeline/tests/data/good-mapping-file.txt @@ -1,4 +1,4 @@ -Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id +Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96 11.1.21.RK.FH TATGCCAGAGAT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by 
synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4 11.1.21.RK.LH ATCTAGTGGCAA UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4 11.1.21.RK.RH TTCCTTAGTAGT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.RH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.RH_B4 B4 diff --git a/sequence_processing_pipeline/tests/data/mf-duplicate-sample.txt b/sequence_processing_pipeline/tests/data/mf-duplicate-sample.txt index 39f26e5b..0fa0a0c1 100644 --- a/sequence_processing_pipeline/tests/data/mf-duplicate-sample.txt +++ b/sequence_processing_pipeline/tests/data/mf-duplicate-sample.txt @@ -1,4 +1,4 @@ -sample_name barcode center_name Barcode center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id +sample_name barcode center_name 
Barcode center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96 11.1.21.RK.FH TATGCCAGAGAT UCSDMI TATGCCAGAGAT Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4 11.1.21.RK.LH ATCTAGTGGCAA UCSDMI ATCTAGTGGCAA Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4 11.1.21.RK.FH TATGCCAGAGAT UCSDMI TATGCCAGAGAT Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by 
synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4 @@ -384,4 +384,4 @@ BLANK242.9D CGTCGCAGCCTT UCSDMI CGTCGCAGCCTT Knight_ABTX Samples from ABTX NA NA BLANK242.9E AGCGATTCCTCG UCSDMI AGCGATTCCTCG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V221" GT 1331807 BLANK242.9E FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9E_E9 E9 BLANK242.9F CGTGAGGACCAG UCSDMI CGTGAGGACCAG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V233" GT 1331807 BLANK242.9F FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9F_F9 F9 BLANK242.9G AGAGACGCGTAG UCSDMI AGAGACGCGTAG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V245" GT 1331807 BLANK242.9G FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9G_G9 G9 -BLANK242.9H GGACCTGGATGG UCSDMI GGACCTGGATGG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V257" GT 1331807 
BLANK242.9H FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9H_H9 H9 \ No newline at end of file +BLANK242.9H GGACCTGGATGG UCSDMI GGACCTGGATGG Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V257" GT 1331807 BLANK242.9H FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina MCM GTGTGYCAGCMGCCGCGGTAA 122822 4 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_242 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_242_BLANK242.9H_H9 H9 diff --git a/sequence_processing_pipeline/tests/data/mf-missing-column.txt b/sequence_processing_pipeline/tests/data/mf-missing-column.txt index 23c34252..9466ec62 100644 --- a/sequence_processing_pipeline/tests/data/mf-missing-column.txt +++ b/sequence_processing_pipeline/tests/data/mf-missing-column.txt @@ -1,4 +1,4 @@ -sample_name barcode center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool water_lot well_description well_id +sample_name barcode center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot 
project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool water_lot well_description well_id_96 11.1.21.RK.FH TATGCCAGAGAT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4 11.1.21.RK.LH ATCTAGTGGCAA UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4 11.1.21.RK.RH TTCCTTAGTAGT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 1331807 11.1.21.RK.RH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.RH_B4 B4 diff --git a/sequence_processing_pipeline/tests/data/pre_prep_w_replicates.csv b/sequence_processing_pipeline/tests/data/pre_prep_w_replicates.csv index 21430346..40b837f9 100644 --- 
a/sequence_processing_pipeline/tests/data/pre_prep_w_replicates.csv +++ b/sequence_processing_pipeline/tests/data/pre_prep_w_replicates.csv @@ -1,25 +1,25 @@ -orig_name sample_name contains_replicates barcode primer primer_plate well_id destination_well_id plating extractionkit_lot extraction_robot tm1000_8_tool primer_date mastermix_lot water_lot processing_robot tm300_8_tool tm50_8_tool sample_plate project_name well_description experiment_design_description library_construction_protocol linker platform run_center run_date run_prefix pcr_primers sequencing_meth target_gene target_subfragment center_name center_project_name instrument_model runid -9.18.19.RK.ST.900 9.18.19.RK.ST.900.A1 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 A1 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +orig_name sample_name contains_replicates barcode primer primer_plate well_id_96 well_id_384 plating extractionkit_lot extraction_robot tm1000_8_tool primer_date mastermix_lot water_lot processing_robot tm300_8_tool tm50_8_tool sample_plate project_name well_description experiment_design_description library_construction_protocol linker platform run_center run_date run_prefix pcr_primers sequencing_meth target_gene target_subfragment center_name center_project_name instrument_model runid +9.18.19.RK.ST.900 9.18.19.RK.ST.900.A1 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 A1 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT 
Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 9.19.19.RK.ST.1100 9.19.19.RK.ST.1100.A3 True AAGTGGCTATCC GTGYCAGCMGCCGCGGTAA 5 A2 A3 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 9.20.19.RK.ST.700 9.20.19.RK.ST.700.A5 true GTCGTTACCCGC GTGYCAGCMGCCGCGGTAA 5 A3 A5 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 9.20.19.RK.ST.1100 9.20.19.RK.ST.1100.A7 tRuE AGTATATGTTTC GTGYCAGCMGCCGCGGTAA 5 A4 A7 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 9.21.19.RK.ST.500 9.21.19.RK.ST.500.A9 TRUE GGCTCGTCGGAG GTGYCAGCMGCCGCGGTAA 5 A5 A9 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.22.19.RK.ST.700 9.22.19.RK.ST.700.A11 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 A11 SF 163051748 
Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.16.19.RK.ST.700 9.16.19.RK.ST.700.A13 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 A13 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.22.19.RK.ST.700 9.22.19.RK.ST.700.A11 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 A11 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.16.19.RK.ST.700 9.16.19.RK.ST.700.A13 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 A13 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 9.16.19.RK.ST.1100 9.16.19.RK.ST.1100.A15 TRUE ATAAACGGACAT GTGYCAGCMGCCGCGGTAA 5 A8 A15 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP 
protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.18.19.RK.ST.900 9.18.19.RK.ST.900.B1 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 B1 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.19.19.RK.ST.1100 9.19.19.RK.ST.1100.B3 TRUE AAGTGGCTATCC GTGYCAGCMGCCGCGGTAA 5 A2 B3 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.20.19.RK.ST.700 9.20.19.RK.ST.700.B5 TRUE GTCGTTACCCGC GTGYCAGCMGCCGCGGTAA 5 A3 B5 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.20.19.RK.ST.1100 9.20.19.RK.ST.1100.B7 TRUE AGTATATGTTTC GTGYCAGCMGCCGCGGTAA 5 A4 B7 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 
UCSDMI Rob ABTX Illumina MiSeq -9.21.19.RK.ST.500 9.21.19.RK.ST.500.B9 TRUE GGCTCGTCGGAG GTGYCAGCMGCCGCGGTAA 5 A5 B9 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.22.19.RK.ST.700 9.22.19.RK.ST.700.B11 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 B11 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.16.19.RK.ST.700 9.16.19.RK.ST.700.B13 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 B13 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.16.19.RK.ST.1100 9.16.19.RK.ST.1100.B15 TRUE ATAAACGGACAT GTGYCAGCMGCCGCGGTAA 5 A8 B15 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.18.19.RK.ST.900 9.18.19.RK.ST.900.A2 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 A2 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 
1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.19.19.RK.ST.1100 9.19.19.RK.ST.1100.A4 TRUE AAGTGGCTATCC GTGYCAGCMGCCGCGGTAA 5 A2 A4 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.20.19.RK.ST.700 9.20.19.RK.ST.700.A6 TRUE GTCGTTACCCGC GTGYCAGCMGCCGCGGTAA 5 A3 A6 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.20.19.RK.ST.1100 9.20.19.RK.ST.1100.A8 TRUE AGTATATGTTTC GTGYCAGCMGCCGCGGTAA 5 A4 A8 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.21.19.RK.ST.500 9.21.19.RK.ST.500.A10 TRUE GGCTCGTCGGAG GTGYCAGCMGCCGCGGTAA 5 A5 A10 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification 
of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.22.19.RK.ST.700 9.22.19.RK.ST.700.A12 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 A12 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.16.19.RK.ST.700 9.16.19.RK.ST.700.A14 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 A14 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq -9.16.19.RK.ST.1100 9.16.19.RK.ST.1100.A16 TRUE ATAAACGGACAT GTGYCAGCMGCCGCGGTAA 5 A8 A16 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.18.19.RK.ST.900 9.18.19.RK.ST.900.B1 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 B1 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq 
+9.19.19.RK.ST.1100 9.19.19.RK.ST.1100.B3 TRUE AAGTGGCTATCC GTGYCAGCMGCCGCGGTAA 5 A2 B3 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.20.19.RK.ST.700 9.20.19.RK.ST.700.B5 TRUE GTCGTTACCCGC GTGYCAGCMGCCGCGGTAA 5 A3 B5 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.20.19.RK.ST.1100 9.20.19.RK.ST.1100.B7 TRUE AGTATATGTTTC GTGYCAGCMGCCGCGGTAA 5 A4 B7 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.21.19.RK.ST.500 9.21.19.RK.ST.500.B9 TRUE GGCTCGTCGGAG GTGYCAGCMGCCGCGGTAA 5 A5 B9 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.22.19.RK.ST.700 9.22.19.RK.ST.700.B11 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 B11 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not 
applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.16.19.RK.ST.700 9.16.19.RK.ST.700.B13 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 B13 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.16.19.RK.ST.1100 9.16.19.RK.ST.1100.B15 TRUE ATAAACGGACAT GTGYCAGCMGCCGCGGTAA 5 A8 B15 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.18.19.RK.ST.900 9.18.19.RK.ST.900.A2 TRUE ATGTTAGGGAAT GTGYCAGCMGCCGCGGTAA 5 A1 A2 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.19.19.RK.ST.1100 9.19.19.RK.ST.1100.A4 TRUE AAGTGGCTATCC GTGYCAGCMGCCGCGGTAA 5 A2 A4 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT 
Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.20.19.RK.ST.700 9.20.19.RK.ST.700.A6 TRUE GTCGTTACCCGC GTGYCAGCMGCCGCGGTAA 5 A3 A6 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.20.19.RK.ST.1100 9.20.19.RK.ST.1100.A8 TRUE AGTATATGTTTC GTGYCAGCMGCCGCGGTAA 5 A4 A8 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.21.19.RK.ST.500 9.21.19.RK.ST.500.A10 TRUE GGCTCGTCGGAG GTGYCAGCMGCCGCGGTAA 5 A5 A10 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.22.19.RK.ST.700 9.22.19.RK.ST.700.A12 TRUE GACATCTGACAC GTGYCAGCMGCCGCGGTAA 5 A6 A12 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.16.19.RK.ST.700 
9.16.19.RK.ST.700.A14 TRUE AATTTCCTAACA GTGYCAGCMGCCGCGGTAA 5 A7 A14 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq +9.16.19.RK.ST.1100 9.16.19.RK.ST.1100.A16 TRUE ATAAACGGACAT GTGYCAGCMGCCGCGGTAA 5 A8 A16 SF 163051748 Carmen_HOWE_KF3 109379Z 12/28/22 1266015 RNBL1950 Echo550 not applicable not applicable ABTX_Plate_174 ABTX_11052 description exp design description "Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4" GT Illumina UCSDMI 3/2/23 FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Sequencing by synthesis 16S rRNA V4 UCSDMI Rob ABTX Illumina MiSeq diff --git a/sequence_processing_pipeline/tests/test_Pipeline.py b/sequence_processing_pipeline/tests/test_Pipeline.py index 19d315ab..9177d727 100644 --- a/sequence_processing_pipeline/tests/test_Pipeline.py +++ b/sequence_processing_pipeline/tests/test_Pipeline.py @@ -87,7 +87,8 @@ def delete_rtacomplete_file(self): def _make_mapping_file(self, output_file_path): cols = ('sample_name', 'barcode', 'library_construction_protocol', 'mastermix_lot', 'sample_plate', 'center_project_name', - 'instrument_model', 'tm1000_8_tool', 'well_id', 'tm50_8_tool', + 'instrument_model', 'tm1000_8_tool', 'well_id_96', + 'tm50_8_tool', 'well_description', 'run_prefix', 'run_date', 'center_name', 'tm300_8_tool', 'extraction_robot', 'qiita_prep_id', 'experiment_design_description', 'platform', 'water_lot', From 3f8f895b775598b378478b972c7fcbe9117d2d22 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Oct 2023 12:57:04 -0600 Subject: [PATCH 3/5] fix errors --- .gitignore | 3 +++ sequence_processing_pipeline/Pipeline.py | 2 +- .../tests/data/good-mapping-file.txt | 2 +- 
sequence_processing_pipeline/tests/test_Pipeline.py | 5 +++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index b6e47617..fef37e4c 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ dmypy.json # Pyre type checker .pyre/ + +# test output +sequence_processing_pipeline/tests/data/output_dir/ diff --git a/sequence_processing_pipeline/Pipeline.py b/sequence_processing_pipeline/Pipeline.py index 15be734c..570e9563 100644 --- a/sequence_processing_pipeline/Pipeline.py +++ b/sequence_processing_pipeline/Pipeline.py @@ -345,7 +345,7 @@ def _validate_mapping_file(self, mapping_file_path): missing_columns = Pipeline.mapping_file_columns - obs if missing_columns: raise PipelineError("Mapping-file is missing columns: " - "%s" % ', '.join(missing_columns)) + "%s" % ', '.join(sorted(missing_columns))) # if an observed column is unexpected, that is a warning. unexpected_columns = obs - Pipeline.mapping_file_columns diff --git a/sequence_processing_pipeline/tests/data/good-mapping-file.txt b/sequence_processing_pipeline/tests/data/good-mapping-file.txt index 6adc941e..5a603867 100644 --- a/sequence_processing_pipeline/tests/data/good-mapping-file.txt +++ b/sequence_processing_pipeline/tests/data/good-mapping-file.txt @@ -1,4 +1,4 @@ -Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96 +Sample_Name BARCODE center_name center_project_name experiment_design_description extraction_robot extractionkit_lot instrument_model library_construction_protocol linker mastermix_lot orig_name pcr_primers platform plating primer 
primer_date primer_plate processing_robot project_name qiita_prep_id run_center run_date run_prefix runid sample_plate sequencing_meth target_gene target_subfragment tm1000_8_tool tm300_8_tool tm50_8_tool water_lot well_description well_id_96 well_id_384 tm10_8_tool 11.1.21.RK.FH TATGCCAGAGAT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.FH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.FH_C4 C4 11.1.21.RK.LH ATCTAGTGGCAA UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.LH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.LH_D4 D4 11.1.21.RK.RH TTCCTTAGTAGT UCSDMI Knight_ABTX Samples from ABTX NA NA Illumina MiSeq Illumina EMP protocol 515fbc, 806r amplification of 16S rRNA V4 GT 1331807 11.1.21.RK.RH FWD:GTGYCAGCMGCCGCGGTAA; REV:GGACTACNVGGGTWTCTAAT Illumina LNH GTGTGYCAGCMGCCGCGGTAA 122822 1 Echo 550 ABTX_20230208_ABTX_11052 14339 UCSDMI 2/7/23 ABTX_Plates_238_239_240_242_S1_L001 230207_M05314_0346_000000000-KVMGL ABTX_20230208_11052_Plate_238 Sequencing by synthesis 16S rRNA V4 108379Z NA NA 1317793 ABTX_20230208_11052_Plate_238_11.1.21.RK.RH_B4 B4 diff --git a/sequence_processing_pipeline/tests/test_Pipeline.py b/sequence_processing_pipeline/tests/test_Pipeline.py index 
9177d727..d827a30b 100644 --- a/sequence_processing_pipeline/tests/test_Pipeline.py +++ b/sequence_processing_pipeline/tests/test_Pipeline.py @@ -88,7 +88,7 @@ def _make_mapping_file(self, output_file_path): cols = ('sample_name', 'barcode', 'library_construction_protocol', 'mastermix_lot', 'sample_plate', 'center_project_name', 'instrument_model', 'tm1000_8_tool', 'well_id_96', - 'tm50_8_tool', + 'tm50_8_tool', 'tm10_8_tool', 'well_id_384', 'well_description', 'run_prefix', 'run_date', 'center_name', 'tm300_8_tool', 'extraction_robot', 'qiita_prep_id', 'experiment_design_description', 'platform', 'water_lot', @@ -1645,7 +1645,8 @@ def test_mapping_file_validation(self): self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE, None) self.assertEqual(str(e.exception), ('Mapping-file is missing ' - 'columns: tm50_8_tool')) + 'columns: tm10_8_tool, ' + 'tm50_8_tool, well_id_384')) # test unsuccessful validation of a bad mapping-file. with self.assertRaises(PipelineError) as e: From b67478f18e662c002f3558d3dc430caca40e4e2c Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Oct 2023 13:19:45 -0600 Subject: [PATCH 4/5] fix GenPrepFileJob.py --- sequence_processing_pipeline/GenPrepFileJob.py | 2 +- .../tests/test_GenPrepFileJob.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sequence_processing_pipeline/GenPrepFileJob.py b/sequence_processing_pipeline/GenPrepFileJob.py index e5b58c90..92feda20 100644 --- a/sequence_processing_pipeline/GenPrepFileJob.py +++ b/sequence_processing_pipeline/GenPrepFileJob.py @@ -122,7 +122,7 @@ def _write_to_file(self, demuxed): if self.is_amplicon: # hack to make demuxed pre-prep files comply with the # columns expected for a reglar muxed file. 
- fp = join(self.output_path, f"sheet_{count}.txt") + fp = join(self.output_path, f"replicate_sheet_{count}.txt") replicate.to_csv(fp, sep='\t', index=False, header=True) results.append(fp) else: diff --git a/sequence_processing_pipeline/tests/test_GenPrepFileJob.py b/sequence_processing_pipeline/tests/test_GenPrepFileJob.py index d0b766c1..7819345e 100644 --- a/sequence_processing_pipeline/tests/test_GenPrepFileJob.py +++ b/sequence_processing_pipeline/tests/test_GenPrepFileJob.py @@ -160,7 +160,7 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_1.csv"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], + '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/1')], ['seqpro', '--verbose', ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' @@ -169,7 +169,7 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_2.csv"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], + '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/2')], ['seqpro', '--verbose', 'sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' @@ -178,7 +178,7 @@ def test_sample_sheet_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_3.csv"', 'sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles']] + '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/3']] self.assertEqual(job.commands, exp) @@ -210,7 +210,7 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_1.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], + 
'65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/1')], ['seqpro', '--verbose', ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' @@ -219,7 +219,7 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_2.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')], + '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/2')], ['seqpro', '--verbose', ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' '65721149c1e8/OutputPath/GenPrepFileJob/210518_A00953_0305' @@ -228,7 +228,7 @@ def test_pre_prep_replicate_file_creation(self): '65721149c1e8/OutputPath/GenPrepFileJob/' 'replicate_sheet_3.txt"'), ('sequence_processing_pipeline/b197f317-1c06-4619-9af3-' - '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles')]] + '65721149c1e8/OutputPath/GenPrepFileJob/PrepFiles/3')]] self.assertEqual(job.commands, exp) From b148e5502a106ba1c0428d1b29386a04d7be62e1 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Oct 2023 14:14:25 -0600 Subject: [PATCH 5/5] add replicate[orig_name] --- sequence_processing_pipeline/GenPrepFileJob.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sequence_processing_pipeline/GenPrepFileJob.py b/sequence_processing_pipeline/GenPrepFileJob.py index 92feda20..7bf6555d 100644 --- a/sequence_processing_pipeline/GenPrepFileJob.py +++ b/sequence_processing_pipeline/GenPrepFileJob.py @@ -120,8 +120,7 @@ def _write_to_file(self, demuxed): results = [] for count, replicate in enumerate(demuxed, 1): if self.is_amplicon: - # hack to make demuxed pre-prep files comply with the - # columns expected for a reglar muxed file. + replicate['sample_name'] = replicate['orig_name'] fp = join(self.output_path, f"replicate_sheet_{count}.txt") replicate.to_csv(fp, sep='\t', index=False, header=True) results.append(fp)