# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization, Metadata
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired, trim_single
from qiime2.plugins.demux.methods import filter_samples

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default values for every tunable of this step. A papermill-injected cell may
# re-assign any of these names afterwards.

# -- identification
experiment_name = 'exp-01'
params_path = os.path.join('..', 'params', 'parameter-set-01.yaml')

# -- input locations
base_dir = os.path.join('/', 'home', 'username', 'pipeline-dir')
manifest_file = os.path.join(base_dir, *('data', 'raw', 'manifest', 'manifest.csv'))
metadata_file = None
demux_file = None  # None -> the path is derived from the experiment output folder later on

# -- behaviour switches
trim = None  # falsy value -> the primer trimming cell is skipped
replace_files = False
threads = 1

In [3]:
# Parameters
# Values for this run; names that also appear in the defaults cell above
# (base_dir, experiment_name, manifest_file, metadata_file, replace_files,
# threads, trim) are re-assigned here and therefore take precedence.
base_dir = "/mnt/nupeb/rede-micro/redemicro-ana-flavia-nutri"
# class_col / classifier_file are not referenced in the visible part of this
# notebook -- presumably consumed by later pipeline steps; verify.
class_col = "group-id"
classifier_file = "/mnt/nupeb/rede-micro/datasets/16S_classifiers_qiime2/silva-138-99-nb-classifier.qza"
experiment_name = "ana-flavia-STD-NCxSTD-NRxHSD-NCxHSD-NR"
manifest_file = "/mnt/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/selected_groups/manifest_STD-NCxSTD-NRxHSD-NCxHSD-NR.csv"
metadata_file = "/mnt/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/selected_groups/metadata_STD-NCxSTD-NRxHSD-NCxHSD-NR.tsv"
# NOTE(review): this top-level `overlap` is distinct from trim["overlap"], which is
# the value the cutadapt cell actually reads; `overlap`, `phred`, `top_n`,
# `trunc_f` and `trunc_r` are not used in the visible cells -- confirm where they apply.
overlap = 12
phred = 20
replace_files = False
# Passed as `cores` to the cutadapt trimming calls.
threads = 6
top_n = 20
# Keys read by the trimming cell: forward_primer, reverse_primer, overlap.
trim = {"forward_primer": "CCTACGGGRSGCAGCAG", "overlap": 8, "reverse_primer": "GGACTACHVGGGTWTCTAAT"}
trunc_f = 0
trunc_r = 0


In [4]:
# Everything produced for this experiment lives under <base_dir>/experiments/<experiment_name>/
_experiment_root = os.path.join(base_dir, 'experiments', experiment_name)
out_dir = os.path.join(_experiment_root, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(_experiment_root, 'imgs'))

# Shared data folders: <base_dir>/data with its raw/ and interim/ children
data_dir = os.path.abspath(os.path.join(base_dir, 'data'))
raw_data_dir, interim_data_dir = (
    os.path.abspath(os.path.join(data_dir, child)) for child in ('raw', 'interim')
)

### Defining names and paths

In [5]:
def _ensure_dir(path, created_message):
    """Create `path` (including parents) unless it is already a directory.

    `created_message` is printed only when the directory had to be created.
    """
    if os.path.isdir(path):
        return
    os.makedirs(path)
    print(created_message)


# Folder that receives the QIIME 2 artifacts of this experiment
_ensure_dir(out_dir, f'New qiime-artifacts folder path created: {out_dir}')

# Folder that receives the images of this experiment
_ensure_dir(img_folder, f'New img folder path created: {img_folder}')

New qiime-artifacts folder path created: /mnt/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NCxSTD-NRxHSD-NCxHSD-NR/qiime-artifacts
New img folder path created: /mnt/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NCxSTD-NRxHSD-NCxHSD-NR/imgs


In [6]:
# Inspect the manifest to find out whether the run is single-end or paired-end.
manifest_df = pd.read_csv(manifest_file)
n_directions = len(manifest_df['direction'].unique())

# Number of distinct values in the manifest `direction` column
#   -> (QIIME 2 semantic type, manifest import format, short label used in file names)
_read_layouts = {
    1: ('SampleData[SequencesWithQuality]', 'SingleEndFastqManifestPhred33', 'single'),
    2: ('SampleData[PairedEndSequencesWithQuality]', 'PairedEndFastqManifestPhred33', 'paired'),
}

# Fail here instead of only printing: otherwise d_type / v_type / direction would be
# left undefined (or stale from a previous kernel run) and the following cells would
# break later with a far less helpful error.
if n_directions not in _read_layouts:
    raise ValueError(f'ERROR: invalid number of directions {n_directions}')

d_type, v_type, direction = _read_layouts[n_directions]

In [7]:
# Run label: the read layout, extended when a trim configuration with exactly
# three entries was supplied.
# NOTE(review): the f-string below interpolates the whole `trim` dict (its repr),
# not a short tag such as "trim"; `sufix` is not used in the visible cells --
# confirm the intended value before relying on it.
sufix = str(direction)
if trim:
    if len(trim.keys()) == 3:
        sufix = sufix + f'-{trim}'

# Summary visualization of the untrimmed reads
demux_view = os.path.join(out_dir, f'demux-{direction}.qzv')

# Trimmed artifact and its summary visualization
demux_file_trim = os.path.join(out_dir, f'demux-{direction}-trim.qza')
demux_view_trim = os.path.join(out_dir, f'demux-{direction}-trim.qzv')

# Untrimmed artifact: keep a path given as parameter, otherwise use the experiment folder
if demux_file is None:
    demux_file = os.path.join(out_dir, f'demux-{direction}.qza')

## Step execution

This step imports all `fastq` files into a **QIIME2 Artifact** object and saves it to a new `qza` file.

In [8]:
# The sample metadata drives the sample filtering below, and there is no default
# file to fall back on: stop with an explicit message when the parameter is missing
# instead of failing inside Metadata.load(None).
if metadata_file is None:
    raise ValueError('metadata_file is not defined: set the `metadata_file` parameter to the sample metadata TSV path')
metadata_qa = Metadata.load(metadata_file)

In [9]:
# Build (or reload) the demultiplexed artifact, keep only the samples listed in the
# metadata, and make sure a summary visualization is available.

# Cache of the raw import, stored in the interim data folder and keyed by read layout.
# NOTE(review): the cache name does not depend on the manifest, so runs using a
# different manifest reuse the same file -- confirm this is intended.
interim_demux_path = os.path.join(interim_data_dir, f'demux-{direction}.qza')

# The artifact is (re)built when it is missing or when replacement was requested
rebuild_demux = replace_files or not os.path.isfile(demux_file)

if rebuild_demux:
    # Pure-Python replacement for the `!mkdir -p` shell call: portable, and safe
    # for paths containing spaces or shell metacharacters.
    os.makedirs(interim_data_dir, exist_ok=True)

    if os.path.isfile(interim_demux_path):
        # Load Artifact from interim folder
        artifact = Artifact.load(interim_demux_path)
    else:
        # Create new Artifact using Manifest and cache it for later runs
        artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)
        artifact.save(interim_demux_path)

    # Filter and Save Artifact
    artifact = filter_samples(demux=artifact, metadata=metadata_qa).filtered_demux
    artifact.save(demux_file)
else:
    # Load the existing Artifact and apply the metadata filter to it
    artifact = Artifact.load(demux_file)
    artifact = filter_samples(demux=artifact, metadata=metadata_qa).filtered_demux

# Visualization: reuse the saved summary only when the artifact itself was reused;
# a freshly rebuilt artifact gets a fresh summary so the .qzv never describes
# outdated data (mirrors the trimming cell, which always re-saves its summary).
if os.path.isfile(demux_view) and not rebuild_demux:
    demux_view_obj = Visualization.load(demux_view)
else:
    demux_view_obj = demux.visualizers.summarize(artifact).visualization
    Visualization.save(demux_view_obj, filepath=demux_view)

  context['result_data'] = context['result_data'].append(df)


  context['result_data'] = context['result_data'].append(df)


## Step report


In [10]:
# Print the textual representation of the summary visualization
# (the recorded output shows its uuid).
print(demux_view_obj)

# Render Visualization
# The object is left as the last expression of the cell so the notebook displays it.
demux_view_obj

<visualization: Visualization uuid: 0402fb10-3b84-4de0-b674-a323890e3feb>


[cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/)

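We trim the forward primer and the reverse complement of the reverse primer from the forward reads. We trim the reverse primer and the reverse complement of the forward primer from the reverse reads. Note: in the paired-end call below, the reverse-complement adapters (`adapter_f` / `adapter_r`) are currently commented out, so only the 5' primers (`front_f` / `front_r`) are removed there.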

In [11]:
# Remove the amplification primers with cutadapt and store the trimmed reads plus
# their summary visualization. Runs only when a trim configuration was given and the
# trimmed artifact is missing (or replacement was requested).
demux_file_trim = os.path.join(out_dir, f'demux-{direction}-trim.qza')
demux_view_trim = os.path.join(out_dir, f'demux-{direction}-trim.qzv')

if trim and (replace_files or not os.path.isfile(demux_file_trim)):
    # cutadapt expects lists of sequences
    forward_primer = [trim['forward_primer']] # ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']] # ['GGACTACHVGGGTWTCTAAT']
    forward_reverse_complement = [str(Seq(forward_primer[0]).reverse_complement())]
    reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]

    # Options shared by the single-end and paired-end calls
    cutadapt_options = dict(
        cores=threads,
        indels=False,
        match_read_wildcards=True,
        match_adapter_wildcards=True,
        error_rate=0.01,
        discard_untrimmed=True,
    )
    # Honour the configured minimum overlap for both layouts (it used to be passed
    # to the paired-end call only); without the key cutadapt's own default applies.
    if 'overlap' in trim:
        cutadapt_options['overlap'] = trim['overlap']

    res = None

    if n_directions == 1:
        res = trim_single(
            demultiplexed_sequences=artifact,
            front=forward_primer,
            adapter=reverse_reverse_complement,
            **cutadapt_options,
        ).trimmed_sequences
    elif n_directions == 2:
        res = trim_paired(
            demultiplexed_sequences=artifact,
            front_f=forward_primer,
            front_r=reverse_primer,
            # Reverse-complement adapters are currently disabled:
            # adapter_f=reverse_reverse_complement,
            # adapter_r=forward_reverse_complement,
            **cutadapt_options,
        ).trimmed_sequences
    else:
        # Stop instead of printing and silently producing no trimmed artifact
        raise ValueError(f'ERROR: invalid number of directions {n_directions}')

    res.save(demux_file_trim)
    Visualization.save(demux.visualizers.summarize(res).visualization, filepath=demux_view_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121682_18_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121682_19_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121682_18_L001_R1_001.fastq.gz /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121682_19_L001_R2_001.fastq.gz

This is cutadapt 4.4 with Python 3.8.15
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-

Finished in 0.693 s (4.180 µs/read; 14.35 M reads/minute).

=== Summary ===

Total read pairs processed:            165,821
  Read 1 with adapter:                 161,411 (97.3%)
  Read 2 with adapter:                 154,014 (92.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,834 (9.5%)
Pairs written (passing filters):       149,987 (90.5%)

Total basepairs processed:    84,568,710 bp
  Read 1:    50,575,405 bp
  Read 2:    33,993,305 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     69,924,570 bp (82.7%)
  Read 1:    42,679,301 bp
  Read 2:    27,245,269 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 161411 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	2.5	0	3
9	1	0.6	0	1
10	

Finished in 0.643 s (4.303 µs/read; 13.94 M reads/minute).

=== Summary ===

Total read pairs processed:            149,412
  Read 1 with adapter:                 145,228 (97.2%)
  Read 2 with adapter:                 138,274 (92.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           14,953 (10.0%)
Pairs written (passing filters):       134,459 (90.0%)

Total basepairs processed:    76,200,120 bp
  Read 1:    45,570,660 bp
  Read 2:    30,629,460 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     62,685,460 bp (82.3%)
  Read 1:    38,260,442 bp
  Read 2:    24,425,018 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 145228 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.3	0	1
10	1	0.1	0	1
1

Finished in 1.212 s (3.529 µs/read; 17.00 M reads/minute).

=== Summary ===

Total read pairs processed:            343,261
  Read 1 with adapter:                 334,370 (97.4%)
  Read 2 with adapter:                 318,364 (92.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           32,990 (9.6%)
Pairs written (passing filters):       310,271 (90.4%)

Total basepairs processed:   175,063,110 bp
  Read 1:   104,694,605 bp
  Read 2:    70,368,505 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):    144,648,355 bp (82.6%)
  Read 1:    88,289,164 bp
  Read 2:    56,359,191 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 334370 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	5.2	0	1
9	5	1.3	0	5
10	

Finished in 0.982 s (3.859 µs/read; 15.55 M reads/minute).

=== Summary ===

Total read pairs processed:            254,393
  Read 1 with adapter:                 247,382 (97.2%)
  Read 2 with adapter:                 234,645 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           26,087 (10.3%)
Pairs written (passing filters):       228,306 (89.7%)

Total basepairs processed:   129,740,430 bp
  Read 1:    77,589,865 bp
  Read 2:    52,150,565 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):    106,436,280 bp (82.0%)
  Read 1:    64,964,669 bp
  Read 2:    41,471,611 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 247382 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	3.9	0	3
9	3	1.0	0	3
10

Finished in 0.481 s (4.632 µs/read; 12.95 M reads/minute).

=== Summary ===

Total read pairs processed:            103,905
  Read 1 with adapter:                 101,232 (97.4%)
  Read 2 with adapter:                  96,332 (92.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           10,027 (9.7%)
Pairs written (passing filters):        93,878 (90.3%)

Total basepairs processed:    52,991,550 bp
  Read 1:    31,691,025 bp
  Read 2:    21,300,525 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     43,766,915 bp (82.6%)
  Read 1:    26,714,313 bp
  Read 2:    17,052,602 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 101232 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.6	0	1
9	2	0.4	0	2
10	

Finished in 0.336 s (5.059 µs/read; 11.86 M reads/minute).

=== Summary ===

Total read pairs processed:             66,368
  Read 1 with adapter:                  64,521 (97.2%)
  Read 2 with adapter:                  61,032 (92.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            7,022 (10.6%)
Pairs written (passing filters):        59,346 (89.4%)

Total basepairs processed:    33,847,680 bp
  Read 1:    20,242,240 bp
  Read 2:    13,605,440 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     27,666,882 bp (81.7%)
  Read 1:    16,886,558 bp
  Read 2:    10,780,324 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 64521 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.1	0	1
11	2	0.0	0	2
1

Finished in 0.576 s (4.210 µs/read; 14.25 M reads/minute).

=== Summary ===

Total read pairs processed:            136,741
  Read 1 with adapter:                 132,822 (97.1%)
  Read 2 with adapter:                 126,455 (92.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           13,839 (10.1%)
Pairs written (passing filters):       122,902 (89.9%)

Total basepairs processed:    69,737,910 bp
  Read 1:    41,706,005 bp
  Read 2:    28,031,905 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     57,294,347 bp (82.2%)
  Read 1:    34,969,730 bp
  Read 2:    22,324,617 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 132822 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.1	0	1
9	2	0.5	0	2
12

Finished in 0.680 s (3.743 µs/read; 16.03 M reads/minute).

=== Summary ===

Total read pairs processed:            181,624
  Read 1 with adapter:                 176,525 (97.2%)
  Read 2 with adapter:                 167,756 (92.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           18,504 (10.2%)
Pairs written (passing filters):       163,120 (89.8%)

Total basepairs processed:    92,628,240 bp
  Read 1:    55,395,320 bp
  Read 2:    37,232,920 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     76,044,296 bp (82.1%)
  Read 1:    46,413,857 bp
  Read 2:    29,630,439 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 176525 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.8	0	1
10	1	0.2	0	1
1

Finished in 0.573 s (4.264 µs/read; 14.07 M reads/minute).

=== Summary ===

Total read pairs processed:            134,399
  Read 1 with adapter:                 130,633 (97.2%)
  Read 2 with adapter:                 124,093 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           13,697 (10.2%)
Pairs written (passing filters):       120,702 (89.8%)

Total basepairs processed:    68,543,490 bp
  Read 1:    40,991,695 bp
  Read 2:    27,551,795 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     56,270,832 bp (82.1%)
  Read 1:    34,345,858 bp
  Read 2:    21,924,974 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 130633 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	2	0.1	0	2
11	2	0.0	0	2


Finished in 0.882 s (3.933 µs/read; 15.26 M reads/minute).

=== Summary ===

Total read pairs processed:            224,175
  Read 1 with adapter:                 217,928 (97.2%)
  Read 2 with adapter:                 207,950 (92.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           21,909 (9.8%)
Pairs written (passing filters):       202,266 (90.2%)

Total basepairs processed:   114,329,250 bp
  Read 1:    68,373,375 bp
  Read 2:    45,955,875 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     94,295,455 bp (82.5%)
  Read 1:    57,554,676 bp
  Read 2:    36,740,779 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 217928 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	3.4	0	1
9	4	0.9	0	4
10	

Finished in 0.358 s (4.459 µs/read; 13.46 M reads/minute).

=== Summary ===

Total read pairs processed:             80,216
  Read 1 with adapter:                  77,965 (97.2%)
  Read 2 with adapter:                  73,953 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            8,302 (10.3%)
Pairs written (passing filters):        71,914 (89.7%)

Total basepairs processed:    40,910,160 bp
  Read 1:    24,465,880 bp
  Read 2:    16,444,280 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     33,525,312 bp (81.9%)
  Read 1:    20,462,757 bp
  Read 2:    13,062,555 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 77965 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.2	0	1
9	1	0.3	0	1
14	

Finished in 0.671 s (4.077 µs/read; 14.72 M reads/minute).

=== Summary ===

Total read pairs processed:            164,608
  Read 1 with adapter:                 160,048 (97.2%)
  Read 2 with adapter:                 152,781 (92.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,982 (9.7%)
Pairs written (passing filters):       148,626 (90.3%)

Total basepairs processed:    83,950,080 bp
  Read 1:    50,205,440 bp
  Read 2:    33,744,640 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     69,288,385 bp (82.5%)
  Read 1:    42,290,904 bp
  Read 2:    26,997,481 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 160048 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.5	0	1
9	3	0.6	0	3
10	

Finished in 0.314 s (5.321 µs/read; 11.28 M reads/minute).

=== Summary ===

Total read pairs processed:             58,992
  Read 1 with adapter:                  57,341 (97.2%)
  Read 2 with adapter:                  54,208 (91.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            6,281 (10.6%)
Pairs written (passing filters):        52,711 (89.4%)

Total basepairs processed:    30,085,920 bp
  Read 1:    17,992,560 bp
  Read 2:    12,093,360 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     24,574,036 bp (81.7%)
  Read 1:    14,999,414 bp
  Read 2:     9,574,622 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 57341 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.2	0	1
11	1	0.0	0	1
12

Finished in 0.897 s (3.903 µs/read; 15.37 M reads/minute).

=== Summary ===

Total read pairs processed:            229,809
  Read 1 with adapter:                 223,555 (97.3%)
  Read 2 with adapter:                 212,081 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           23,391 (10.2%)
Pairs written (passing filters):       206,418 (89.8%)

Total basepairs processed:   117,202,590 bp
  Read 1:    70,091,745 bp
  Read 2:    47,110,845 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     96,229,804 bp (82.1%)
  Read 1:    58,736,074 bp
  Read 2:    37,493,730 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 223555 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	3.5	0	2
9	4	0.9	0	4
10

Finished in 0.519 s (4.523 µs/read; 13.26 M reads/minute).

=== Summary ===

Total read pairs processed:            114,634
  Read 1 with adapter:                 111,413 (97.2%)
  Read 2 with adapter:                 104,800 (91.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           12,724 (11.1%)
Pairs written (passing filters):       101,910 (88.9%)

Total basepairs processed:    58,463,340 bp
  Read 1:    34,963,370 bp
  Read 2:    23,499,970 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     47,509,206 bp (81.3%)
  Read 1:    28,997,978 bp
  Read 2:    18,511,228 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 111413 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.7	0	1
10	4	0.1	0	4
1

Finished in 0.264 s (5.354 µs/read; 11.21 M reads/minute).

=== Summary ===

Total read pairs processed:             49,335
  Read 1 with adapter:                  47,930 (97.2%)
  Read 2 with adapter:                  45,553 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            5,055 (10.2%)
Pairs written (passing filters):        44,280 (89.8%)

Total basepairs processed:    25,160,850 bp
  Read 1:    15,047,175 bp
  Read 2:    10,113,675 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     20,642,581 bp (82.0%)
  Read 1:    12,599,485 bp
  Read 2:     8,043,096 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 47930 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.2	0	1
11	2	0.0	0	2
12


Command: cutadapt --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121699_52_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121699_53_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121699_52_L001_R1_001.fastq.gz /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121699_53_L001_R2_001.fastq.gz

This is cutadapt 4.4 with Python 3.8.15
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121699_52_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121699_53_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCA

Finished in 0.179 s (8.520 µs/read; 7.04 M reads/minute).

=== Summary ===

Total read pairs processed:             20,959
  Read 1 with adapter:                  20,357 (97.1%)
  Read 2 with adapter:                  19,037 (90.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            2,450 (11.7%)
Pairs written (passing filters):        18,509 (88.3%)

Total basepairs processed:    10,689,090 bp
  Read 1:     6,392,495 bp
  Read 2:     4,296,595 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):      8,628,512 bp (80.7%)
  Read 1:     5,266,608 bp
  Read 2:     3,361,904 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 20357 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
15	1	0.0	0	1
16	1	0.0	0	1
17

Finished in 0.161 s (7.958 µs/read; 7.54 M reads/minute).

=== Summary ===

Total read pairs processed:             20,194
  Read 1 with adapter:                  19,610 (97.1%)
  Read 2 with adapter:                  18,045 (89.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            2,655 (13.1%)
Pairs written (passing filters):        17,539 (86.9%)

Total basepairs processed:    10,298,940 bp
  Read 1:     6,159,170 bp
  Read 2:     4,139,770 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):      8,176,314 bp (79.4%)
  Read 1:     4,990,682 bp
  Read 2:     3,185,632 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 19610 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
15	3	0.0	0	3
17	7	0.0	0	7
18

Finished in 0.341 s (5.060 µs/read; 11.86 M reads/minute).

=== Summary ===

Total read pairs processed:             67,293
  Read 1 with adapter:                  65,501 (97.3%)
  Read 2 with adapter:                  61,930 (92.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            6,987 (10.4%)
Pairs written (passing filters):        60,306 (89.6%)

Total basepairs processed:    34,319,430 bp
  Read 1:    20,524,365 bp
  Read 2:    13,795,065 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     28,113,964 bp (81.9%)
  Read 1:    17,159,382 bp
  Read 2:    10,954,582 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 65501 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
11	1	0.0	0	1
12	1	0.0	0	1
1

This is cutadapt 4.4 with Python 3.8.15
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121703_60_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121703_61_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121703_60_L001_R1_001.fastq.gz /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121703_61_L001_R2_001.fastq.gz
Processing paired-end reads on 6 cores ...
Finished in 0.198 s (8.165 µs/read; 7.35 M reads/minute).

=== Summary ===

Total read pairs processed:             24,189
  Read 1 with adapter:                  23,503 (97.2%)
  Read 2 with adapter:                  21,685 (89.6%)

== Read fate breakdown ==
Pairs that were too short:     


Command: cutadapt --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121704_62_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121704_63_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121704_62_L001_R1_001.fastq.gz /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121704_63_L001_R2_001.fastq.gz

This is cutadapt 4.4 with Python 3.8.15
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121704_62_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121704_63_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCA

This is cutadapt 4.4 with Python 3.8.15
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -q 0,0 --quality-base 33 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121705_64_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-wqise0jh/S210421121705_65_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121705_64_L001_R1_001.fastq.gz /tmp/qiime2/lauro/data/c76200a9-35eb-4174-886e-501b4deb8e2c/data/S210421121705_65_L001_R2_001.fastq.gz
Processing paired-end reads on 6 cores ...


Finished in 1.307 s (3.796 µs/read; 15.81 M reads/minute).

=== Summary ===

Total read pairs processed:            344,337
  Read 1 with adapter:                 334,766 (97.2%)
  Read 2 with adapter:                 317,372 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           35,670 (10.4%)
Pairs written (passing filters):       308,667 (89.6%)

Total basepairs processed:   175,611,870 bp
  Read 1:   105,022,785 bp
  Read 2:    70,589,085 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):    143,899,876 bp (81.9%)
  Read 1:    87,831,910 bp
  Read 2:    56,067,966 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 334766 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	5.3	0	1
9	4	1.3	0	4
10

Finished in 2.052 s (4.930 µs/read; 12.17 M reads/minute).

=== Summary ===

Total read pairs processed:            416,322
  Read 1 with adapter:                 405,067 (97.3%)
  Read 2 with adapter:                 385,489 (92.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           41,066 (9.9%)
Pairs written (passing filters):       375,256 (90.1%)

Total basepairs processed:   212,324,220 bp
  Read 1:   126,978,210 bp
  Read 2:    85,346,010 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):    174,944,302 bp (82.4%)
  Read 1:   106,779,862 bp
  Read 2:    68,164,440 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 405067 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	6.4	0	4
9	2	1.6	0	2
10	

Finished in 0.418 s (5.128 µs/read; 11.70 M reads/minute).

=== Summary ===

Total read pairs processed:             81,485
  Read 1 with adapter:                  79,303 (97.3%)
  Read 2 with adapter:                  74,804 (91.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            8,666 (10.6%)
Pairs written (passing filters):        72,819 (89.4%)

Total basepairs processed:    41,557,350 bp
  Read 1:    24,852,925 bp
  Read 2:    16,704,425 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     33,947,823 bp (81.7%)
  Read 1:    20,720,493 bp
  Read 2:    13,227,330 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 79303 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	2	0.3	0	2
10	2	0.1	0	2
12

Finished in 0.199 s (7.313 µs/read; 8.20 M reads/minute).

=== Summary ===

Total read pairs processed:             27,196
  Read 1 with adapter:                  26,427 (97.2%)
  Read 2 with adapter:                  24,481 (90.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,391 (12.5%)
Pairs written (passing filters):        23,805 (87.5%)

Total basepairs processed:    13,869,960 bp
  Read 1:     8,294,780 bp
  Read 2:     5,575,180 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     11,097,905 bp (80.0%)
  Read 1:     6,774,112 bp
  Read 2:     4,323,793 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 26427 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
13	1	0.0	0	1
14	2	0.0	0	2
17

Finished in 0.222 s (6.745 µs/read; 8.90 M reads/minute).

=== Summary ===

Total read pairs processed:             32,857
  Read 1 with adapter:                  31,953 (97.2%)
  Read 2 with adapter:                  29,876 (90.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,780 (11.5%)
Pairs written (passing filters):        29,077 (88.5%)

Total basepairs processed:    16,757,070 bp
  Read 1:    10,021,385 bp
  Read 2:     6,735,685 bp
Quality-trimmed:                       0 bp (0.0%)
  Read 1:             0 bp
  Read 2:             0 bp
Total written (filtered):     13,555,322 bp (80.9%)
  Read 1:     8,273,880 bp
  Read 2:     5,281,442 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 31953 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
11	2	0.0	0	2
12	1	0.0	0	1
13

  context['result_data'] = context['result_data'].append(df)


  context['result_data'] = context['result_data'].append(df)
