# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization, Metadata
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired, trim_single
from qiime2.plugins.demux.methods import filter_samples

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default parameter values. When the notebook is executed through papermill,
# the values injected at run time are assigned after this cell and override them.

# Experiment identity and parameter set
experiment_name = 'exp-01'
params_path = os.path.join('..', 'params', 'parameter-set-01.yaml')

# Input locations (None means "not provided")
base_dir = os.path.join('/', 'home', 'username', 'pipeline-dir')
manifest_file = os.path.join(
    base_dir,
    'data', 'raw', 'manifest', 'manifest.csv',
)
metadata_file = None
demux_file = None

# Execution options
threads = 1
replace_files = False
trim = None

In [3]:
# Parameters
# NOTE(review): this cell presumably holds the values injected by papermill for this
# particular run; because it executes after the defaults cell, every name assigned
# here overrides the default of the same name.
# class_col, classifier_file, top_n, phred, trunc_f, trunc_r and overlap are not read
# by the cells visible in this section — presumably they belong to other pipeline steps.
experiment_name = "ana-flavia-STD-NRxHSD-NR-trim"
base_dir = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri"
manifest_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/manifest/manifest-ana-flavia-STD-NRxHSD-NR.csv"
metadata_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/metadata/metadata-ana-flavia-STD-NRxHSD-NR.tsv"
class_col = "group-id"
classifier_file = "/home/lauro/nupeb/rede-micro/models/silva-138-99-nb-classifier.qza"
top_n = 20
replace_files = False
phred = 20
trunc_f = 0
trunc_r = 0
# Distinct from trim["overlap"] below, which is the value the cutadapt cell reads
overlap = 12
threads = 6
# Primer-trimming settings: the cutadapt cell reads the "forward_primer",
# "reverse_primer" and "overlap" keys; a falsy value (e.g. None) skips trimming.
trim = {
    "overlap": 8,
    "forward_primer": "CCTACGGGRSGCAGCAG",
    "reverse_primer": "GGACTACHVGGGTWTCTAAT",
}


In [4]:
# Folder holding everything produced for this experiment
experiment_dir = os.path.join(base_dir, 'experiments', experiment_name)

# QIIME 2 artifacts (kept as joined, not absolutized) and rendered images
out_dir = os.path.join(experiment_dir, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(experiment_dir, 'imgs'))

# Shared data folders: raw inputs and cached intermediate artifacts
data_dir = os.path.abspath(os.path.join(base_dir, 'data'))
raw_data_dir, interim_data_dir = (
    os.path.abspath(os.path.join(data_dir, subfolder))
    for subfolder in ('raw', 'interim')
)

### Defining names and paths

In [5]:
# Create the output folders that do not exist yet, reporting each one created
for folder_label, folder_path in (('qiime-artifacts', out_dir), ('img', img_folder)):
    if os.path.isdir(folder_path):
        continue
    os.makedirs(folder_path)
    print(f'New {folder_label} folder path created: {folder_path}')

# Full paths of the artifacts handled by this step; a demux artifact path
# supplied through the parameters takes precedence over the default location
demux_file = os.path.join(out_dir, 'demux-paired.qza') if demux_file is None else demux_file
demux_view, demux_file_trim, demux_view_trim = (
    os.path.join(out_dir, artifact_name)
    for artifact_name in (
        'demux-paired.qzv',
        'demux-paired-trim.qza',
        'demux-paired-trim.qzv',
    )
)

New qiime-artifacts folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NRxHSD-NR-trim/qiime-artifacts
New img folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NRxHSD-NR-trim/imgs


## Step execution

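This step imports all `fastq` files listed in the manifest into a **QIIME2 Artifact** object and saves it to a new `qza` file of type `SampleData[PairedEndSequencesWithQuality]` (or `SampleData[SequencesWithQuality]` when the manifest contains a single read direction).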

In [6]:
# Inspect the manifest to decide whether the reads are single- or paired-end.
manifest_df = pd.read_csv(manifest_file)

# Number of distinct values in the 'direction' column (missing values count as a
# value, exactly like len(Series.unique()) does)
n_directions = manifest_df['direction'].nunique(dropna=False)

# Semantic type / import format for each supported number of read directions
import_types_by_directions = {
    1: ('SampleData[SequencesWithQuality]', 'SingleEndFastqManifestPhred33'),
    2: ('SampleData[PairedEndSequencesWithQuality]', 'PairedEndFastqManifestPhred33'),
}

if n_directions not in import_types_by_directions:
    # Fail here rather than later with an unrelated NameError: d_type and v_type
    # would otherwise be left undefined for the import cell.
    raise ValueError(f'ERROR: invalid number of directions {n_directions}')

# d_type: QIIME 2 semantic type of the artifact; v_type: manifest view type used on import
d_type, v_type = import_types_by_directions[n_directions]

In [7]:
# Load the sample metadata; it is used below to filter the demultiplexed samples.
# NOTE(review): no fallback to a default metadata file is implemented here, although
# metadata_file defaults to None in the parameters cell — a metadata path must be provided.
metadata_qa = Metadata.load(metadata_file)

In [8]:
# Build (or reuse) the demultiplexed artifact of this experiment and its summary.
#
# The artifact is rebuilt when it does not exist yet or when replace_files is set.
# A rebuild starts from the unfiltered artifact cached in the interim folder when
# that cache exists; otherwise the reads are imported from the manifest and cached.
rebuild_demux = replace_files or not os.path.isfile(demux_file)

if rebuild_demux:
    interim_demux_path = os.path.join(interim_data_dir, "demux-paired.qza")
    if os.path.isfile(interim_demux_path):
        # Load the unfiltered Artifact from the interim folder
        artifact = Artifact.load(interim_demux_path)
    else:
        # Create a new Artifact from the manifest and cache it, making sure the
        # interim folder exists before saving into it
        artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)
        os.makedirs(interim_data_dir, exist_ok=True)
        artifact.save(interim_demux_path)
else:
    # Reuse the artifact saved by a previous run
    artifact = Artifact.load(demux_file)

# Keep only the samples listed in the metadata (applied on both paths, as before)
artifact = filter_samples(demux=artifact, metadata=metadata_qa).filtered_demux

if rebuild_demux:
    artifact.save(demux_file)

# Visualization: an existing summary is reused only when the artifact itself was
# reused; after a rebuild the old summary would be stale, so it is regenerated.
if not rebuild_demux and os.path.isfile(demux_view):
    demux_view_obj = Visualization.load(demux_view)
else:
    demux_view_obj = demux.visualizers.summarize(artifact).visualization
    demux_view_obj.save(demux_view)

## Step report


In [9]:
# Print the plain-text representation of the demux summary visualization
print(demux_view_obj)

<visualization: Visualization uuid: c92ea228-809f-4760-98a6-787e0e3f05e0>


In [10]:
# Render the demux summary visualization: leaving the object as the last
# expression of the cell lets the notebook display it as the cell's output
demux_view_obj

[cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/)

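We trim the forward primer from the 5' end of the forward reads and the reverse primer from the 5' end of the reverse reads; pairs in which a primer is not found are discarded. For paired-end data, trimming the reverse complement of the opposite primer from the 3' end of each read (read-through) is currently disabled in the code below. For single-end data, the reverse complement of the reverse primer is additionally trimmed from the 3' end of the reads.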

In [11]:
# Output paths of the primer-trimmed artifact and its summary visualization
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Trim only when trimming settings were provided and the trimmed artifact is
# missing or must be replaced
if trim and (replace_files or not os.path.isfile(demux_file_trim)):
    forward_primer = [trim['forward_primer']]  # e.g. ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']]  # e.g. ['GGACTACHVGGGTWTCTAAT']

    # Options shared by the single- and paired-end cutadapt calls
    cutadapt_options = dict(
        demultiplexed_sequences=artifact,
        cores=threads,
        indels=False,
        match_read_wildcards=True,
        match_adapter_wildcards=True,
        error_rate=0.01,
        discard_untrimmed=True,
    )
    # Apply the configured minimum overlap in both modes (it used to be ignored
    # for single-end data); when the key is absent the plugin default is used.
    if 'overlap' in trim:
        cutadapt_options['overlap'] = trim['overlap']

    if n_directions == 1:
        # Single-end: forward primer at the 5' end, plus the reverse complement
        # of the reverse primer at the 3' end
        reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]
        res = trim_single(
            front=forward_primer,
            adapter=reverse_reverse_complement,
            **cutadapt_options,
        ).trimmed_sequences
    elif n_directions == 2:
        # Paired-end: only the 5' primers are trimmed; the 3' reverse-complement
        # adapters (adapter_f / adapter_r) are intentionally not passed.
        res = trim_paired(
            front_f=forward_primer,
            front_r=reverse_primer,
            **cutadapt_options,
        ).trimmed_sequences
    else:
        # Fail loudly instead of printing and silently producing no trimmed artifact
        raise ValueError(f'ERROR: invalid number of directions {n_directions}')

    # Persist the trimmed reads and their summary visualization
    res.save(demux_file_trim)
    demux.visualizers.summarize(res).visualization.save(demux_view_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-kqpkqmxi/S210421121685_24_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-kqpkqmxi/S210421121685_25_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-s1x7mq7p/baf0bbb7-e484-4372-8cd7-d68fd293544f/data/S210421121685_24_L001_R1_001.fastq.gz /tmp/qiime2-archive-s1x7mq7p/baf0bbb7-e484-4372-8cd7-d68fd293544f/data/S210421121685_25_L001_R2_001.fastq.gz



This is cutadapt 3.5 with Python 3.8.12
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-kqpkqmxi/S210421121685_24_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-kqpkqmxi/S210421121685_25_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-s1x7mq7p/baf0bbb7-e484-4372-8cd7-d68fd293544f/data/S210421121685_24_L001_R1_001.fastq.gz /tmp/qiime2-archive-s1x7mq7p/baf0bbb7-e484-4372-8cd7-d68fd293544f/data/S210421121685_25_L001_R2_001.fastq.gz
Processing reads on 6 cores in paired-end mode ...


Finished in 2.69 s (11 µs/read; 5.67 M reads/minute).

=== Summary ===

Total read pairs processed:            254,393
  Read 1 with adapter:                 247,382 (97.2%)
  Read 2 with adapter:                 234,645 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           26,087 (10.3%)
Pairs written (passing filters):       228,306 (89.7%)

Total basepairs processed:   129,740,430 bp
  Read 1:    77,589,865 bp
  Read 2:    52,150,565 bp
Total written (filtered):    106,436,280 bp (82.0%)
  Read 1:    64,964,669 bp
  Read 2:    41,471,611 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 247382 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	3.9	0	3
9	3	1.0	0	3
10	2	0.2	0	2
11	1	0.1	0	1
12	6	0.0	0	6
13	5	0.0	0	5
14	13	0.0	0	13
15	8	0.0	0	8
16	5	0.0	0	5
17	82	0.0	0	82
18	3

Finished in 1.18 s (11 µs/read; 5.28 M reads/minute).

=== Summary ===

Total read pairs processed:            103,905
  Read 1 with adapter:                 101,232 (97.4%)
  Read 2 with adapter:                  96,332 (92.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           10,027 (9.7%)
Pairs written (passing filters):        93,878 (90.3%)

Total basepairs processed:    52,991,550 bp
  Read 1:    31,691,025 bp
  Read 2:    21,300,525 bp
Total written (filtered):     43,766,915 bp (82.6%)
  Read 1:    26,714,313 bp
  Read 2:    17,052,602 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 101232 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.6	0	1
9	2	0.4	0	2
10	1	0.1	0	1
11	5	0.0	0	5
12	1	0.0	0	1
14	8	0.0	0	8
15	4	0.0	0	4
16	2	0.0	0	2
17	32	0.0	0	32
18	16068	0.0	0	16068

Finished in 0.81 s (12 µs/read; 4.92 M reads/minute).

=== Summary ===

Total read pairs processed:             66,368
  Read 1 with adapter:                  64,521 (97.2%)
  Read 2 with adapter:                  61,032 (92.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            7,022 (10.6%)
Pairs written (passing filters):        59,346 (89.4%)

Total basepairs processed:    33,847,680 bp
  Read 1:    20,242,240 bp
  Read 2:    13,605,440 bp
Total written (filtered):     27,666,882 bp (81.7%)
  Read 1:    16,886,558 bp
  Read 2:    10,780,324 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 64521 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.1	0	1
11	2	0.0	0	2
13	2	0.0	0	2
14	3	0.0	0	3
15	2	0.0	0	2
16	1	0.0	0	1
17	22	0.0	0	22
18	9869	0.0	0	9869
19	11662	0.0	0	11662
20	1

Finished in 2.33 s (10 µs/read; 5.77 M reads/minute).

=== Summary ===

Total read pairs processed:            224,175
  Read 1 with adapter:                 217,928 (97.2%)
  Read 2 with adapter:                 207,950 (92.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           21,909 (9.8%)
Pairs written (passing filters):       202,266 (90.2%)

Total basepairs processed:   114,329,250 bp
  Read 1:    68,373,375 bp
  Read 2:    45,955,875 bp
Total written (filtered):     94,295,455 bp (82.5%)
  Read 1:    57,554,676 bp
  Read 2:    36,740,779 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 217928 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	3.4	0	1
9	4	0.9	0	4
10	1	0.2	0	1
11	5	0.1	0	5
13	2	0.0	0	2
14	7	0.0	0	7
15	5	0.0	0	5
16	7	0.0	0	7
17	51	0.0	0	51
18	33939	0.0	0	33939

Finished in 0.92 s (11 µs/read; 5.22 M reads/minute).

=== Summary ===

Total read pairs processed:             80,216
  Read 1 with adapter:                  77,965 (97.2%)
  Read 2 with adapter:                  73,953 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            8,302 (10.3%)
Pairs written (passing filters):        71,914 (89.7%)

Total basepairs processed:    40,910,160 bp
  Read 1:    24,465,880 bp
  Read 2:    16,444,280 bp
Total written (filtered):     33,525,312 bp (81.9%)
  Read 1:    20,462,757 bp
  Read 2:    13,062,555 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 77965 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.2	0	1
9	1	0.3	0	1
14	2	0.0	0	2
16	3	0.0	0	3
17	23	0.0	0	23
18	12006	0.0	0	12006
19	14026	0.0	0	14026
20	14281	0.0	0	14281
21	13017	

Finished in 1.68 s (10 µs/read; 5.88 M reads/minute).

=== Summary ===

Total read pairs processed:            164,608
  Read 1 with adapter:                 160,048 (97.2%)
  Read 2 with adapter:                 152,781 (92.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,982 (9.7%)
Pairs written (passing filters):       148,626 (90.3%)

Total basepairs processed:    83,950,080 bp
  Read 1:    50,205,440 bp
  Read 2:    33,744,640 bp
Total written (filtered):     69,288,385 bp (82.5%)
  Read 1:    42,290,904 bp
  Read 2:    26,997,481 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 160048 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.5	0	1
9	3	0.6	0	3
10	2	0.2	0	2
12	2	0.0	0	2
14	4	0.0	0	4
15	8	0.0	0	8
16	5	0.0	0	5
17	40	0.0	0	40
18	25002	0.0	0	25002
19	28512	0.0

Finished in 0.41 s (17 µs/read; 3.53 M reads/minute).

=== Summary ===

Total read pairs processed:             24,189
  Read 1 with adapter:                  23,503 (97.2%)
  Read 2 with adapter:                  21,685 (89.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,102 (12.8%)
Pairs written (passing filters):        21,087 (87.2%)

Total basepairs processed:    12,336,390 bp
  Read 1:     7,377,645 bp
  Read 2:     4,958,745 bp
Total written (filtered):      9,830,774 bp (79.7%)
  Read 1:     6,000,254 bp
  Read 2:     3,830,520 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 23503 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.0	0	1
16	1	0.0	0	1
17	4	0.0	0	4
18	3615	0.0	0	3615
19	4126	0.0	0	4126
20	4339	0.0	0	4339
21	3919	0.0	0	3919
22	4058	0.0	0	4058
23	

Finished in 0.24 s (43 µs/read; 1.39 M reads/minute).

=== Summary ===

Total read pairs processed:              5,549
  Read 1 with adapter:                   5,397 (97.3%)
  Read 2 with adapter:                   5,015 (90.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              667 (12.0%)
Pairs written (passing filters):         4,882 (88.0%)

Total basepairs processed:     2,829,990 bp
  Read 1:     1,692,445 bp
  Read 2:     1,137,545 bp
Total written (filtered):      2,276,001 bp (80.4%)
  Read 1:     1,389,152 bp
  Read 2:       886,849 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 5397 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
17	3	0.0	0	3
18	858	0.0	0	858
19	967	0.0	0	967
20	1009	0.0	0	1009
21	860	0.0	0	860
22	903	0.0	0	903
23	797	0.0	0	797


=== Second read: Ad

Finished in 3.46 s (10 µs/read; 5.97 M reads/minute).

=== Summary ===

Total read pairs processed:            344,337
  Read 1 with adapter:                 334,766 (97.2%)
  Read 2 with adapter:                 317,372 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           35,670 (10.4%)
Pairs written (passing filters):       308,667 (89.6%)

Total basepairs processed:   175,611,870 bp
  Read 1:   105,022,785 bp
  Read 2:    70,589,085 bp
Total written (filtered):    143,899,876 bp (81.9%)
  Read 1:    87,831,910 bp
  Read 2:    56,067,966 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 334766 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	5.3	0	1
9	4	1.3	0	4
10	8	0.3	0	8
11	7	0.1	0	7
12	6	0.0	0	6
13	6	0.0	0	6
14	19	0.0	0	19
15	7	0.0	0	7
16	15	0.0	0	15
17	102	0.0	0	102


Finished in 4.08 s (10 µs/read; 6.12 M reads/minute).

=== Summary ===

Total read pairs processed:            416,322
  Read 1 with adapter:                 405,067 (97.3%)
  Read 2 with adapter:                 385,489 (92.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           41,066 (9.9%)
Pairs written (passing filters):       375,256 (90.1%)

Total basepairs processed:   212,324,220 bp
  Read 1:   126,978,210 bp
  Read 2:    85,346,010 bp
Total written (filtered):    174,944,302 bp (82.4%)
  Read 1:   106,779,862 bp
  Read 2:    68,164,440 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 405067 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	6.4	0	4
9	2	1.6	0	2
10	3	0.4	0	3
11	5	0.1	0	5
12	4	0.0	0	4
13	3	0.0	0	3
14	17	0.0	0	17
15	12	0.0	0	12
16	26	0.0	0	26
17	108	0.0	0	108

Finished in 0.91 s (11 µs/read; 5.37 M reads/minute).

=== Summary ===

Total read pairs processed:             81,485
  Read 1 with adapter:                  79,303 (97.3%)
  Read 2 with adapter:                  74,804 (91.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            8,666 (10.6%)
Pairs written (passing filters):        72,819 (89.4%)

Total basepairs processed:    41,557,350 bp
  Read 1:    24,852,925 bp
  Read 2:    16,704,425 bp
Total written (filtered):     33,947,823 bp (81.7%)
  Read 1:    20,720,493 bp
  Read 2:    13,227,330 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 79303 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	2	0.3	0	2
10	2	0.1	0	2
12	1	0.0	0	1
14	4	0.0	0	4
15	3	0.0	0	3
16	2	0.0	0	2
17	34	0.0	0	34
18	12320	0.0	0	12320
19	14302	0.0	0	14302
20	

Finished in 0.45 s (17 µs/read; 3.62 M reads/minute).

=== Summary ===

Total read pairs processed:             27,196
  Read 1 with adapter:                  26,427 (97.2%)
  Read 2 with adapter:                  24,481 (90.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,391 (12.5%)
Pairs written (passing filters):        23,805 (87.5%)

Total basepairs processed:    13,869,960 bp
  Read 1:     8,294,780 bp
  Read 2:     5,575,180 bp
Total written (filtered):     11,097,905 bp (80.0%)
  Read 1:     6,774,112 bp
  Read 2:     4,323,793 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 26427 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
13	1	0.0	0	1
14	2	0.0	0	2
17	6	0.0	0	6
18	4153	0.0	0	4153
19	4745	0.0	0	4745
20	4859	0.0	0	4859
21	4346	0.0	0	4346
22	4473	0.0	0	4473
23	

Finished in 0.48 s (14 µs/read; 4.14 M reads/minute).

=== Summary ===

Total read pairs processed:             32,857
  Read 1 with adapter:                  31,953 (97.2%)
  Read 2 with adapter:                  29,876 (90.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,780 (11.5%)
Pairs written (passing filters):        29,077 (88.5%)

Total basepairs processed:    16,757,070 bp
  Read 1:    10,021,385 bp
  Read 2:     6,735,685 bp
Total written (filtered):     13,555,322 bp (80.9%)
  Read 1:     8,273,880 bp
  Read 2:     5,281,442 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 31953 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
11	2	0.0	0	2
12	1	0.0	0	1
13	1	0.0	0	1
14	1	0.0	0	1
16	1	0.0	0	1
17	9	0.0	0	9
18	4981	0.0	0	4981
19	5717	0.0	0	5717
20	5839	0.0	0	5839
21

Finished in 0.22 s (46 µs/read; 1.31 M reads/minute).

=== Summary ===

Total read pairs processed:              4,908
  Read 1 with adapter:                   4,754 (96.9%)
  Read 2 with adapter:                   4,257 (86.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              773 (15.7%)
Pairs written (passing filters):         4,135 (84.3%)

Total basepairs processed:     2,503,080 bp
  Read 1:     1,496,940 bp
  Read 2:     1,006,140 bp
Total written (filtered):      1,927,695 bp (77.0%)
  Read 1:     1,176,557 bp
  Read 2:       751,138 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 4754 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
18	757	0.0	0	757
19	836	0.0	0	836
20	829	0.0	0	829
21	772	0.0	0	772
22	824	0.0	0	824
23	735	0.0	0	735
24	1	0.0	0	1


=== Second read: Adap