# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default parameters. When the notebook is executed through papermill these
# values are overridden by the injected parameters cell that follows.
params_path = os.path.join('..', 'params', 'ana-flavia-superlactacao.yaml')
experiment_name = 'jenneffer-vs-01'
base_dir = os.path.join('/', 'home', 'lauro', 'nupeb', 'redemicro')
manifest_file = os.path.join(base_dir, 'data', 'raw', 'manifest', 'not-hist-vs-manifest.csv')
# NOTE: `img_folder` is recomputed per experiment further down in the notebook.
img_folder = os.path.abspath(os.path.join(base_dir, 'imgs'))
# When True, existing artifacts/visualizations are regenerated and overwritten.
replace_files = False
# Number of cores passed to cutadapt by the trimming step. A default is needed
# here so that supplying `trim` without `threads` does not raise a NameError.
threads = 1
# Primer-trimming configuration (dict with 'overlap', 'forward_primer' and
# 'reverse_primer'); None disables the trimming step.
trim = None

In [3]:
# Parameters
# --- experiment identity and input locations ---
experiment_name = "ana-flavia-STD-NCxHSD-NC-trim"
base_dir = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri"
manifest_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/manifest/manifest-ana-flavia-STD-NCxHSD-NC.csv"
metadata_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/metadata/metadata-ana-flavia-STD-NCxHSD-NC.tsv"
classifier_file = "/home/lauro/nupeb/dados_brutos_rede_genoma/16S_classifiers_qiime2/silva-138-99-nb-classifier.qza"
class_col = "group-id"

# --- execution switches ---
replace_files = False
threads = 6

# --- numeric settings (not read by the cells visible in this notebook section) ---
phred, trunc_f, trunc_r, overlap = 20, 0, 0, 12

# --- primer trimming configuration consumed by the cutadapt step ---
trim = dict(
    overlap=8,
    forward_primer="CCTACGGGRSGCAGCAG",
    reverse_primer="GGACTACHVGGGTWTCTAAT",
)


In [4]:
# Hardcoded parameters for development and debugging.
# With `production = True` the values defined in the cells above are kept;
# switching it to False reloads a subset of them from a local YAML file.
production = True
if not production:
    # NOTE(review): unlike the default `params_path`, this one has no `.yaml`
    # extension - confirm the file name before using the development path.
    params_path = os.path.join('..', 'params', 'ana-flavia-hipotese-01')
    with open(params_path, 'r') as yaml_handle:
        params = yaml.safe_load(yaml_handle)

    # Only these four settings are taken from the YAML file.
    experiment_name = params['experiment_name']
    base_dir = params['base_dir']
    manifest_file = params['manifest_file']
    replace_files = params['replace_files']

### Defining names and paths

In [5]:
# Disabled one-off helper, kept for reference only (nothing in this cell runs).
# It rewrites a manifest whose rows are `sample-id,forward-path,reverse-path`
# into the long format with the header `sample-id,absolute-filepath,direction`,
# emitting one 'forward' and one 'reverse' row per sample.
# new_manifest = '/home/lauro/nupeb/redemicro/data/raw/manifest/karina-manifest.csv'
# with open(manifest_file, 'r') as oldm, open(new_manifest, 'w') as newm:
#     header = 'sample-id,absolute-filepath,direction\n'
#     newm.write(header)
#     for line in oldm.readlines()[1:]:
#         sid, forward, reverse = line[:-1].split(',')
#         fline = ','.join((sid, forward, 'forward')) + '\n'
#         rline = ','.join((sid, reverse, 'reverse')) + '\n'
#         newm.write(fline)
#         newm.write(rline)

In [6]:
# Every output of this experiment lives under <base_dir>/experiments/<experiment_name>
experiment_dir = os.path.join(base_dir, 'experiments', experiment_name)
out_dir = os.path.join(experiment_dir, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(experiment_dir, 'imgs'))

# Create each folder if it does not exist yet, reporting only newly created ones
for folder, created_msg in (
    (out_dir, f'New qiime-artifacts folder path created: {out_dir}'),
    (img_folder, f'New img folder path created: {img_folder}'),
):
    if not os.path.isdir(folder):
        os.makedirs(folder)
        print(created_msg)

# Full paths of the artifacts (.qza) and visualizations (.qzv) handled by this notebook
demux_file, demux_view = (
    os.path.join(out_dir, file_name)
    for file_name in ('demux-paired.qza', 'demux-paired.qzv')
)
demux_file_trim, demux_view_trim = (
    os.path.join(out_dir, file_name)
    for file_name in ('demux-paired-trim.qza', 'demux-paired-trim.qzv')
)

New qiime-artifacts folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NCxHSD-NC-trim/qiime-artifacts
New img folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NCxHSD-NC-trim/imgs


## Step execution

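This step imports all `fastq` files listed in the manifest into a **QIIME2 Artifact** object and saves it to a new `qza` file of type `SampleData[PairedEndSequencesWithQuality]` (or `SampleData[SequencesWithQuality]` when the manifest contains a single read direction).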

In [7]:
# Inspect the manifest to decide whether the reads are single- or paired-end.
# The number of distinct values in the `direction` column selects both the
# QIIME 2 semantic type (`d_type`) and the manifest view type (`v_type`).
manifest_df = pd.read_csv(manifest_file)
n_directions = len(manifest_df['direction'].unique())
if n_directions == 1:
    d_type = 'SampleData[SequencesWithQuality]'
    v_type = 'SingleEndFastqManifestPhred33'
elif n_directions == 2:
    d_type = 'SampleData[PairedEndSequencesWithQuality]'
    v_type = 'PairedEndFastqManifestPhred33'
else:
    # Fail here instead of only printing: otherwise `d_type`/`v_type` stay
    # undefined and the import cell crashes later with an unrelated NameError.
    raise ValueError(
        f'ERROR: invalid number of directions {n_directions} '
        f'in manifest {manifest_file!r} (expected 1 or 2)'
    )

In [8]:
# Reuse the saved demultiplexed artifact when it exists and no replacement
# was requested; otherwise (re)import the reads and save them.
if os.path.isfile(demux_file) and not replace_files:
    artifact = Artifact.load(demux_file)
else:
    # Build the artifact from the fastq files listed in the manifest
    artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)

    # Persist it as a qza file so later runs can skip the import
    artifact.save(demux_file)

In [9]:
# Load the saved summary when it exists and no replacement was requested;
# otherwise summarize the artifact and save the result as a qzv file.
if os.path.isfile(demux_view) and not replace_files:
    demux_view_obj = Visualization.load(demux_view)
else:
    # Generate a visualization of the artifact
    demux_summary = demux.visualizers.summarize(artifact)
    demux_view_obj = demux_summary.visualization

    # Write the visualization next to the qza file
    demux_view_obj.save(demux_view)

## Step report


In [10]:
# Print the plain-text representation of the visualization (it shows its UUID)
print(demux_view_obj)

<visualization: Visualization uuid: df1f840a-bfcc-4c67-a065-4110bc896010>


In [11]:
# Render Visualization
# The object is left as the cell's last expression so that the notebook
# displays it, rather than printing its text form.
demux_view_obj

[cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/)

We trim the forward primer and the reverse complement of the reverse primer from the forward reads. We trim the reverse primer and reverse complement of the forward primer from the reverse reads.

In [12]:
# Output locations for the primer-trimmed reads and their summary
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Trim only when a `trim` configuration was supplied and the trimmed artifact
# is missing (or replacing existing files was requested).
# NOTE(review): `trim_paired` presumably requires paired-end reads - confirm
# that `trim` is never set for single-end manifests.
if trim and (not os.path.isfile(demux_file_trim) or replace_files):
    forward_primer = [trim['forward_primer']] # ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']] # ['GGACTACHVGGGTWTCTAAT']
    forward_reverse_complement = [str(Seq(forward_primer[0]).reverse_complement())]
    reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]

    # front_* remove the primer at the 5' end of each read; adapter_* remove
    # the reverse complement of the opposite primer at the 3' end.
    res = trim_paired(
        demultiplexed_sequences=artifact,
        front_f=forward_primer,
        front_r=reverse_primer,
        adapter_f=reverse_reverse_complement,
        adapter_r=forward_reverse_complement,
        cores=threads,
        overlap=trim['overlap'],
        indels=False,
        match_read_wildcards=True,
        match_adapter_wildcards=True,
        error_rate=0.15,
        discard_untrimmed=True,  # drop read pairs where no primer was found
    ).trimmed_sequences

    # Summarize the trimmed reads once and reuse the result; the summary used
    # to be computed twice, with the first result thrown away.
    trim_view_obj = demux.visualizers.summarize(res).visualization
    res.save(demux_file_trim)
    trim_view_obj.save(demux_view_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-08o7obtr/210421121682_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-08o7obtr/210421121682_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-0tiamqlh/c1918fcd-fca3-4f28-b410-5c40896f2153/data/210421121682_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-0tiamqlh/c1918fcd-fca3-4f28-b410-5c40896f2153/data/210421121682_1_L001_R2_001.fastq.gz



This is cutadapt 3.5 with Python 3.8.12
Command line parameters: --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-08o7obtr/210421121682_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-08o7obtr/210421121682_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-0tiamqlh/c1918fcd-fca3-4f28-b410-5c40896f2153/data/210421121682_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-0tiamqlh/c1918fcd-fca3-4f28-b410-5c40896f2153/data/210421121682_1_L001_R2_001.fastq.gz
Processing reads on 6 cores in paired-end mode ...


Finished in 2.67 s (16 µs/read; 3.73 M reads/minute).

=== Summary ===

Total read pairs processed:            165,821
  Read 1 with adapter:                 164,234 (99.0%)
  Read 2 with adapter:                 162,538 (98.0%)

== Read fate breakdown ==
Pairs that were too short:                   2 (0.0%)
Pairs discarded as untrimmed:            4,837 (2.9%)
Pairs written (passing filters):       160,982 (97.1%)

Total basepairs processed:    84,568,710 bp
  Read 1:    50,575,405 bp
  Read 2:    33,993,305 bp
Total written (filtered):     75,046,538 bp (88.7%)
  Read 1:    45,805,681 bp
  Read 2:    29,240,857 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 38 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 18.4%
  G: 73.7%
  T: 7.9%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	2.5	

Finished in 2.30 s (15 µs/read; 3.91 M reads/minute).

=== Summary ===

Total read pairs processed:            149,412
  Read 1 with adapter:                 147,947 (99.0%)
  Read 2 with adapter:                 146,155 (97.8%)

== Read fate breakdown ==
Pairs that were too short:                   9 (0.0%)
Pairs discarded as untrimmed:            4,686 (3.1%)
Pairs written (passing filters):       144,717 (96.9%)

Total basepairs processed:    76,200,120 bp
  Read 1:    45,570,660 bp
  Read 2:    30,629,460 bp
Total written (filtered):     67,462,124 bp (88.5%)
  Read 1:    41,176,861 bp
  Read 2:    26,285,263 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 105 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 27.6%
  G: 71.4%
  T: 1.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	8	2.3

Finished in 4.99 s (15 µs/read; 4.13 M reads/minute).

=== Summary ===

Total read pairs processed:            343,261
  Read 1 with adapter:                 340,079 (99.1%)
  Read 2 with adapter:                 336,731 (98.1%)

== Read fate breakdown ==
Pairs that were too short:                   1 (0.0%)
Pairs discarded as untrimmed:            9,645 (2.8%)
Pairs written (passing filters):       333,615 (97.2%)

Total basepairs processed:   175,063,110 bp
  Read 1:   104,694,605 bp
  Read 2:    70,368,505 bp
Total written (filtered):    155,517,885 bp (88.8%)
  Read 1:    94,921,854 bp
  Read 2:    60,596,031 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 162 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.6%
  C: 56.2%
  G: 22.8%
  T: 20.4%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	38	5

Finished in 2.08 s (15 µs/read; 3.94 M reads/minute).

=== Summary ===

Total read pairs processed:            136,741
  Read 1 with adapter:                 135,344 (99.0%)
  Read 2 with adapter:                 133,661 (97.7%)

== Read fate breakdown ==
Pairs that were too short:                  10 (0.0%)
Pairs discarded as untrimmed:            4,417 (3.2%)
Pairs written (passing filters):       132,314 (96.8%)

Total basepairs processed:    69,737,910 bp
  Read 1:    41,706,005 bp
  Read 2:    28,031,905 bp
Total written (filtered):     61,659,397 bp (88.4%)
  Read 1:    37,628,045 bp
  Read 2:    24,031,352 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 111 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 1.8%
  C: 23.4%
  G: 73.0%
  T: 1.8%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	6	2.1

Processing reads on 6 cores in paired-end mode ...


Finished in 2.99 s (16 µs/read; 3.65 M reads/minute).

=== Summary ===

Total read pairs processed:            181,624
  Read 1 with adapter:                 179,835 (99.0%)
  Read 2 with adapter:                 177,952 (98.0%)

== Read fate breakdown ==
Pairs that were too short:                   7 (0.0%)
Pairs discarded as untrimmed:            5,418 (3.0%)
Pairs written (passing filters):       176,199 (97.0%)

Total basepairs processed:    92,628,240 bp
  Read 1:    55,395,320 bp
  Read 2:    37,232,920 bp
Total written (filtered):     82,133,955 bp (88.7%)
  Read 1:    50,131,407 bp
  Read 2:    32,002,548 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 369 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 1.6%
  C: 13.8%
  G: 83.7%
  T: 0.8%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter se

Finished in 2.09 s (16 µs/read; 3.86 M reads/minute).

=== Summary ===

Total read pairs processed:            134,399
  Read 1 with adapter:                 133,122 (99.0%)
  Read 2 with adapter:                 131,694 (98.0%)

== Read fate breakdown ==
Pairs that were too short:                   2 (0.0%)
Pairs discarded as untrimmed:            3,948 (2.9%)
Pairs written (passing filters):       130,449 (97.1%)

Total basepairs processed:    68,543,490 bp
  Read 1:    40,991,695 bp
  Read 2:    27,551,795 bp
Total written (filtered):     60,814,970 bp (88.7%)
  Read 1:    37,120,467 bp
  Read 2:    23,694,503 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 78 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 46.2%
  G: 53.8%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	23	2.1

Finished in 1.07 s (18 µs/read; 3.32 M reads/minute).

=== Summary ===

Total read pairs processed:             58,992
  Read 1 with adapter:                  58,444 (99.1%)
  Read 2 with adapter:                  57,446 (97.4%)

== Read fate breakdown ==
Pairs that were too short:                  12 (0.0%)
Pairs discarded as untrimmed:            2,078 (3.5%)
Pairs written (passing filters):        56,902 (96.5%)

Total basepairs processed:    30,085,920 bp
  Read 1:    17,992,560 bp
  Read 2:    12,093,360 bp
Total written (filtered):     26,521,205 bp (88.2%)
  Read 1:    16,187,827 bp
  Read 2:    10,333,378 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 88 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 2.3%
  C: 35.2%
  G: 59.1%
  T: 3.4%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	6	0.9	

Finished in 3.68 s (16 µs/read; 3.75 M reads/minute).

=== Summary ===

Total read pairs processed:            229,809
  Read 1 with adapter:                 227,628 (99.1%)
  Read 2 with adapter:                 224,970 (97.9%)

== Read fate breakdown ==
Pairs that were too short:                  10 (0.0%)
Pairs discarded as untrimmed:            6,969 (3.0%)
Pairs written (passing filters):       222,830 (97.0%)

Total basepairs processed:   117,202,590 bp
  Read 1:    70,091,745 bp
  Read 2:    47,110,845 bp
Total written (filtered):    103,871,094 bp (88.6%)
  Read 1:    63,399,872 bp
  Read 2:    40,471,222 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 372 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.2%
  C: 12.1%
  G: 82.0%
  T: 2.7%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter se

Finished in 1.86 s (16 µs/read; 3.70 M reads/minute).

=== Summary ===

Total read pairs processed:            114,634
  Read 1 with adapter:                 113,440 (99.0%)
  Read 2 with adapter:                 111,771 (97.5%)

== Read fate breakdown ==
Pairs that were too short:                  14 (0.0%)
Pairs discarded as untrimmed:            4,005 (3.5%)
Pairs written (passing filters):       110,615 (96.5%)

Total basepairs processed:    58,463,340 bp
  Read 1:    34,963,370 bp
  Read 2:    23,499,970 bp
Total written (filtered):     51,562,559 bp (88.2%)
  Read 1:    31,472,632 bp
  Read 2:    20,089,927 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 47 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 34.0%
  G: 63.8%
  T: 2.1%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	7	1.7	

Finished in 0.96 s (20 µs/read; 3.07 M reads/minute).

=== Summary ===

Total read pairs processed:             49,335
  Read 1 with adapter:                  48,895 (99.1%)
  Read 2 with adapter:                  48,247 (97.8%)

== Read fate breakdown ==
Pairs that were too short:                   5 (0.0%)
Pairs discarded as untrimmed:            1,514 (3.1%)
Pairs written (passing filters):        47,816 (96.9%)

Total basepairs processed:    25,160,850 bp
  Read 1:    15,047,175 bp
  Read 2:    10,113,675 bp
Total written (filtered):     22,289,755 bp (88.6%)
  Read 1:    13,605,875 bp
  Read 2:     8,683,880 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 31 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.2%
  C: 74.2%
  G: 19.4%
  T: 3.2%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	14	0.8

Finished in 0.43 s (30 µs/read; 1.99 M reads/minute).

=== Summary ===

Total read pairs processed:             14,319
  Read 1 with adapter:                  14,165 (98.9%)
  Read 2 with adapter:                  13,639 (95.3%)

== Read fate breakdown ==
Pairs that were too short:                  11 (0.1%)
Pairs discarded as untrimmed:              822 (5.7%)
Pairs written (passing filters):        13,486 (94.2%)

Total basepairs processed:     7,302,690 bp
  Read 1:     4,367,295 bp
  Read 2:     2,935,395 bp
Total written (filtered):      6,284,106 bp (86.1%)
  Read 1:     3,837,039 bp
  Read 2:     2,447,067 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 13 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 7.7%
  C: 38.5%
  G: 38.5%
  T: 7.7%
  none/other: 7.7%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.2	

Finished in 0.57 s (27 µs/read; 2.20 M reads/minute).

=== Summary ===

Total read pairs processed:             20,959
  Read 1 with adapter:                  20,764 (99.1%)
  Read 2 with adapter:                  20,210 (96.4%)

== Read fate breakdown ==
Pairs that were too short:                   6 (0.0%)
Pairs discarded as untrimmed:              930 (4.4%)
Pairs written (passing filters):        20,023 (95.5%)

Total basepairs processed:    10,689,090 bp
  Read 1:     6,392,495 bp
  Read 2:     4,296,595 bp
Total written (filtered):      9,331,198 bp (87.3%)
  Read 1:     5,696,317 bp
  Read 2:     3,634,881 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 14 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 21.4%
  G: 64.3%
  T: 14.3%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.3

Finished in 0.52 s (26 µs/read; 2.34 M reads/minute).

=== Summary ===

Total read pairs processed:             20,194
  Read 1 with adapter:                  19,984 (99.0%)
  Read 2 with adapter:                  19,270 (95.4%)

== Read fate breakdown ==
Pairs that were too short:                   8 (0.0%)
Pairs discarded as untrimmed:            1,124 (5.6%)
Pairs written (passing filters):        19,062 (94.4%)

Total basepairs processed:    10,298,940 bp
  Read 1:     6,159,170 bp
  Read 2:     4,139,770 bp
Total written (filtered):      8,882,443 bp (86.2%)
  Read 1:     5,423,098 bp
  Read 2:     3,459,345 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 9 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 22.2%
  C: 22.2%
  G: 55.6%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.3	

Finished in 1.22 s (18 µs/read; 3.32 M reads/minute).

=== Summary ===

Total read pairs processed:             67,293
  Read 1 with adapter:                  66,636 (99.0%)
  Read 2 with adapter:                  65,550 (97.4%)

== Read fate breakdown ==
Pairs that were too short:                  14 (0.0%)
Pairs discarded as untrimmed:            2,368 (3.5%)
Pairs written (passing filters):        64,911 (96.5%)

Total basepairs processed:    34,319,430 bp
  Read 1:    20,524,365 bp
  Read 2:    13,795,065 bp
Total written (filtered):     30,254,663 bp (88.2%)
  Read 1:    18,466,767 bp
  Read 2:    11,787,896 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 44 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 2.3%
  C: 47.7%
  G: 50.0%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	10	1.0

Finished in 0.42 s (50 µs/read; 1.19 M reads/minute).

=== Summary ===

Total read pairs processed:              8,334
  Read 1 with adapter:                   8,233 (98.8%)
  Read 2 with adapter:                   7,983 (95.8%)

== Read fate breakdown ==
Pairs that were too short:                   7 (0.1%)
Pairs discarded as untrimmed:              443 (5.3%)
Pairs written (passing filters):         7,884 (94.6%)

Total basepairs processed:     4,250,340 bp
  Read 1:     2,541,870 bp
  Read 2:     1,708,470 bp
Total written (filtered):      3,674,455 bp (86.5%)
  Read 1:     2,243,538 bp
  Read 2:     1,430,917 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 1 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 0.0%
  G: 100.0%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
24	1	0.0	