# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default parameter values for interactive runs; papermill can override them.
# NOTE(review): this cell presumably carries the papermill `parameters` tag — confirm.

# YAML parameter file; only the development branch further down reads a params file.
params_path = os.path.join('..', 'params', 'ana-flavia-superlactacao.yaml')
# Experiment identifier; used as the folder name under <base_dir>/experiments.
experiment_name = 'jenneffer-vs-01'
# Project root under which the experiment folders are created.
base_dir = os.path.join('/', 'home', 'lauro', 'nupeb', 'redemicro')
# CSV manifest listing the fastq files to import; it must have a `direction` column.
manifest_file = os.path.join(base_dir, 'data', 'raw', 'manifest', 'not-hist-vs-manifest.csv')
# Placeholder only: img_folder is recomputed per experiment in the paths cell below.
img_folder = os.path.abspath(os.path.join(base_dir, 'imgs'))
# When True, existing .qza/.qzv outputs are regenerated instead of being loaded.
replace_files = False
# Primer-trimming settings (dict with 'overlap', 'forward_primer', 'reverse_primer');
# None skips the cutadapt step.
trim = None

In [3]:
# Parameters
# Values injected for this run; they override the defaults assigned in the previous cell.
experiment_name = "ana-flavia-STD-NRxHSD-NR-trim"
base_dir = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri"
manifest_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/manifest/manifest-ana-flavia-STD-NRxHSD-NR.csv"
# NOTE(review): metadata_file, class_col, classifier_file, phred, trunc_f, trunc_r and
# the top-level overlap are not read in the visible part of this notebook — presumably
# they are shared with other pipeline steps; confirm.
metadata_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/metadata/metadata-ana-flavia-STD-NRxHSD-NR.tsv"
class_col = "group-id"
classifier_file = (
    "/home/lauro/nupeb/16S_classifiers_qiime2/silva-138-99-nb-classifier.qza"
)
replace_files = False
phred = 20
trunc_f = 0
trunc_r = 0
overlap = 12
# Number of cores passed to cutadapt (`cores=threads`) in the trimming cell.
threads = 6
# Primer-trimming settings; a falsy value (e.g. None) skips the cutadapt step.
# trim["overlap"] is the value passed to cutadapt and is distinct from the
# top-level `overlap` above.
trim = {
    "overlap": 8,
    "forward_primer": "CCTACGGGRSGCAGCAG",
    "reverse_primer": "GGACTACHVGGGTWTCTAAT",
}


In [4]:
# Development/debugging switch: when `production` is False, the main
# parameters are reloaded from a hardcoded YAML file instead of papermill.
production = True
if not production:
    # NOTE(review): unlike the default `params_path`, this file name has no
    # `.yaml` extension — confirm that the file really is named this way.
    params_path = os.path.join('..', 'params', 'ana-flavia-hipotese-01')
    with open(params_path, 'r') as stream:
        params = yaml.safe_load(stream)

    # The parsed mapping outlives the file handle, so the overrides are
    # applied after the file has been closed.
    experiment_name = params['experiment_name']
    base_dir = params['base_dir']
    manifest_file = params['manifest_file']
    replace_files = params['replace_files']

### Defining names and paths

In [5]:
# Disabled one-off helper, kept for reference only.
# It rewrites a manifest with one line per sample (sample id, forward path,
# reverse path) into the `sample-id,absolute-filepath,direction` layout,
# emitting one `forward` and one `reverse` line per sample.
# new_manifest = '/home/lauro/nupeb/redemicro/data/raw/manifest/karina-manifest.csv'
# with open(manifest_file, 'r') as oldm, open(new_manifest, 'w') as newm:
#     header = 'sample-id,absolute-filepath,direction\n'
#     newm.write(header)
#     for line in oldm.readlines()[1:]:
#         sid, forward, reverse = line[:-1].split(',')
#         fline = ','.join((sid, forward, 'forward')) + '\n'
#         rline = ','.join((sid, reverse, 'reverse')) + '\n'
#         newm.write(fline)
#         newm.write(rline)

In [6]:
# Every output of this experiment lives under <base_dir>/experiments/<experiment_name>
experiment_dir = os.path.join(base_dir, 'experiments', experiment_name)
out_dir = os.path.join(experiment_dir, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(experiment_dir, 'imgs'))

# Full paths of the artifacts (.qza) and visualizations (.qzv) handled by this notebook
demux_file = os.path.join(out_dir, 'demux-paired.qza')
demux_view = os.path.join(out_dir, 'demux-paired.qzv')
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Create the artifacts folder if it does not exist yet
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
    print(f'New qiime-artifacts folder path created: {out_dir}')

# Create the images folder if it does not exist yet
if not os.path.isdir(img_folder):
    os.makedirs(img_folder)
    print(f'New img folder path created: {img_folder}')

New qiime-artifacts folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NRxHSD-NR-trim/qiime-artifacts
New img folder path created: /home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-STD-NRxHSD-NR-trim/imgs


## Step execution

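This step imports all `fastq` files listed in the manifest into a **QIIME2 Artifact** object and saves it to a new `qza` file of type `SampleData[PairedEndSequencesWithQuality]` (or `SampleData[SequencesWithQuality]` when the manifest contains a single read direction).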

In [7]:
# Infer the sequencing layout from the manifest: the number of distinct values
# in its `direction` column selects the QIIME 2 semantic type (`d_type`) and
# the manifest view type (`v_type`) used when importing the data.
manifest_df = pd.read_csv(manifest_file)
n_directions = len(manifest_df['direction'].unique())
if n_directions == 1:
    # Single-end reads
    d_type = 'SampleData[SequencesWithQuality]'
    v_type = 'SingleEndFastqManifestPhred33'
elif n_directions == 2:
    # Paired-end reads (forward + reverse)
    d_type = 'SampleData[PairedEndSequencesWithQuality]'
    v_type = 'PairedEndFastqManifestPhred33'
else:
    # Fail here instead of only printing: otherwise `d_type`/`v_type` stay
    # undefined (or stale from a previous run) and the import cell fails later
    # with a misleading error or imports with the wrong type.
    raise ValueError(
        f'ERROR: invalid number of directions {n_directions} '
        f'in manifest {manifest_file} (expected 1 or 2)'
    )

In [8]:
# Reuse the stored artifact when it exists and no replacement was requested;
# otherwise import the files listed in the manifest and persist the result.
if os.path.isfile(demux_file) and not replace_files:
    artifact = Artifact.load(demux_file)
else:
    # Build the artifact from the manifest using the detected data/view types
    artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)

    # Persist it as a qza file so later runs can load it directly
    artifact.save(demux_file)

In [9]:
# Reuse the stored visualization when it exists and no replacement was
# requested; otherwise summarize the artifact and persist the visualization.
if os.path.isfile(demux_view) and not replace_files:
    demux_view_obj = Visualization.load(demux_view)
else:
    # Generate a visualization of the artifact
    demux_summary = demux.visualizers.summarize(artifact)
    demux_view_obj = demux_summary.visualization

    # Save the visualization to a qzv file next to the qza artifact
    demux_view_obj.save(filepath=demux_view)

## Step report


In [10]:
# Print the plain-text representation of the visualization (shows its UUID)
print(demux_view_obj)

<visualization: Visualization uuid: 6fa595a3-aad6-413f-886f-d8fe05a36646>


In [11]:
# Render the visualization inline: a bare last expression in a cell is shown
# through the notebook's rich display instead of as plain text.
demux_view_obj

Primer trimming is done with the QIIME 2 [cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/) plugin (`trim-paired`).

We trim the forward primer and the reverse complement of the reverse primer from the forward reads. We trim the reverse primer and reverse complement of the forward primer from the reverse reads.

In [12]:
# Output paths for the trimmed reads and their summary visualization
# (recomputed here so this cell does not depend on an earlier definition).
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Trimming runs only when `trim` settings were provided and the trimmed
# artifact is missing or must be replaced.
if trim and (not os.path.isfile(demux_file_trim) or replace_files):
    # cutadapt takes lists of sequences, hence the single-element lists
    forward_primer = [trim['forward_primer']]  # e.g. ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']]  # e.g. ['GGACTACHVGGGTWTCTAAT']
    forward_reverse_complement = [str(Seq(forward_primer[0]).reverse_complement())]
    reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]

    # Forward reads: forward primer at the 5' end, reverse complement of the
    # reverse primer at the 3' end. Reverse reads: the mirrored pair.
    # Pairs in which no primer is found are discarded.
    res = trim_paired(
        demultiplexed_sequences=artifact,
        front_f=forward_primer,
        front_r=reverse_primer,
        adapter_f=reverse_reverse_complement,
        adapter_r=forward_reverse_complement,
        cores=threads,
        overlap=trim['overlap'],
        indels=False,
        match_read_wildcards=True,
        match_adapter_wildcards=True,
        error_rate=0.15,
        discard_untrimmed=True,
    ).trimmed_sequences

    # Persist the trimmed reads
    res.save(demux_file_trim)

    # Summarize once and save that result; the summary was previously computed
    # twice, with the first result discarded.
    trim_summary = demux.visualizers.summarize(res)
    trim_summary.visualization.save(filepath=demux_view_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-z5kdrf1y/210421121685_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-z5kdrf1y/210421121685_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-xpuveehl/399d6386-b73f-448c-a5a2-dc63a94a4515/data/210421121685_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-xpuveehl/399d6386-b73f-448c-a5a2-dc63a94a4515/data/210421121685_1_L001_R2_001.fastq.gz



This is cutadapt 3.5 with Python 3.8.12
Command line parameters: --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-z5kdrf1y/210421121685_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-z5kdrf1y/210421121685_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-xpuveehl/399d6386-b73f-448c-a5a2-dc63a94a4515/data/210421121685_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-xpuveehl/399d6386-b73f-448c-a5a2-dc63a94a4515/data/210421121685_1_L001_R2_001.fastq.gz
Processing reads on 6 cores in paired-end mode ...


Finished in 3.87 s (15 µs/read; 3.94 M reads/minute).

=== Summary ===

Total read pairs processed:            254,393
  Read 1 with adapter:                 251,998 (99.1%)
  Read 2 with adapter:                 248,860 (97.8%)

== Read fate breakdown ==
Pairs that were too short:                  10 (0.0%)
Pairs discarded as untrimmed:            7,871 (3.1%)
Pairs written (passing filters):       246,512 (96.9%)

Total basepairs processed:   129,740,430 bp
  Read 1:    77,589,865 bp
  Read 2:    52,150,565 bp
Total written (filtered):    114,920,232 bp (88.6%)
  Read 1:    70,145,471 bp
  Read 2:    44,774,761 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 128 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 34.4%
  G: 60.9%
  T: 4.7%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	28	3.

Finished in 1.76 s (17 µs/read; 3.54 M reads/minute).

=== Summary ===

Total read pairs processed:            103,905
  Read 1 with adapter:                 102,946 (99.1%)
  Read 2 with adapter:                 101,888 (98.1%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            2,954 (2.8%)
Pairs written (passing filters):       100,951 (97.2%)

Total basepairs processed:    52,991,550 bp
  Read 1:    31,691,025 bp
  Read 2:    21,300,525 bp
Total written (filtered):     47,063,687 bp (88.8%)
  Read 1:    28,727,135 bp
  Read 2:    18,336,552 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 42 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 4.8%
  C: 14.3%
  G: 78.6%
  T: 2.4%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	1.6	

Finished in 1.17 s (18 µs/read; 3.39 M reads/minute).

=== Summary ===

Total read pairs processed:             66,368
  Read 1 with adapter:                  65,760 (99.1%)
  Read 2 with adapter:                  64,834 (97.7%)

== Read fate breakdown ==
Pairs that were too short:                   2 (0.0%)
Pairs discarded as untrimmed:            2,121 (3.2%)
Pairs written (passing filters):        64,245 (96.8%)

Total basepairs processed:    33,847,680 bp
  Read 1:    20,242,240 bp
  Read 2:    13,605,440 bp
Total written (filtered):     29,949,537 bp (88.5%)
  Read 1:    18,280,890 bp
  Read 2:    11,668,647 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 85 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 63.5%
  G: 36.5%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	25	1.0

Finished in 3.48 s (16 µs/read; 3.86 M reads/minute).

=== Summary ===

Total read pairs processed:            224,175
  Read 1 with adapter:                 221,891 (99.0%)
  Read 2 with adapter:                 219,776 (98.0%)

== Read fate breakdown ==
Pairs that were too short:                   1 (0.0%)
Pairs discarded as untrimmed:            6,634 (3.0%)
Pairs written (passing filters):       217,540 (97.0%)

Total basepairs processed:   114,329,250 bp
  Read 1:    68,373,375 bp
  Read 2:    45,955,875 bp
Total written (filtered):    101,415,359 bp (88.7%)
  Read 1:    61,901,589 bp
  Read 2:    39,513,770 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 66 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 4.5%
  G: 92.4%
  T: 3.0%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter sequ

Finished in 1.32 s (16 µs/read; 3.65 M reads/minute).

=== Summary ===

Total read pairs processed:             80,216
  Read 1 with adapter:                  79,416 (99.0%)
  Read 2 with adapter:                  78,121 (97.4%)

== Read fate breakdown ==
Pairs that were too short:                  15 (0.0%)
Pairs discarded as untrimmed:            2,870 (3.6%)
Pairs written (passing filters):        77,331 (96.4%)

Total basepairs processed:    40,910,160 bp
  Read 1:    24,465,880 bp
  Read 2:    16,444,280 bp
Total written (filtered):     36,048,173 bp (88.1%)
  Read 1:    22,004,584 bp
  Read 2:    14,043,589 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 51 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 43.1%
  G: 54.9%
  T: 2.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	7	1.2	

Finished in 2.51 s (15 µs/read; 3.93 M reads/minute).

=== Summary ===

Total read pairs processed:            164,608
  Read 1 with adapter:                 163,069 (99.1%)
  Read 2 with adapter:                 161,335 (98.0%)

== Read fate breakdown ==
Pairs that were too short:                   3 (0.0%)
Pairs discarded as untrimmed:            4,763 (2.9%)
Pairs written (passing filters):       159,842 (97.1%)

Total basepairs processed:    83,950,080 bp
  Read 1:    50,205,440 bp
  Read 2:    33,744,640 bp
Total written (filtered):     74,517,810 bp (88.8%)
  Read 1:    45,483,954 bp
  Read 2:    29,033,856 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 101 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 82.2%
  G: 17.8%
  T: 0.0%
  none/other: 0.0%
    The adapter is preceded by 'C' extremely often.
    The provided adapter se

Finished in 0.57 s (24 µs/read; 2.55 M reads/minute).

=== Summary ===

Total read pairs processed:             24,189
  Read 1 with adapter:                  23,955 (99.0%)
  Read 2 with adapter:                  23,066 (95.4%)

== Read fate breakdown ==
Pairs that were too short:                  14 (0.1%)
Pairs discarded as untrimmed:            1,339 (5.5%)
Pairs written (passing filters):        22,836 (94.4%)

Total basepairs processed:    12,336,390 bp
  Read 1:     7,377,645 bp
  Read 2:     4,958,745 bp
Total written (filtered):     10,633,459 bp (86.2%)
  Read 1:     6,488,861 bp
  Read 2:     4,144,598 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 56 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 44.6%
  G: 51.8%
  T: 3.6%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	5	0.4	

Finished in 0.41 s (74 µs/read; 0.81 M reads/minute).

=== Summary ===

Total read pairs processed:              5,549
  Read 1 with adapter:                   5,499 (99.1%)
  Read 2 with adapter:                   5,356 (96.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              240 (4.3%)
Pairs written (passing filters):         5,309 (95.7%)

Total basepairs processed:     2,829,990 bp
  Read 1:     1,692,445 bp
  Read 2:     1,137,545 bp
Total written (filtered):      2,474,084 bp (87.4%)
  Read 1:     1,510,232 bp
  Read 2:       963,852 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 4 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 100.0%
  G: 0.0%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.1	1

Finished in 4.98 s (14 µs/read; 4.15 M reads/minute).

=== Summary ===

Total read pairs processed:            344,337
  Read 1 with adapter:                 340,922 (99.0%)
  Read 2 with adapter:                 337,158 (97.9%)

== Read fate breakdown ==
Pairs that were too short:                  12 (0.0%)
Pairs discarded as untrimmed:           10,517 (3.1%)
Pairs written (passing filters):       333,808 (96.9%)

Total basepairs processed:   175,611,870 bp
  Read 1:   105,022,785 bp
  Read 2:    70,589,085 bp
Total written (filtered):    155,616,970 bp (88.6%)
  Read 1:    94,986,971 bp
  Read 2:    60,629,999 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 43 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 32.6%
  G: 67.4%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	7	5.3	

Finished in 6.05 s (15 µs/read; 4.13 M reads/minute).

=== Summary ===

Total read pairs processed:            416,322
  Read 1 with adapter:                 412,332 (99.0%)
  Read 2 with adapter:                 407,730 (97.9%)

== Read fate breakdown ==
Pairs that were too short:                  21 (0.0%)
Pairs discarded as untrimmed:           12,483 (3.0%)
Pairs written (passing filters):       403,818 (97.0%)

Total basepairs processed:   212,324,220 bp
  Read 1:   126,978,210 bp
  Read 2:    85,346,010 bp
Total written (filtered):    188,257,614 bp (88.7%)
  Read 1:   114,910,144 bp
  Read 2:    73,347,470 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 75 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 40.0%
  G: 54.7%
  T: 5.3%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	19	6.4

Finished in 1.37 s (17 µs/read; 3.58 M reads/minute).

=== Summary ===

Total read pairs processed:             81,485
  Read 1 with adapter:                  80,768 (99.1%)
  Read 2 with adapter:                  79,421 (97.5%)

== Read fate breakdown ==
Pairs that were too short:                  10 (0.0%)
Pairs discarded as untrimmed:            2,761 (3.4%)
Pairs written (passing filters):        78,714 (96.6%)

Total basepairs processed:    41,557,350 bp
  Read 1:    24,852,925 bp
  Read 2:    16,704,425 bp
Total written (filtered):     36,688,578 bp (88.3%)
  Read 1:    22,394,693 bp
  Read 2:    14,293,885 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 38 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 47.4%
  C: 10.5%
  G: 39.5%
  T: 2.6%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	1.2

Finished in 0.64 s (24 µs/read; 2.55 M reads/minute).

=== Summary ===

Total read pairs processed:             27,196
  Read 1 with adapter:                  26,931 (99.0%)
  Read 2 with adapter:                  26,035 (95.7%)

== Read fate breakdown ==
Pairs that were too short:                  18 (0.1%)
Pairs discarded as untrimmed:            1,407 (5.2%)
Pairs written (passing filters):        25,771 (94.8%)

Total basepairs processed:    13,869,960 bp
  Read 1:     8,294,780 bp
  Read 2:     5,575,180 bp
Total written (filtered):     12,010,229 bp (86.6%)
  Read 1:     7,333,324 bp
  Read 2:     4,676,905 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 12 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 33.3%
  G: 58.3%
  T: 0.0%
  none/other: 8.3%

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	0.4	

Finished in 0.72 s (22 µs/read; 2.75 M reads/minute).

=== Summary ===

Total read pairs processed:             32,857
  Read 1 with adapter:                  32,525 (99.0%)
  Read 2 with adapter:                  31,730 (96.6%)

== Read fate breakdown ==
Pairs that were too short:                  11 (0.0%)
Pairs discarded as untrimmed:            1,442 (4.4%)
Pairs written (passing filters):        31,404 (95.6%)

Total basepairs processed:    16,757,070 bp
  Read 1:    10,021,385 bp
  Read 2:     6,735,685 bp
Total written (filtered):     14,618,640 bp (87.2%)
  Read 1:     8,915,974 bp
  Read 2:     5,702,666 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 131 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 44.3%
  C: 6.9%
  G: 48.1%
  T: 0.8%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
21	1	0.

Finished in 0.32 s (65 µs/read; 0.92 M reads/minute).

=== Summary ===

Total read pairs processed:              4,908
  Read 1 with adapter:                   4,853 (98.9%)
  Read 2 with adapter:                   4,578 (93.3%)

== Read fate breakdown ==
Pairs that were too short:                   3 (0.1%)
Pairs discarded as untrimmed:              377 (7.7%)
Pairs written (passing filters):         4,528 (92.3%)

Total basepairs processed:     2,503,080 bp
  Read 1:     1,496,940 bp
  Read 2:     1,006,140 bp
Total written (filtered):      2,106,079 bp (84.1%)
  Read 1:     1,284,585 bp
  Read 2:       821,494 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 31 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.2%
  C: 25.8%
  G: 58.1%
  T: 12.9%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.0