# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default parameters for a stand-alone run of this notebook. Every name set
# here can be overridden by a later cell (see the papermill note above).
params_path = os.path.join('..', 'params', 'ana-flavia-superlactacao.yaml')
experiment_name = 'jenneffer-vs-01'
base_dir = os.path.join('/', 'home', 'lauro', 'nupeb', 'redemicro')
manifest_file = os.path.join(base_dir, 'data', 'raw', 'manifest', 'not-hist-vs-manifest.csv')
img_folder = os.path.abspath(os.path.join(base_dir, 'imgs'))
# When False, artifacts already present on disk are loaded instead of rebuilt
replace_files = False
# Primer trimming settings. None disables the cutadapt step; otherwise a dict
# with the keys 'forward_primer', 'reverse_primer' and 'overlap'.
trim = None
# Number of cores passed to cutadapt by the trimming cell. A default is needed
# here so that enabling `trim` without also supplying `threads` does not fail
# with a NameError.
threads = 1

In [3]:
# Parameters
# Run-specific values. This cell executes after the defaults cell, so every
# name assigned here replaces the default of the same name.
# NOTE(review): the "# Parameters" marker suggests this cell was injected by
# papermill and is regenerated on each run; confirm before editing by hand.
experiment_name = "andressa"
base_dir = "/home/lauro/nupeb/rede-micro/redemicro-andressa-lbtm"
manifest_file = "/home/lauro/nupeb/rede-micro/redemicro-andressa-lbtm/data/manifest.csv"
# The next three values are not referenced by the cells visible in this
# notebook; presumably they are consumed by later pipeline steps (TODO confirm).
metadata_file = "/home/lauro/nupeb/rede-micro/redemicro-andressa-lbtm/data/metadata.tsv"
class_col = "group-id"
classifier_file = "/home/lauro/nupeb/dados_brutos_rede_genoma/16S_classifiers_qiime2/silva-138-99-nb-classifier.qza"
replace_files = False
# phred, trunc_f, trunc_r and overlap are likewise unused in the visible cells.
# Note that the trimming cell reads trim["overlap"], not this top-level overlap.
phred = 20
trunc_f = 0
trunc_r = 0
overlap = 12
# Passed to cutadapt as the number of cores by the trimming cell
threads = 6
# Primer trimming settings read by the trimming cell: minimum overlap plus the
# forward and reverse primer sequences
trim = {
    "overlap": 8,
    "forward_primer": "CCTACGGGRSGCAGCAG",
    "reverse_primer": "GGACTACHVGGGTWTCTAAT",
}


In [4]:
# Development/debugging switch: when `production` is False the run parameters
# are read from a hardcoded YAML file instead of the values set in the cells above
production = True
if not production:
    params_path = os.path.join('..', 'params', 'ana-flavia-hipotese-01')
    # Parse the whole document first; the file is closed before values are used
    with open(params_path, 'r') as yaml_stream:
        params = yaml.safe_load(yaml_stream)
    # Only these four parameters are overridden by the development file
    experiment_name = params['experiment_name']
    base_dir = params['base_dir']
    manifest_file = params['manifest_file']
    replace_files = params['replace_files']

### Defining names and paths

In [5]:
# Disabled one-off helper, kept for reference only (every line is commented out).
# It rewrites a wide manifest (one "sample-id,forward,reverse" row per sample,
# header skipped) into the long layout with a `direction` column, writing one
# 'forward' row and one 'reverse' row per sample.
# new_manifest = '/home/lauro/nupeb/redemicro/data/raw/manifest/karina-manifest.csv'
# with open(manifest_file, 'r') as oldm, open(new_manifest, 'w') as newm:
#     header = 'sample-id,absolute-filepath,direction\n'
#     newm.write(header)
#     for line in oldm.readlines()[1:]:
#         sid, forward, reverse = line[:-1].split(',')
#         fline = ','.join((sid, forward, 'forward')) + '\n'
#         rline = ','.join((sid, reverse, 'reverse')) + '\n'
#         newm.write(fline)
#         newm.write(rline)

In [6]:
# Everything produced by this step lives under <base_dir>/experiments/<experiment_name>
experiment_dir = os.path.join(base_dir, 'experiments', experiment_name)
out_dir = os.path.join(experiment_dir, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(experiment_dir, 'imgs'))

# Create each output folder that does not exist yet and report the creation
new_folder_messages = (
    (out_dir, f'New qiime-artifacts folder path created: {out_dir}'),
    (img_folder, f'New img folder path created: {img_folder}'),
)
for folder_path, created_message in new_folder_messages:
    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)
        print(created_message)

# Full paths of the artifacts (.qza) and visualizations (.qzv) written by this
# step, before and after primer trimming
demux_file, demux_view, demux_file_trim, demux_view_trim = (
    os.path.join(out_dir, artifact_name)
    for artifact_name in (
        'demux-paired.qza',
        'demux-paired.qzv',
        'demux-paired-trim.qza',
        'demux-paired-trim.qzv',
    )
)

## Step execution

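This step imports all `fastq` files listed in the manifest into a **QIIME2 Artifact** object and saves it to a new `qza` file of type `SampleData[PairedEndSequencesWithQuality]` (or `SampleData[SequencesWithQuality]` when the manifest contains a single read direction).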

In [7]:
# Inspect the manifest to decide whether the run is single-end or paired-end:
# one distinct value in the `direction` column means single-end, two mean
# paired-end.
manifest_df = pd.read_csv(manifest_file)
n_directions = manifest_df['direction'].nunique(dropna=False)

# Semantic type and manifest view type handed to Artifact.import_data,
# keyed by the number of distinct read directions
import_types = {
    1: ('SampleData[SequencesWithQuality]', 'SingleEndFastqManifestPhred33'),
    2: ('SampleData[PairedEndSequencesWithQuality]', 'PairedEndFastqManifestPhred33'),
}

# Stop here on an unexpected manifest: continuing would leave d_type/v_type
# undefined (or stale from a previous kernel run) and fail later in a less
# obvious way.
if n_directions not in import_types:
    raise ValueError(f'ERROR: invalid number of directions {n_directions}')

d_type, v_type = import_types[n_directions]

In [8]:
# Reuse the demux artifact already on disk unless the caller asked for
# existing files to be replaced
if os.path.isfile(demux_file) and not replace_files:
    artifact = Artifact.load(demux_file)
else:
    # Build the artifact from the fastq files listed in the manifest
    artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)

    # Persist it as a qza file so later runs can load it directly
    artifact.save(demux_file)

In [9]:
# Load the cached summary visualization when it exists and must not be
# replaced; otherwise build it from the artifact and store it as a qzv file
if os.path.isfile(demux_view) and not replace_files:
    demux_view_obj = Visualization.load(demux_view)
else:
    # Generate the summary of the demultiplexed reads
    demux_summary = demux.visualizers.summarize(artifact)

    # Write the visualization next to the qza file
    demux_summary.visualization.save(demux_view)

    demux_view_obj = demux_summary.visualization

## Step report


In [10]:
# Print the plain-text representation of the visualization (its type and UUID)
print(demux_view_obj)

<visualization: Visualization uuid: 1e4f5d56-f644-4b61-8958-8112d6dc3b95>


In [11]:
# Render the visualization: leaving the object as the last expression of the
# cell lets Jupyter display it
demux_view_obj

[cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/)

We trim the forward primer and the reverse complement of the reverse primer from the forward reads. We trim the reverse primer and reverse complement of the forward primer from the reverse reads.

In [12]:
# Output paths of the primer-trimmed reads and of their summary visualization
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Trimming runs only when a `trim` configuration was supplied, and only if the
# trimmed artifact is missing or existing files must be replaced
if trim and (not os.path.isfile(demux_file_trim) or replace_files):
    # trim_paired expects lists of sequences, hence the one-element lists
    forward_primer = [trim['forward_primer']]  # e.g. ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']]  # e.g. ['GGACTACHVGGGTWTCTAAT']
    forward_reverse_complement = [str(Seq(forward_primer[0]).reverse_complement())]
    reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]

    # Forward reads: forward primer at the 5' end, reverse complement of the
    # reverse primer at the 3' end. Reverse reads: the mirrored combination.
    res = trim_paired(
        demultiplexed_sequences=artifact,
        front_f=forward_primer,
        front_r=reverse_primer,
        adapter_f=reverse_reverse_complement,
        adapter_r=forward_reverse_complement,
        cores=threads,
        overlap=trim['overlap'],
        indels=False,
        match_read_wildcards=True,
        match_adapter_wildcards=True,
        error_rate=0.15,
        discard_untrimmed=True,
    ).trimmed_sequences
    res.save(demux_file_trim)

    # Summarize the trimmed reads once and reuse the result; the summary was
    # previously computed twice, with the first result thrown away
    demux_view_trim_obj = demux.visualizers.summarize(res).visualization
    Visualization.save(demux_view_trim_obj, filepath=demux_view_trim)
elif trim:
    # The trimmed artifact is already on disk: load it so `res` is defined
    # whenever trimming is enabled, as is done for the untrimmed artifact
    res = Artifact.load(demux_file_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-ehp71vq9/S62_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-ehp71vq9/S62_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-j6h5lrr4/0000f8ed-d420-4fca-bc8c-7f2da1cd7718/data/S62_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-j6h5lrr4/0000f8ed-d420-4fca-bc8c-7f2da1cd7718/data/S62_1_L001_R2_001.fastq.gz



This is cutadapt 3.5 with Python 3.8.12
Command line parameters: --cores 6 --error-rate 0.15 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-ehp71vq9/S62_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-ehp71vq9/S62_1_L001_R2_001.fastq.gz --adapter ATTAGAWACCCBDGTAGTCC --front CCTACGGGRSGCAGCAG -A CTGCTGCSYCCCGTAGG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-j6h5lrr4/0000f8ed-d420-4fca-bc8c-7f2da1cd7718/data/S62_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-j6h5lrr4/0000f8ed-d420-4fca-bc8c-7f2da1cd7718/data/S62_1_L001_R2_001.fastq.gz
Processing reads on 6 cores in paired-end mode ...


Finished in 2.14 s (18 µs/read; 3.36 M reads/minute).

=== Summary ===

Total read pairs processed:            119,387
  Read 1 with adapter:                 118,446 (99.2%)
  Read 2 with adapter:                 117,302 (98.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,008 (2.5%)
Pairs written (passing filters):       116,379 (97.5%)

Total basepairs processed:    60,887,370 bp
  Read 1:    36,413,035 bp
  Read 2:    24,474,335 bp
Total written (filtered):     54,191,998 bp (89.0%)
  Read 1:    33,051,306 bp
  Read 2:    21,140,692 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 384 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 8.6%
  G: 90.4%
  T: 1.0%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter seq

Processing reads on 6 cores in paired-end mode ...


Finished in 0.66 s (42 µs/read; 1.42 M reads/minute).

=== Summary ===

Total read pairs processed:             15,750
  Read 1 with adapter:                  15,619 (99.2%)
  Read 2 with adapter:                  15,464 (98.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              415 (2.6%)
Pairs written (passing filters):        15,335 (97.4%)

Total basepairs processed:     8,032,500 bp
  Read 1:     4,803,750 bp
  Read 2:     3,228,750 bp
Total written (filtered):      7,146,540 bp (89.0%)
  Read 1:     4,360,997 bp
  Read 2:     2,785,543 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 26 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 38.5%
  G: 53.8%
  T: 7.7%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	0.2	

Finished in 4.19 s (15 µs/read; 3.88 M reads/minute).

=== Summary ===

Total read pairs processed:            271,003
  Read 1 with adapter:                 268,541 (99.1%)
  Read 2 with adapter:                 266,287 (98.3%)

== Read fate breakdown ==
Pairs that were too short:                   1 (0.0%)
Pairs discarded as untrimmed:            7,124 (2.6%)
Pairs written (passing filters):       263,878 (97.4%)

Total basepairs processed:   138,211,530 bp
  Read 1:    82,655,915 bp
  Read 2:    55,555,615 bp
Total written (filtered):    123,011,399 bp (89.0%)
  Read 1:    75,078,925 bp
  Read 2:    47,932,474 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 175 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.4%
  C: 29.7%
  G: 65.1%
  T: 1.7%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	28	4.

Finished in 0.49 s (32 µs/read; 1.88 M reads/minute).

=== Summary ===

Total read pairs processed:             15,466
  Read 1 with adapter:                  15,345 (99.2%)
  Read 2 with adapter:                  15,209 (98.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              375 (2.4%)
Pairs written (passing filters):        15,091 (97.6%)

Total basepairs processed:     7,887,660 bp
  Read 1:     4,717,130 bp
  Read 2:     3,170,530 bp
Total written (filtered):      7,033,019 bp (89.2%)
  Read 1:     4,291,678 bp
  Read 2:     2,741,341 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 27 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 40.7%
  G: 59.3%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	0.2	

Finished in 0.42 s (60 µs/read; 0.99 M reads/minute).

=== Summary ===

Total read pairs processed:              6,980
  Read 1 with adapter:                   6,929 (99.3%)
  Read 2 with adapter:                   6,873 (98.5%)

== Read fate breakdown ==
Pairs that were too short:                   1 (0.0%)
Pairs discarded as untrimmed:              155 (2.2%)
Pairs written (passing filters):         6,824 (97.8%)

Total basepairs processed:     3,559,800 bp
  Read 1:     2,128,900 bp
  Read 2:     1,430,900 bp
Total written (filtered):      3,181,180 bp (89.4%)
  Read 1:     1,941,618 bp
  Read 2:     1,239,562 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 4 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 25.0%
  G: 75.0%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.1	1

Finished in 0.76 s (50 µs/read; 1.21 M reads/minute).

=== Summary ===

Total read pairs processed:             15,369
  Read 1 with adapter:                  15,246 (99.2%)
  Read 2 with adapter:                  15,145 (98.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              346 (2.3%)
Pairs written (passing filters):        15,023 (97.7%)

Total basepairs processed:     7,838,190 bp
  Read 1:     4,687,545 bp
  Read 2:     3,150,645 bp
Total written (filtered):      7,003,039 bp (89.3%)
  Read 1:     4,274,098 bp
  Read 2:     2,728,941 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 12 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 16.7%
  G: 83.3%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.2	

Finished in 0.48 s (45 µs/read; 1.34 M reads/minute).

=== Summary ===

Total read pairs processed:             10,791
  Read 1 with adapter:                  10,716 (99.3%)
  Read 2 with adapter:                  10,597 (98.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              266 (2.5%)
Pairs written (passing filters):        10,525 (97.5%)

Total basepairs processed:     5,503,410 bp
  Read 1:     3,291,255 bp
  Read 2:     2,212,155 bp
Total written (filtered):      4,906,724 bp (89.2%)
  Read 1:     2,995,042 bp
  Read 2:     1,911,682 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 6 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 16.7%
  G: 83.3%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.0	1

Finished in 0.58 s (55 µs/read; 1.09 M reads/minute).

=== Summary ===

Total read pairs processed:             10,510
  Read 1 with adapter:                  10,402 (99.0%)
  Read 2 with adapter:                  10,369 (98.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              249 (2.4%)
Pairs written (passing filters):        10,261 (97.6%)

Total basepairs processed:     5,360,100 bp
  Read 1:     3,205,550 bp
  Read 2:     2,154,550 bp
Total written (filtered):      4,783,140 bp (89.2%)
  Read 1:     2,919,403 bp
  Read 2:     1,863,737 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 10 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 0.0%
  G: 100.0%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
20	1	0.0

Finished in 0.68 s (24 µs/read; 2.55 M reads/minute).

=== Summary ===

Total read pairs processed:             29,039
  Read 1 with adapter:                  28,803 (99.2%)
  Read 2 with adapter:                  28,562 (98.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              703 (2.4%)
Pairs written (passing filters):        28,336 (97.6%)

Total basepairs processed:    14,809,890 bp
  Read 1:     8,856,895 bp
  Read 2:     5,952,995 bp
Total written (filtered):     13,206,456 bp (89.2%)
  Read 1:     8,059,567 bp
  Read 2:     5,146,889 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 51 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.9%
  C: 5.9%
  G: 90.2%
  T: 0.0%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter sequ

Finished in 0.73 s (25 µs/read; 2.37 M reads/minute).

=== Summary ===

Total read pairs processed:             28,939
  Read 1 with adapter:                  28,726 (99.3%)
  Read 2 with adapter:                  28,445 (98.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              703 (2.4%)
Pairs written (passing filters):        28,236 (97.6%)

Total basepairs processed:    14,758,890 bp
  Read 1:     8,826,395 bp
  Read 2:     5,932,495 bp
Total written (filtered):     13,157,876 bp (89.2%)
  Read 1:     8,028,750 bp
  Read 2:     5,129,126 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 62 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 21.0%
  G: 71.0%
  T: 8.1%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	5	0.4	

Finished in 1.33 s (19 µs/read; 3.23 M reads/minute).

=== Summary ===

Total read pairs processed:             71,899
  Read 1 with adapter:                  71,267 (99.1%)
  Read 2 with adapter:                  70,823 (98.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            1,695 (2.4%)
Pairs written (passing filters):        70,204 (97.6%)

Total basepairs processed:    36,668,490 bp
  Read 1:    21,929,195 bp
  Read 2:    14,739,295 bp
Total written (filtered):     32,730,931 bp (89.3%)
  Read 1:    19,977,855 bp
  Read 2:    12,753,076 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 15 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 33.3%
  G: 66.7%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	1.1	

Finished in 0.71 s (20 µs/read; 3.02 M reads/minute).

=== Summary ===

Total read pairs processed:             35,572
  Read 1 with adapter:                  35,245 (99.1%)
  Read 2 with adapter:                  35,051 (98.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:              843 (2.4%)
Pairs written (passing filters):        34,729 (97.6%)

Total basepairs processed:    18,141,720 bp
  Read 1:    10,849,460 bp
  Read 2:     7,292,260 bp
Total written (filtered):     16,190,591 bp (89.2%)
  Read 1:     9,881,513 bp
  Read 2:     6,309,078 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 23 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 21.7%
  G: 78.3%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	0.5	

Finished in 0.90 s (20 µs/read; 2.97 M reads/minute).

=== Summary ===

Total read pairs processed:             44,482
  Read 1 with adapter:                  44,118 (99.2%)
  Read 2 with adapter:                  43,780 (98.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            1,059 (2.4%)
Pairs written (passing filters):        43,423 (97.6%)

Total basepairs processed:    22,685,820 bp
  Read 1:    13,567,010 bp
  Read 2:     9,118,810 bp
Total written (filtered):     20,243,998 bp (89.2%)
  Read 1:    12,356,229 bp
  Read 2:     7,887,769 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 14 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 0.0%
  C: 35.7%
  G: 64.3%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	0.7	

Finished in 1.17 s (20 µs/read; 3.00 M reads/minute).

=== Summary ===

Total read pairs processed:             58,322
  Read 1 with adapter:                  57,822 (99.1%)
  Read 2 with adapter:                  57,434 (98.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            1,379 (2.4%)
Pairs written (passing filters):        56,943 (97.6%)

Total basepairs processed:    29,744,220 bp
  Read 1:    17,788,210 bp
  Read 2:    11,956,010 bp
Total written (filtered):     26,546,948 bp (89.3%)
  Read 1:    16,202,424 bp
  Read 2:    10,344,524 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 31 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 3.2%
  C: 19.4%
  G: 77.4%
  T: 0.0%
  none/other: 0.0%

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	0.9	

Finished in 0.91 s (20 µs/read; 2.98 M reads/minute).

=== Summary ===

Total read pairs processed:             45,210
  Read 1 with adapter:                  44,840 (99.2%)
  Read 2 with adapter:                  44,467 (98.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            1,109 (2.5%)
Pairs written (passing filters):        44,101 (97.5%)

Total basepairs processed:    23,057,100 bp
  Read 1:    13,789,050 bp
  Read 2:     9,268,050 bp
Total written (filtered):     20,558,287 bp (89.2%)
  Read 1:    12,547,479 bp
  Read 2:     8,010,808 bp

=== First read: Adapter 1 ===

Sequence: ATTAGAWACCCBDGTAGTCC; Type: regular 3'; Length: 20; Trimmed: 25 times

Minimum overlap: 8
No. of allowed errors:
1-5 bp: 0; 6-12 bp: 1; 13-19 bp: 2; 20 bp: 3

Bases preceding removed adapters:
  A: 4.0%
  C: 12.0%
  G: 84.0%
  T: 0.0%
  none/other: 0.0%
    The adapter is preceded by 'G' extremely often.
    The provided adapter seq