<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [7]</a>'.</span>

# Prepare data for Pipeline

## Setup and settings

In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from Bio.Seq import Seq
from qiime2 import Artifact, Visualization, Metadata
from qiime2.plugins import demux
from qiime2.plugins.cutadapt.methods import trim_paired, trim_single
from qiime2.plugins.demux.methods import filter_samples

### Receiving the parameters

The following cell can receive parameters using the [papermill](https://papermill.readthedocs.io/en/latest/) tool.

In [2]:
# Default values for the notebook parameters. When the notebook is executed
# through papermill, an injected cell placed after this one overrides them.

# Run identification and behaviour switches.
experiment_name = 'exp-01'
replace_files = False
threads = 1

# Optional inputs: both are left unset by default.
trim = None
metadata_file = None

# File-system locations.
params_path = os.path.join('..', 'params', 'parameter-set-01.yaml')
base_dir = os.path.join('/', 'home', 'username', 'pipeline-dir')
manifest_file = os.path.join(base_dir, 'data', 'raw', 'manifest', 'manifest.csv')

In [3]:
# Parameters
# Values assigned here replace the defaults declared in the previous cell.

# Experiment identification and input/output locations.
experiment_name = "ana-flavia-all"
base_dir = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri"
manifest_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/manifest-all-ana.csv"
metadata_file = "/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/metadata-all-ana-types.tsv"
classifier_file = "/home/lauro/nupeb/rede-micro/models/silva-138-99-nb-classifier.qza"

# Analysis settings (not all of them are read in this notebook).
class_col = "group-id"
top_n = 20
phred = 20
trunc_f = 0
trunc_r = 0
overlap = 12

# Execution settings.
replace_files = False
threads = 6

# Primer-trimming settings read by the cutadapt cell.
trim = dict(
    overlap=8,
    forward_primer="CCTACGGGRSGCAGCAG",
    reverse_primer="GGACTACHVGGGTWTCTAAT",
)


### Defining names and paths

In [4]:
# Folder that holds everything produced for this experiment.
experiment_dir = os.path.join(base_dir, 'experiments', experiment_name)

# Output locations: QIIME artifacts and images (the image folder is made absolute).
out_dir = os.path.join(experiment_dir, 'qiime-artifacts')
img_folder = os.path.abspath(os.path.join(experiment_dir, 'imgs'))

# Create each output folder that does not exist yet and report it.
for folder_label, folder_path in (('qiime-artifacts', out_dir), ('img', img_folder)):
    if os.path.isdir(folder_path):
        continue
    os.makedirs(folder_path)
    print(f'New {folder_label} folder path created: {folder_path}')

# Full paths of the artifacts (.qza) and visualizations (.qzv) written by this notebook.
demux_file, demux_view, demux_file_trim, demux_view_trim = (
    os.path.join(out_dir, file_name)
    for file_name in (
        'demux-paired.qza',
        'demux-paired.qzv',
        'demux-paired-trim.qza',
        'demux-paired-trim.qzv',
    )
)

## Step execution

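This step imports all `fastq` files into a **QIIME2 Artifact** object and saves it to a new `qza` file of type `SampleData[PairedEndSequencesWithQuality]` (or `SampleData[SequencesWithQuality]` when the manifest contains a single read direction).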

In [5]:
# Read the manifest and count the distinct values of its `direction` column
# to decide between a single-end and a paired-end import.
manifest_df = pd.read_csv(manifest_file)
n_directions = len(manifest_df['direction'].unique())

# Pick the semantic type (d_type) and the manifest view format (v_type)
# that are later handed to Artifact.import_data.
if n_directions == 1:
    d_type = 'SampleData[SequencesWithQuality]'
    v_type = 'SingleEndFastqManifestPhred33'
elif n_directions == 2:
    d_type = 'SampleData[PairedEndSequencesWithQuality]'
    v_type = 'PairedEndFastqManifestPhred33'
else:
    # Stop here: continuing would leave d_type/v_type undefined and the
    # import cell would fail later with an unrelated NameError.
    raise ValueError(f'ERROR: invalid number of directions {n_directions}')

In [6]:
# Load the sample metadata only when a metadata file was supplied.
# `metadata_file` defaults to None in the parameters cell; in that case the
# metadata is skipped (metadata_qa is None) instead of failing inside Metadata.load.
metadata_qa = Metadata.load(metadata_file) if metadata_file else None

# Show the metadata table as the cell output; nothing is shown without metadata.
metadata_qa.to_dataframe() if metadata_qa is not None else None

Unnamed: 0_level_0,sample-desc,group-id,group-desc
sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S210421121673,N9.01,NC,Ninhada-controle
S210421121674,N11.01,NC,Ninhada-controle
S210421121675,N9.05,NC,Ninhada-controle
S210421121676,N11.02,NC,Ninhada-controle
S210421121677,N9.03,NR,Ninhada-reduzida
S210421121678,N10.03,NR,Ninhada-reduzida
S210421121679,N10.04,NR,Ninhada-reduzida
S210421121680,N10.07,NR,Ninhada-reduzida
S210421121681,N10.08,NR,Ninhada-reduzida
S210421121682,N7.01,STD-NC,Grupo-dieta-padrao


In [7]:
# Show which manifest file is in use, then preview its first lines.
print(manifest_file)
# IPython shell escape: `{manifest_file}` is interpolated from the Python variable.
!head {manifest_file}

/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/data/raw/manifest-all-ana.csv
sample-id,absolute-filepath,direction
S210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121673-1-1-1_S81_L001_R1_001.fastq,forward
S210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121673-1-1-1_S81_L001_R2_001.fastq,reverse
S210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121674-1-1-1_S83_L001_R1_001.fastq,forward
S210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121674-1-1-1_S83_L001_R2_001.fastq,reverse
S210421121675,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121675-1-1-1_S85_L001_R1_001.fastq,forward
S210421121675,/home/lauro/nupeb/dados_brutos_rede_genoma/data/16s_karina/workspace/decompressed_reads/210421121675-

In [8]:
# Import data and create an artifact object
artifact = Artifact.import_data(
    d_type, 
    manifest_file, 
    view_type=v_type)

#     artifact = filter_samples(demux=artifact, metadata=metadata_qa).filtered_demux
# Save the artifact object to a new qza file
artifact.save(demux_file)

'/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-all/qiime-artifacts/demux-paired.qza'

In [None]:
# Reuse the saved artifact when it exists and no replacement was requested;
# otherwise import the reads from the manifest and save a fresh artifact.
if os.path.isfile(demux_file) and not replace_files:
    artifact = Artifact.load(demux_file)
else:
    # Create the artifact object from the manifest.
    artifact = Artifact.import_data(d_type, manifest_file, view_type=v_type)

    # Write the artifact object to the qza file.
    artifact.save(demux_file)

In [9]:
# Reuse the saved summary visualization when it exists and no replacement
# was requested; otherwise build it from the artifact and save it.
if os.path.isfile(demux_view) and not replace_files:
    demux_view_obj = Visualization.load(demux_view)
else:
    # Summarize the demultiplexed reads.
    demux_summary = demux.visualizers.summarize(artifact)
    demux_view_obj = demux_summary.visualization

    # Write the visualization to the qzv file.
    demux_view_obj.save(filepath=demux_view)

In [10]:
# Display the path of the demultiplexed artifact file.
demux_file

'/home/lauro/nupeb/rede-micro/redemicro-ana-flavia-nutri/experiments/ana-flavia-all/qiime-artifacts/demux-paired.qza'

## Step report


In [None]:
# Print the plain-text representation of the demux summary visualization.
print(demux_view_obj)

In [None]:
# Render the demux summary visualization as the cell output.
demux_view_obj

[cutadapt](https://docs.qiime2.org/2022.2/plugins/available/cutadapt/trim-paired/)

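For paired-end data, we trim the forward primer from the forward reads and the reverse primer from the reverse reads. Trimming of the reverse-complemented primers (the reverse complement of the reverse primer from the forward reads, and the reverse complement of the forward primer from the reverse reads) is currently disabled in the code below. For single-end data, we trim the forward primer and the reverse complement of the reverse primer from the reads.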

In [13]:
# Output paths for the primer-trimmed reads and their summary visualization.
demux_file_trim = os.path.join(out_dir, 'demux-paired-trim.qza')
demux_view_trim = os.path.join(out_dir, 'demux-paired-trim.qzv')

# Always bind both names so that later cells can reference them even when
# trimming is disabled or the trimmed artifact is served from the cache.
res = None
res_view = None

if trim and (not os.path.isfile(demux_file_trim) or replace_files):
    # Primers come from the `trim` parameter; the plugin expects lists.
    forward_primer = [trim['forward_primer']] # ['CCTACGGGRSGCAGCAG']
    reverse_primer = [trim['reverse_primer']] # ['GGACTACHVGGGTWTCTAAT']
    forward_reverse_complement = [str(Seq(forward_primer[0]).reverse_complement())]
    reverse_reverse_complement = [str(Seq(reverse_primer[0]).reverse_complement())]

    if n_directions == 1:
        # Single-end: trim the forward primer (5') and the reverse complement
        # of the reverse primer (3' adapter).
        # NOTE(review): unlike the paired-end call, trim['overlap'] is not
        # passed here — confirm whether that is intentional.
        res = trim_single(
            demultiplexed_sequences=artifact,
            front=forward_primer,
            adapter=reverse_reverse_complement,
            cores=threads,
            indels=False,
            match_read_wildcards=True,
            match_adapter_wildcards=True,
            error_rate=0.01,
            discard_untrimmed=True,
        ).trimmed_sequences
    elif n_directions == 2:
        # Paired-end: only the 5' primers are trimmed; the 3' adapter
        # arguments below are intentionally left disabled.
        res = trim_paired(
            demultiplexed_sequences=artifact,
            front_f=forward_primer,
            front_r=reverse_primer,
            # adapter_f=reverse_reverse_complement,
            # adapter_r=forward_reverse_complement,
            cores=threads,
            overlap=trim['overlap'],
            indels=False,
            match_read_wildcards=True,
            match_adapter_wildcards=True,
            error_rate=0.01,
            discard_untrimmed=True,
        ).trimmed_sequences
    else:
        # Fail loudly instead of printing and silently producing no output.
        raise ValueError(f'ERROR: invalid number of directions {n_directions}')

    # Save the trimmed reads and their summary visualization.
    res.save(demux_file_trim)
    res_view = demux.visualizers.summarize(res).visualization
    Visualization.save(res_view, filepath=demux_view_trim)
elif trim:
    # Cache hit: the trimmed artifact already exists and must not be replaced.
    res = Artifact.load(demux_file_trim)
    if os.path.isfile(demux_view_trim):
        res_view = Visualization.load(demux_view_trim)
    else:
        # The artifact was saved without its visualization; rebuild it.
        res_view = demux.visualizers.summarize(res).visualization
        Visualization.save(res_view, filepath=demux_view_trim)

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-oyazuqe_/S210421121673_0_L001_R1_001.fastq.gz -p /tmp/q2-CasavaOneEightSingleLanePerSampleDirFmt-oyazuqe_/S210421121673_1_L001_R2_001.fastq.gz --front CCTACGGGRSGCAGCAG -G GGACTACHVGGGTWTCTAAT --no-indels --match-read-wildcards --discard-untrimmed /tmp/qiime2-archive-7lm7gvl9/068f0c34-15b0-437a-a67e-bb0223aa7625/data/S210421121673_0_L001_R1_001.fastq.gz /tmp/qiime2-archive-7lm7gvl9/068f0c34-15b0-437a-a67e-bb0223aa7625/data/S210421121673_1_L001_R2_001.fastq.gz

This is cutadapt 3.5 with Python 3.8.12
Command line parameters: --cores 6 --error-rate 0.01 --times 1 --overlap 8 --minimum-length 1 -o /tmp/q2-CasavaOneEightSingleLan

Finished in 6.49 s (9 µs/read; 6.59 M reads/minute).

=== Summary ===

Total read pairs processed:            712,750
  Read 1 with adapter:                 694,205 (97.4%)
  Read 2 with adapter:                 664,033 (93.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           65,715 (9.2%)
Pairs written (passing filters):       647,035 (90.8%)

Total basepairs processed:   363,502,500 bp
  Read 1:   217,388,750 bp
  Read 2:   146,113,750 bp
Total written (filtered):    301,650,677 bp (83.0%)
  Read 1:   184,115,305 bp
  Read 2:   117,535,372 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 694205 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	10.9	0	4
9	9	2.7	0	9
10	5	0.7	0	5
11	14	0.2	0	14
12	8	0.0	0	8
13	6	0.0	0	6
14	30	0.0	0	30
15	18	0.0	0	18
16	33	0.0	0	33
17	183	0.0	0	1

Finished in 2.95 s (9 µs/read; 6.44 M reads/minute).

=== Summary ===

Total read pairs processed:            316,849
  Read 1 with adapter:                 308,351 (97.3%)
  Read 2 with adapter:                 292,785 (92.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           31,765 (10.0%)
Pairs written (passing filters):       285,084 (90.0%)

Total basepairs processed:   161,592,990 bp
  Read 1:    96,638,945 bp
  Read 2:    64,954,045 bp
Total written (filtered):    132,905,965 bp (82.2%)
  Read 1:    81,120,487 bp
  Read 2:    51,785,478 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 308351 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	4.8	0	4
9	1	1.2	0	1
10	1	0.3	0	1
11	9	0.1	0	9
12	4	0.0	0	4
13	5	0.0	0	5
14	17	0.0	0	17
15	20	0.0	0	20
16	21	0.0	0	21
17	83	0.0	0	83
1

Finished in 1.71 s (10 µs/read; 5.82 M reads/minute).

=== Summary ===

Total read pairs processed:            165,821
  Read 1 with adapter:                 161,411 (97.3%)
  Read 2 with adapter:                 154,014 (92.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,834 (9.5%)
Pairs written (passing filters):       149,987 (90.5%)

Total basepairs processed:    84,568,710 bp
  Read 1:    50,575,405 bp
  Read 2:    33,993,305 bp
Total written (filtered):     69,924,570 bp (82.7%)
  Read 1:    42,679,301 bp
  Read 2:    27,245,269 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 161411 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	2.5	0	3
9	1	0.6	0	1
10	3	0.2	0	3
11	4	0.0	0	4
12	1	0.0	0	1
13	3	0.0	0	3
14	8	0.0	0	8
15	1	0.0	0	1
16	4	0.0	0	4
17	46	0.0	0	46
18	2512

Finished in 2.48 s (10 µs/read; 6.17 M reads/minute).

=== Summary ===

Total read pairs processed:            254,393
  Read 1 with adapter:                 247,382 (97.2%)
  Read 2 with adapter:                 234,645 (92.2%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           26,087 (10.3%)
Pairs written (passing filters):       228,306 (89.7%)

Total basepairs processed:   129,740,430 bp
  Read 1:    77,589,865 bp
  Read 2:    52,150,565 bp
Total written (filtered):    106,436,280 bp (82.0%)
  Read 1:    64,964,669 bp
  Read 2:    41,471,611 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 247382 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	3.9	0	3
9	3	1.0	0	3
10	2	0.2	0	2
11	1	0.1	0	1
12	6	0.0	0	6
13	5	0.0	0	5
14	13	0.0	0	13
15	8	0.0	0	8
16	5	0.0	0	5
17	82	0.0	0	82
18	3

Finished in 1.51 s (11 µs/read; 5.44 M reads/minute).

=== Summary ===

Total read pairs processed:            136,741
  Read 1 with adapter:                 132,822 (97.1%)
  Read 2 with adapter:                 126,455 (92.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           13,839 (10.1%)
Pairs written (passing filters):       122,902 (89.9%)

Total basepairs processed:    69,737,910 bp
  Read 1:    41,706,005 bp
  Read 2:    28,031,905 bp
Total written (filtered):     57,294,347 bp (82.2%)
  Read 1:    34,969,730 bp
  Read 2:    22,324,617 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 132822 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	2.1	0	1
9	2	0.5	0	2
12	4	0.0	0	4
13	1	0.0	0	1
14	8	0.0	0	8
15	5	0.0	0	5
16	11	0.0	0	11
17	32	0.0	0	32
18	20270	0.0	0	20270
19	23588	

Finished in 2.35 s (10 µs/read; 5.72 M reads/minute).

=== Summary ===

Total read pairs processed:            224,175
  Read 1 with adapter:                 217,928 (97.2%)
  Read 2 with adapter:                 207,950 (92.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           21,909 (9.8%)
Pairs written (passing filters):       202,266 (90.2%)

Total basepairs processed:   114,329,250 bp
  Read 1:    68,373,375 bp
  Read 2:    45,955,875 bp
Total written (filtered):     94,295,455 bp (82.5%)
  Read 1:    57,554,676 bp
  Read 2:    36,740,779 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 217928 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	3.4	0	1
9	4	0.9	0	4
10	1	0.2	0	1
11	5	0.1	0	5
13	2	0.0	0	2
14	7	0.0	0	7
15	5	0.0	0	5
16	7	0.0	0	7
17	51	0.0	0	51
18	33939	0.0	0	33939

Finished in 0.73 s (12 µs/read; 4.84 M reads/minute).

=== Summary ===

Total read pairs processed:             58,992
  Read 1 with adapter:                  57,341 (97.2%)
  Read 2 with adapter:                  54,208 (91.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            6,281 (10.6%)
Pairs written (passing filters):        52,711 (89.4%)

Total basepairs processed:    30,085,920 bp
  Read 1:    17,992,560 bp
  Read 2:    12,093,360 bp
Total written (filtered):     24,574,036 bp (81.7%)
  Read 1:    14,999,414 bp
  Read 2:     9,574,622 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 57341 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.2	0	1
11	1	0.0	0	1
12	1	0.0	0	1
14	1	0.0	0	1
15	6	0.0	0	6
16	3	0.0	0	3
17	10	0.0	0	10
18	8921	0.0	0	8921
19	10341	0.0	0	10341
20	10

Finished in 0.63 s (13 µs/read; 4.72 M reads/minute).

=== Summary ===

Total read pairs processed:             49,335
  Read 1 with adapter:                  47,930 (97.2%)
  Read 2 with adapter:                  45,553 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            5,055 (10.2%)
Pairs written (passing filters):        44,280 (89.8%)

Total basepairs processed:    25,160,850 bp
  Read 1:    15,047,175 bp
  Read 2:    10,113,675 bp
Total written (filtered):     20,642,581 bp (82.0%)
  Read 1:    12,599,485 bp
  Read 2:     8,043,096 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 47930 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.2	0	1
11	2	0.0	0	2
12	1	0.0	0	1
14	1	0.0	0	1
15	2	0.0	0	2
16	3	0.0	0	3
17	16	0.0	0	16
18	7269	0.0	0	7269
19	8668	0.0	0	8668
20	8944

Finished in 0.38 s (19 µs/read; 3.16 M reads/minute).

=== Summary ===

Total read pairs processed:             20,194
  Read 1 with adapter:                  19,610 (97.1%)
  Read 2 with adapter:                  18,045 (89.4%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            2,655 (13.1%)
Pairs written (passing filters):        17,539 (86.9%)

Total basepairs processed:    10,298,940 bp
  Read 1:     6,159,170 bp
  Read 2:     4,139,770 bp
Total written (filtered):      8,176,314 bp (79.4%)
  Read 1:     4,990,682 bp
  Read 2:     3,185,632 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 19610 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
15	3	0.0	0	3
17	7	0.0	0	7
18	3003	0.0	0	3003
19	3491	0.0	0	3491
20	3617	0.0	0	3617
21	3224	0.0	0	3224
22	3416	0.0	0	3416
23	2844	0.0	0	28

Finished in 0.43 s (18 µs/read; 3.39 M reads/minute).

=== Summary ===

Total read pairs processed:             24,189
  Read 1 with adapter:                  23,503 (97.2%)
  Read 2 with adapter:                  21,685 (89.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,102 (12.8%)
Pairs written (passing filters):        21,087 (87.2%)

Total basepairs processed:    12,336,390 bp
  Read 1:     7,377,645 bp
  Read 2:     4,958,745 bp
Total written (filtered):      9,830,774 bp (79.7%)
  Read 1:     6,000,254 bp
  Read 2:     3,830,520 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 23503 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.0	0	1
16	1	0.0	0	1
17	4	0.0	0	4
18	3615	0.0	0	3615
19	4126	0.0	0	4126
20	4339	0.0	0	4339
21	3919	0.0	0	3919
22	4058	0.0	0	4058
23	

Finished in 4.17 s (10 µs/read; 6.00 M reads/minute).

=== Summary ===

Total read pairs processed:            416,322
  Read 1 with adapter:                 405,067 (97.3%)
  Read 2 with adapter:                 385,489 (92.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           41,066 (9.9%)
Pairs written (passing filters):       375,256 (90.1%)

Total basepairs processed:   212,324,220 bp
  Read 1:   126,978,210 bp
  Read 2:    85,346,010 bp
Total written (filtered):    174,944,302 bp (82.4%)
  Read 1:   106,779,862 bp
  Read 2:    68,164,440 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 405067 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	4	6.4	0	4
9	2	1.6	0	2
10	3	0.4	0	3
11	5	0.1	0	5
12	4	0.0	0	4
13	3	0.0	0	3
14	17	0.0	0	17
15	12	0.0	0	12
16	26	0.0	0	26
17	108	0.0	0	108

Finished in 0.47 s (14 µs/read; 4.23 M reads/minute).

=== Summary ===

Total read pairs processed:             32,857
  Read 1 with adapter:                  31,953 (97.2%)
  Read 2 with adapter:                  29,876 (90.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            3,780 (11.5%)
Pairs written (passing filters):        29,077 (88.5%)

Total basepairs processed:    16,757,070 bp
  Read 1:    10,021,385 bp
  Read 2:     6,735,685 bp
Total written (filtered):     13,555,322 bp (80.9%)
  Read 1:     8,273,880 bp
  Read 2:     5,281,442 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 31953 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
11	2	0.0	0	2
12	1	0.0	0	1
13	1	0.0	0	1
14	1	0.0	0	1
16	1	0.0	0	1
17	9	0.0	0	9
18	4981	0.0	0	4981
19	5717	0.0	0	5717
20	5839	0.0	0	5839
21

Finished in 2.17 s (11 µs/read; 5.45 M reads/minute).

=== Summary ===

Total read pairs processed:            197,224
  Read 1 with adapter:                 191,943 (97.3%)
  Read 2 with adapter:                 181,981 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           19,971 (10.1%)
Pairs written (passing filters):       177,253 (89.9%)

Total basepairs processed:   100,584,240 bp
  Read 1:    60,153,320 bp
  Read 2:    40,430,920 bp
Total written (filtered):     82,634,027 bp (82.2%)
  Read 1:    50,437,083 bp
  Read 2:    32,196,944 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 191943 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	3	0.2	0	3
11	2	0.0	0	2
12	4	0.0	0	4
13	2	0.0	0	2
14	5	0.0	0	5
15	3	0.0	0	3
16	8	0.0	0	8
17	44	0.0	0	44
18	29908	0.0	0	29908
19	34447	

Finished in 4.08 s (10 µs/read; 6.12 M reads/minute).

=== Summary ===

Total read pairs processed:            416,343
  Read 1 with adapter:                 405,322 (97.4%)
  Read 2 with adapter:                 382,754 (91.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           43,507 (10.4%)
Pairs written (passing filters):       372,836 (89.6%)

Total basepairs processed:   212,334,930 bp
  Read 1:   126,984,615 bp
  Read 2:    85,350,315 bp
Total written (filtered):    173,816,084 bp (81.9%)
  Read 1:   106,092,368 bp
  Read 2:    67,723,716 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 405322 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	6.4	0	2
9	4	1.6	0	4
10	5	0.4	0	5
11	10	0.1	0	10
13	7	0.0	0	7
14	12	0.0	0	12
15	9	0.0	0	9
16	22	0.0	0	22
17	131	0.0	0	131
18	63635	0.

Finished in 1.26 s (11 µs/read; 5.36 M reads/minute).

=== Summary ===

Total read pairs processed:            112,618
  Read 1 with adapter:                 109,503 (97.2%)
  Read 2 with adapter:                 102,994 (91.5%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           12,401 (11.0%)
Pairs written (passing filters):       100,217 (89.0%)

Total basepairs processed:    57,435,180 bp
  Read 1:    34,348,490 bp
  Read 2:    23,086,690 bp
Total written (filtered):     46,718,895 bp (81.3%)
  Read 1:    28,515,186 bp
  Read 2:    18,203,709 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 109503 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.1	0	1
11	2	0.0	0	2
12	1	0.0	0	1
13	2	0.0	0	2
15	3	0.0	0	3
16	4	0.0	0	4
17	35	0.0	0	35
18	16903	0.0	0	16903
19	19486	0.0	0	19486
2

Finished in 3.62 s (10 µs/read; 5.73 M reads/minute).

=== Summary ===

Total read pairs processed:            345,448
  Read 1 with adapter:                 336,214 (97.3%)
  Read 2 with adapter:                 317,650 (92.0%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           36,167 (10.5%)
Pairs written (passing filters):       309,281 (89.5%)

Total basepairs processed:   176,178,480 bp
  Read 1:   105,361,640 bp
  Read 2:    70,816,840 bp
Total written (filtered):    144,189,236 bp (81.8%)
  Read 1:    88,006,094 bp
  Read 2:    56,183,142 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 336214 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	5.3	0	1
9	3	1.3	0	3
10	2	0.3	0	2
11	6	0.1	0	6
12	8	0.0	0	8
13	2	0.0	0	2
14	14	0.0	0	14
15	16	0.0	0	16
16	16	0.0	0	16
17	84	0.0	0	84


Finished in 3.29 s (10 µs/read; 6.09 M reads/minute).

=== Summary ===

Total read pairs processed:            334,551
  Read 1 with adapter:                 325,355 (97.3%)
  Read 2 with adapter:                 310,233 (92.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           32,704 (9.8%)
Pairs written (passing filters):       301,847 (90.2%)

Total basepairs processed:   170,621,010 bp
  Read 1:   102,038,055 bp
  Read 2:    68,582,955 bp
Total written (filtered):    140,719,082 bp (82.5%)
  Read 1:    85,890,636 bp
  Read 2:    54,828,446 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 325355 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	8	0.3	0	8
11	7	0.1	0	7
12	4	0.0	0	4
13	3	0.0	0	3
14	13	0.0	0	13
15	14	0.0	0	14
16	7	0.0	0	7
17	85	0.0	0	85
18	50577	0.0	0	50577
19	589

Finished in 2.07 s (11 µs/read; 5.35 M reads/minute).

=== Summary ===

Total read pairs processed:            184,058
  Read 1 with adapter:                 179,051 (97.3%)
  Read 2 with adapter:                 169,240 (91.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           19,329 (10.5%)
Pairs written (passing filters):       164,729 (89.5%)

Total basepairs processed:    93,869,580 bp
  Read 1:    56,137,690 bp
  Read 2:    37,731,890 bp
Total written (filtered):     76,794,752 bp (81.8%)
  Read 1:    46,873,160 bp
  Read 2:    29,921,592 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 179051 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	2.8	0	3
9	4	0.7	0	4
11	4	0.0	0	4
12	2	0.0	0	2
13	2	0.0	0	2
14	9	0.0	0	9
15	4	0.0	0	4
16	13	0.0	0	13
17	41	0.0	0	41
18	27921	0.0	0	27

Finished in 1.29 s (11 µs/read; 5.26 M reads/minute).

=== Summary ===

Total read pairs processed:            113,245
  Read 1 with adapter:                 110,092 (97.2%)
  Read 2 with adapter:                 103,978 (91.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           12,106 (10.7%)
Pairs written (passing filters):       101,139 (89.3%)

Total basepairs processed:    57,754,950 bp
  Read 1:    34,539,725 bp
  Read 2:    23,215,225 bp
Total written (filtered):     47,149,152 bp (81.6%)
  Read 1:    28,778,393 bp
  Read 2:    18,370,759 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 110092 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	1	1.7	0	1
9	2	0.4	0	2
11	1	0.0	0	1
12	1	0.0	0	1
14	5	0.0	0	5
15	4	0.0	0	4
16	2	0.0	0	2
17	28	0.0	0	28
18	17209	0.0	0	17209
19	19695	0.

Finished in 1.66 s (11 µs/read; 5.45 M reads/minute).

=== Summary ===

Total read pairs processed:            151,058
  Read 1 with adapter:                 146,859 (97.2%)
  Read 2 with adapter:                 139,367 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,510 (10.3%)
Pairs written (passing filters):       135,548 (89.7%)

Total basepairs processed:    77,039,580 bp
  Read 1:    46,072,690 bp
  Read 2:    30,966,890 bp
Total written (filtered):     63,193,980 bp (82.0%)
  Read 1:    38,571,401 bp
  Read 2:    24,622,579 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 146859 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	3	0.6	0	3
10	2	0.1	0	2
11	3	0.0	0	3
12	1	0.0	0	1
13	3	0.0	0	3
14	5	0.0	0	5
15	5	0.0	0	5
16	3	0.0	0	3
17	39	0.0	0	39
18	22910	0.0	0	229

Finished in 2.36 s (10 µs/read; 6.19 M reads/minute).

=== Summary ===

Total read pairs processed:            243,179
  Read 1 with adapter:                 236,854 (97.4%)
  Read 2 with adapter:                 223,595 (91.9%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           25,304 (10.4%)
Pairs written (passing filters):       217,875 (89.6%)

Total basepairs processed:   124,021,290 bp
  Read 1:    74,169,595 bp
  Read 2:    49,851,695 bp
Total written (filtered):    101,573,655 bp (81.9%)
  Read 1:    61,995,458 bp
  Read 2:    39,578,197 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 236854 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	2	3.7	0	2
9	2	0.9	0	2
10	1	0.2	0	1
11	3	0.1	0	3
12	1	0.0	0	1
13	3	0.0	0	3
14	14	0.0	0	14
15	12	0.0	0	12
16	9	0.0	0	9
17	66	0.0	0	66
18

Finished in 0.94 s (12 µs/read; 5.16 M reads/minute).

=== Summary ===

Total read pairs processed:             80,755
  Read 1 with adapter:                  78,601 (97.3%)
  Read 2 with adapter:                  74,038 (91.7%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:            8,658 (10.7%)
Pairs written (passing filters):        72,097 (89.3%)

Total basepairs processed:    41,185,050 bp
  Read 1:    24,630,275 bp
  Read 2:    16,554,775 bp
Total written (filtered):     33,611,590 bp (81.6%)
  Read 1:    20,515,535 bp
  Read 2:    13,096,055 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 78601 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
10	1	0.1	0	1
11	1	0.0	0	1
12	1	0.0	0	1
13	2	0.0	0	2
14	5	0.0	0	5
15	1	0.0	0	1
16	2	0.0	0	2
17	22	0.0	0	22
18	12274	0.0	0	12274
19	14235	0

Finished in 1.48 s (11 µs/read; 5.65 M reads/minute).

=== Summary ===

Total read pairs processed:            139,176
  Read 1 with adapter:                 135,203 (97.1%)
  Read 2 with adapter:                 127,496 (91.6%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           15,223 (10.9%)
Pairs written (passing filters):       123,953 (89.1%)

Total basepairs processed:    70,979,760 bp
  Read 1:    42,448,680 bp
  Read 2:    28,531,080 bp
Total written (filtered):     57,785,990 bp (81.4%)
  Read 1:    35,270,977 bp
  Read 2:    22,515,013 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 135203 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
9	1	0.5	0	1
11	3	0.0	0	3
12	2	0.0	0	2
14	4	0.0	0	4
15	3	0.0	0	3
16	4	0.0	0	4
17	35	0.0	0	35
18	21044	0.0	0	21044
19	24431	0.0	0	24431
20

Finished in 3.10 s (10 µs/read; 6.09 M reads/minute).

=== Summary ===

Total read pairs processed:            314,325
  Read 1 with adapter:                 305,481 (97.2%)
  Read 2 with adapter:                 290,205 (92.3%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs discarded as untrimmed:           32,137 (10.2%)
Pairs written (passing filters):       282,188 (89.8%)

Total basepairs processed:   160,305,750 bp
  Read 1:    95,869,125 bp
  Read 2:    64,436,625 bp
Total written (filtered):    131,558,190 bp (82.1%)
  Read 1:    80,299,014 bp
  Read 2:    51,259,176 bp

=== First read: Adapter 1 ===

Sequence: CCTACGGGRSGCAGCAG; Type: regular 5'; Length: 17; Trimmed: 305481 times

Minimum overlap: 8
No. of allowed errors:
1-17 bp: 0

Overview of removed sequences
length	count	expect	max.err	error counts
8	3	4.8	0	3
9	5	1.2	0	5
10	6	0.3	0	6
11	5	0.1	0	5
12	6	0.0	0	6
13	1	0.0	0	1
14	13	0.0	0	13
15	5	0.0	0	5
16	19	0.0	0	19
17	86	0.0	0	86
18

In [14]:
# Display the summary visualization of the trimmed reads.
res_view

In [None]:
# Relative path to the trimmed demux artifact.
# NOTE(review): this path is resolved against the current working directory,
# unlike `demux_file_trim`, which is built from `out_dir` — confirm both refer
# to the same file.
demux_path = os.path.join('..', 'qiime-artifacts', 'demux-paired-trim.qza')
# List the file through the shell to check that it exists.
!ls {demux_path}

In [None]:
# Path under `out_dir` for a filtered copy of the trimmed demux artifact.
filtered_demux_path = os.path.join(out_dir, 'demux-paired-trim-filtered.qza')

In [None]:
# Display the relative artifact path stored in `demux_path`.
demux_path

In [None]:
# Load the sample metadata from `metadata_file` and display the resulting object.
metadata_qa = Metadata.load(metadata_file)
metadata_qa

In [None]:
# Relative path to a metadata file, resolved against the current working directory.
# NOTE(review): this is a different file from the `metadata_file` parameter — confirm which one is intended.
metadata_f = os.path.join('..', '..', '..', 'data', 'raw', 'metadata-all-ana.tsv')
# List the file through the shell to check that it exists.
!ls {metadata_f}
metadata_art = Metadata.load(metadata_f)
# A bare expression in the middle of a cell is not rendered with IPython's default settings.
metadata_art

# Load the trimmed demux artifact from the relative path and display it.
aux_art = Artifact.load(demux_path)
aux_art

In [None]:
# Relative path for the filtered artifact; this overwrites the `out_dir`-based
# value assigned to the same name in an earlier cell.
filtered_demux_path = '../qiime-artifacts/demux-paired-trim-filtered.qza'

In [None]:
%%bash
# Hard-coded relative paths; these shell variables are independent of the
# Python variables with the same names.
metadata_f="../../../data/raw/metadata-all-ana.tsv"
demux_path="../qiime-artifacts/demux-paired-trim.qza"
filtered_demux_path="../qiime-artifacts/demux-paired-trim-filtered.qza"

# Run `qiime demux filter-samples` on the trimmed artifact with the metadata
# file and write the result to ${filtered_demux_path}.
qiime demux filter-samples --i-demux ${demux_path} --m-metadata-file ${metadata_f} --o-filtered-demux ${filtered_demux_path}

In [None]:
# `filter_samples` is already imported in the first cell of the notebook;
# this repeated import is redundant but harmless.
from qiime2.plugins.demux.methods import filter_samples

# Python-API call of demux filter_samples on the loaded artifact and metadata;
# keeps the `filtered_demux` result in `res2`.
res2 = filter_samples(demux=aux_art, metadata=metadata_art).filtered_demux