In [1]:
import os
import yaml
import papermill as pm
from utils import *

In [2]:
# Define paths
experiment_name = 'thayane-feces-01'
base_folder = os.path.join('/', 'home', 'lauro', 'nupeb', 'redemicro')
parameters_folder = os.path.join(base_folder, 'params')
steps_folder = os.path.join(base_folder, 'steps')
experiments_folder = os.path.join(base_folder, 'experiments')

In [13]:
# Load parameter file
param_file = os.path.join(parameters_folder, 'thayane-feces-01.yaml')
with open(param_file) as file:
    parameters = yaml.load(file, Loader=yaml.FullLoader)
parameters    

{'experiment_name': 'jenneffer-vs-01',
 'base_dir': '/home/lauro/nupeb/redemicro',
 'manifest_file': '/home/lauro/nupeb/redemicro/data/raw/manifest/not-hist-manifest.csv',
 'metadata_file': '/home/lauro/nupeb/redemicro/data/raw/metadata/not-hist-metadata.tsv',
 'classifier_file': '/home/lauro/nupeb/dados_brutos_rede_genoma/16S_classifiers_qiime2/silva-138-99-nb-classifier.qza',
 'phred': 20,
 'trunc_f': 0,
 'trunc_r': 0,
 'overlap': 16,
 'threads': 6}

In [4]:
# Define names
experiment_folder = os.path.join(experiments_folder, f"{parameters['experiment_name']}")
executed_steps = os.path.join(experiment_folder, 'executed_steps')
qiime_folder = os.path.join(experiment_folder, 'qiime-artifacts')
manifest_folder = os.path.join(base_folder, 'data', 'raw', 'manifest')
metadata_folder = os.path.join(base_folder, 'data', 'raw', 'metadata')

# Create paths if it not exist
if not os.path.isdir(executed_steps):
    os.makedirs(executed_steps)
    print(f'New folder path created: {executed_steps}')

if not os.path.isdir(qiime_folder):
    os.makedirs(qiime_folder)
    print(f'New folder path created: {qiime_folder}')

In [5]:
def execute_step(fname, params, replace=False):
    step_file = os.path.join(steps_folder, fname)
    result_file = os.path.join(executed_steps, fname)
    pm.execute_notebook(step_file, result_file, parameters=params, kernel_name='Python 3')

# Pipeline Steps

## Step 01 - Prepare data
This Step load the raw `fastq` files, create and save a `qza` Artifact QIIME2 file and a `qzv` Visualization QIIME2 file.

Using the `qzv` Visualization QIIME2 file we can view the reads quality scores distribuition across all bases.

In [8]:
# execute_step(
#     'step-01-prepare-data.ipynb',
#     params={
#         'experiment_name': experiment_name,
#         'base_dir': base_folder,
#         'manifest_file': os.path.join(manifest_folder, 'not-hist-feces-manifest.csv'),
#         'replace_files': False,
#     })

In [11]:
# Define paths
fname = 'step-01-prepare-data.ipynb'
step_file = os.path.join(steps_folder, fname)
result_file = os.path.join(executed_steps, fname)
step_params = {
    'experiment_name': experiment_name,
    'base_dir': base_folder,
    'manifest_file': os.path.join(manifest_folder, 'not-hist-feces-manifest.csv'),
    'replace_files': False,
}

# Execute notebook
pm.execute_notebook(step_file, result_file, parameters=step_params, kernel_name='python3')

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2022-05-25T13:25:21.784328',
     'end_time': '2022-05-25T13:25:21.800761',
     'duration': 0.016433,
     'status': 'completed'}},
   'source': '# Prepare data for Pipeline',
   'id': '6a13f0c6'},
  {'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2022-05-25T13:25:21.811686',
     'end_time': '2022-05-25T13:25:21.821590',
     'duration': 0.009904,
     'status': 'completed'}},
   'source': '## Setup and settings',
   'id': '90793999'},
  {'cell_type': 'code',
   'execution_count': 1,
   'metadata': {'ExecuteTime': {'end_time': '2022-05-25T10:45:49.091975Z',
     'start_time': '2022-05-25T10:45:31.197286Z'},
    'tags': [],
    'papermill': {'exception': False,
     'start_time': '2022-05-25T13:25:21.831103',
     'end_time': '2022-05-25T13:25:28.260209',
     'duration': 6.429106,
     'status': 'com

## Step 02 - Construct ASV table
This Step load the raw sequences Artifact `qza`, conduct **quality control** and construct the **ASV** table using the `DADA2` QIIME2 plugin.

A report with describing the filtering implact along all phases of DADA2 plugin execution. The DADA2 outputs three QIME2 Artifacts:
- Table
- Representative Sequences
- Statistics

In [10]:
# Define paths
fname = 'step-02-dada2.ipynb'
step_file = os.path.join(steps_folder, fname)
result_file = os.path.join(executed_steps, fname)
step_params = {
    'metadata_file': os.path.join(metadata_folder, 'not-hist-feces-metadata.tsv'),
    'experiment_folder': os.path.join(qiime_folder, 'demux-paired.qza'),
    'replace_files': False,
    'phred': parameters['phred'],
    'trunc_f': parameters['trunc_f'],
    'trunc_r': parameters['trunc_r'],
    'overlap': parameters['overlap'],
    'threads': parameters['threads'],
}
# Execute notebook
pm.execute_notebook(step_file, result_file, parameters=step_params)