# Make OTU Table from many file types

This program takes DNA sequencing data in a variety of file formats and, based on user-supplied paths and preferences, produces an OTU table and related analyses.

In [2]:
import zipfile
import os
import subprocess
import sys

from IPython.display import FileLinks, FileLink
from functools import partial
from os import chdir

--main asks user for inputs of file paths and preferences, runs all functions, outputs an OTU table

In [None]:
def main():
    """Prompt for paths and preferences, then run the OTU pipeline.

    Asks for the user profile name, the output folder name, the macqiime
    install path, the sequencing input, the clustering method, the
    sequencing platform and the gg_13_5 reference folder. The output folder
    is created under ``/Users/<username>/`` if it does not exist yet, the
    input is converted with ``det_seq_file_type`` and the result is handed
    to ``run_pipeline``.
    """
    username = input("What is the name of your user profile on your computer? The outputs from this program will be located in your user folder. ")
    output_folder = input("What is the name of the output folder you would like to create? ")
    qiime_path = input("What is the path to macqiime on your computer, including the 'macqiime' folder. ")

    out_path = "/Users" + "/" + username + "/" + output_folder
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # orig_seqs is the path to the original inputted sequence, including the file name
    orig_seqs = input("What is the path to the DNA sequencing file, including the file name? ")

    # seqs_final is path to final sequencing file, including the file name
    seqs_final = det_seq_file_type(orig_seqs, out_path)

    # Keep asking until the answer is one of the accepted values. The old
    # check (`!= a or != b or != c`) was true for every possible answer.
    clustering_method = input("What method would you like to use for clustering (denovo, open, closed)? ")
    while clustering_method not in ("denovo", "open", "closed"):
        clustering_method = input("Please input one of the three options for clustering (denovo, open, closed). ")

    # platform_type is collected but not consumed by run_pipeline yet.
    platform_type = input("What platform are you using: Illumina or 454? ")
    while platform_type not in ("Illumina", "454"):
        platform_type = input("Please input one of the two platform options(Illumina or 454). ")

    # The directory is "site-packages" (hyphen), not "site_packages".
    site_packages = qiime_path + '/anaconda/lib/python2.7/site-packages'
    otufasta97 = site_packages + '/qiime_default_reference/gg_13_8_otus/rep_set/97_otus.fasta'
    otutxt97 = site_packages + '/qiime_default_reference/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt'

    # run_pipeline requires the gg_13_5 reference files as its last two
    # positional arguments; calling it without them raises TypeError.
    gg_13_5_path = input("What is the path to gg_13_5? ")
    gg_13_5_fasta = gg_13_5_path + '/rep_set/97_otus.fasta'
    gg_13_5_txt = gg_13_5_path + '/taxonomy/97_otu_taxonomy.txt'

    run_pipeline(seqs_final, out_path, username, clustering_method, otufasta97, otutxt97, gg_13_5_fasta, gg_13_5_txt)

inputs

main2 is for running in jupyter notebooks. not a part of the final program

--det_seq_file_type: the input orig_seqs is the file path to the original input sequences. Calls the appropriate function to convert the sequences into one FASTA file. Outputs the path to the final FASTA file.

In [None]:
def det_seq_file_type(orig_seqs, out_path):
    """Dispatch ``orig_seqs`` to the converter that matches its type.

    Args:
        orig_seqs: Path to the original input (a file or a folder).
        out_path: Folder where converters write their results.

    Returns:
        ``orig_seqs`` unchanged when it is already a FASTA file, otherwise
        whatever the selected converter returns.

    Raises:
        ValueError: If the input is neither a folder nor a file with a
            recognised extension.
    """
    file_name = orig_seqs.split("/")[-1]
    lowered = file_name.lower()

    # A folder is recognised on disk first, so folder names that contain a
    # dot (e.g. an archive extracted from "run.1.zip") are still handled.
    # The name-based fallback keeps the previous behaviour for dot-less names.
    if os.path.isdir(orig_seqs) or "." not in file_name:
        return convert_folder(orig_seqs, out_path)

    # Match on the real extension. Substring checks treated "reads.fastq"
    # and "seqs.fasta.gz" as FASTA because they contain ".fa" / ".fasta".
    if lowered.endswith((".fna", ".fasta", ".fa", ".fsa")):
        return orig_seqs
    if lowered.endswith(".zip"):
        return convert_zip(orig_seqs, out_path)
    if lowered.endswith(".sff"):
        return convert_sff(orig_seqs, out_path)
    if lowered.endswith(".bam"):
        # NOTE: convert_bam is still commented out in this notebook, so this
        # branch fails with NameError until it is implemented.
        return convert_bam(orig_seqs, out_path)

    raise ValueError("Unsupported sequencing input: %r" % (orig_seqs,))

--convert_zip: the input orig_seqs is the file path to the original input sequences, stored in a zip archive. Unzips the archive and calls det_seq_file_type() on the unzipped folder. Outputs the path to the final FASTA file.

In [None]:
def convert_zip(orig_seqs, out_path):
    """Extract a zip archive and convert its contents.

    Args:
        orig_seqs: Path to the ``.zip`` file.
        out_path: Output folder; the archive is extracted into a subfolder
            named after the archive (without its extension).

    Returns:
        The result of ``det_seq_file_type`` on the extracted folder.
    """
    file_name = orig_seqs.split("/")[-1]

    # splitext drops exactly the extension instead of a fixed 4 characters.
    unzipped_folder_name = os.path.splitext(file_name)[0]
    extract_dir = out_path + '/' + unzipped_folder_name

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(orig_seqs) as zfile:
        zfile.extractall(extract_dir)

    return det_seq_file_type(extract_dir, out_path)

3 cases:
1. R1 and R2 in a folder
2. R1, R2 and barcode
3. subfolders, each containing R1 and R2 (multiple_joined_paired_ends)

--convert_folder: the input orig_seqs is the file path to the original input sequences, where this is a folder containing R1 and R2 reads and possibly a file containing barcodes. Outputs the path to the final FASTA file.

In [None]:
def convert_folder(orig_seqs, out_path):
    files_in_folder = os.listdir(orig_seqs)

    r1_files = 0
    for f in files_in_folder:
        if os.path.isdir(orig_seqs +'/'+ f):
            return multiple_joins(orig_seqs, out_path)
        elif not os.path.isdir(orig_seqs+ '/' + f):
            if 'R1' in f:
                r1_files+=1
    if r1_files > 1:
        return multiple_joins(orig_seqs, out_path)

    r_one = ""
    r_two = ""
    barcodes = ""
    joined_data = ""

    parts = orig_seqs.split("/")
    file_location = "/".join(parts[:-1])
    file_name = parts[-1]

    for f in files_in_folder:
        if "R1" in f or "r1" in f:
            r_one = f
        elif "R2" in f or "r2" in f:
            r_two = f
        elif "barcodes" or "Barcodes" in f:
            barc = f

    if barcodes == "":
        forward = orig_seqs+ '/' + r_one
        reverse = orig_seqs+ '/' + r_two

        !join_paired_ends.py -f $forward -r $reverse -o $out_path"/joined_data"
    else:
        barc = orig_seqs + barcodes
        !join_paired_ends -f $forward -r $reverse -b $barc -o $out_path'/joined_data'

    return out_path + '/' + "joined_data"

case 3

--multiple_joins: the input orig_seqs is the file path to the original input sequences, where this is a folder containing subfolders, each holding two files: the R1 and R2 reads of the same sample. Outputs the path to the final FASTA file.

In [None]:
def multiple_joins(orig_seqs, out_path):
    #Join the forward and reverse sequences
    #Works for subfolders and mutiple files within one folder
    !multiple_join_paired_ends.py -i $orig_seqs -o $out_path"/joined_data"

    return out_path + '/' + "joined_data"

converts sff file, outputs path to FASTA file

In [None]:
def convert_sff(orig_seqs, out_path):
    parts = orig_seqs.split("/")
    file_location = "/".join(parts[:-1])
    
    !process_sff -i $orig_seqs -o $out_path"/joined_data"
    return out_path+ '/' + "joined_data"

NEED TO FIGURE OUT --convert_bam: the input orig_seqs is the file path to the original input sequences, where this is a BAM file. Outputs the path to the final FASTA file.

In [None]:
# def convert_bam(orig_seqs, out_path):
#     bedtools bamtofastq [OPTIONS] -i orig_seqs -fq Fasting_Example.fna
#     http://bedtools.readthedocs.io/en/latest/content/tools/bamtofastq.html

In [None]:
def run_pipeline(seqs_final, out_path, username, clustering_method, otufasta97, otutext97,gg_13_5_fasta, gg_13_5_txt):
    
    #Remove the non-joined sequences. different for whether or not there are multiple libraries
    in_joined_data = os.listdir(seqs_final)

    n_dirs = 0
    for f in in_joined_data:
        if os.path.isdir(seqs_final+ '/' + f):
            n_dirs+=1
            to_delete_un1 = seqs_final + '/' + f +"/fastqjoin.un1.fastq"
            to_delete_un2 = seqs_final + '/'  + f +"/fastqjoin.un2.fastq"
            !rm -r $to_delete_un1
            !rm -r $to_delete_un2

    if n_dirs == 0:
        !rm -r $seqs_final"/fastqjoin.un1.fastq"
        !rm -r $seqs_final"/fastqjoin.un2.fastq"

    # We are going to concatenate all the sequences into 1 to run 
    # Note the folder for the next step cannot contain this file.
    
    in_joined_data_2 = os.listdir(seqs_final)
    filenames = []
    
    for f in in_joined_data_2:
        if "_L001_R1_001" in f:
            filenames.append(seqs_final + '/' + f + '/fastqjoin.join.fastq')
            
#     with open(seqs_final + "/all_fastqjoin.join.fastq", 'w') as outfile:
#         for fname in filenames:
#             with open(fname) as infile:
#                 for line in infile:
#                     outfile.write(line)
    
    
#     # Use fastaQC to analyze the data and determine the quality of the reads
#     ####!module load java
#     !mkdir $out_path'/fastqc_results'
#     !'/home/username/FastQC/fastqc/'$seqs_final'/all_fastqjoin.join.fastq' --outdir=$out_path'/fastqc_results'
    
    # Data seems fine. Allowing to use up to 3 reads below 30 of quality and only 1 N

#     #Move all all_fastqjoin.join.fastq
#     !mv $seqs_final"/all_fastqjoin.join.fastq "$seqs_final"/fastqc_results"

    #Rename files in Python
    chdir(seqs_final)
    file_names = os.listdir(seqs_final)
    for ifile in file_names:
        os.rename(ifile, str.replace(ifile, "_", ""))

#     # Make sure you remove the _L001_R1
#     chdir(seqs_final)
#     !rm -r *L001_R1_001


    #Split libraries. includes saving the fasta files (see qiime parameters) using qiime. Determines how many files (more than 1 R1 or R2), split vs. multiple split.
##should have an option to use a barcodes file (-b option in split_libraries.py)
    
    in_joined_data = os.listdir(seqs_final)
    n_dirs = 0
    for f in in_joined_data:
        if os.path.isdir(seqs_final+ '/' + f):
            n_dirs+=1
    if n_dirs > 1:
        !multiple_split_libraries_fastq.py -i $seqs_final -o $out_path'/split_data' --demultiplexing_method sampleid_by_file --include_input_dir_path --remove_filepath_in_name -p $out_path'/qiime_parameters.txt'
    else:
        !split_libraries_fastq.py -i $seqs_final -o $out_path'/split_data'
    
    
    #Fixing the names
    chdir(out_path+'/split_data')
    #!sed 's/L001R1001//g' seqs.fastq > seqs_fixed.fastq 
    !sed 's/L001R1001//g' seqs.fna > seqs_fixed.fna

##reference database is gg_13_8
    !identify_chimeric_seqs.py -i $out_path'/split_data/seqs_fixed.fna' -m usearch61 -o $out_path'/usearch_checked_chimeras/' -r $otufasta97

    !filter_fasta.py -f $out_path'/split_data/seqs_fixed.fna' -o $out_path'/seqs_nochimeras_filtered.fna' -s $out_path'/usearch_checked_chimeras/chimeras.txt' -n
    


## need to actually make OTU (this code must have gotten removed?)
    
    
    if clustering_method == "denovo" or clustering_method == "Denovo":
        !pick_otus.py -i $out_path'/seqs_nochimeras_filtered.fna' -o $out_path'/OTUclustering_denovo' -m uclust
        otu_clust_path = '/OTUclustering_denovo'
        !make_otu_table.py -i $out_path'/OTUclustering_denovo/seqs_nochimeras_filtered_otus.txt' -o $out_path$otu_clust_path'/otu_table_denovo.biom'
        !biom convert -i $out_path$otu_clust_path'/otu_table_denovo.biom' -o $out_path'/otu_table_'$clustering_method'.txt' --table-type="OTU table" --to-tsv 
        !biom convert -i $out_path$otu_clust_path'/otu_table_denovo.biom' -o $out_path'/otu_table_'$clustering_method'.json' --table-type="OTU table" --to-json
        !mkdir $out_path'/biom_summarize' 
        !biom summarize-table -i $out_path$otu_clust_path'/otu_table_denovo.biom' -o $out_path'/biom_summarize/otu_table_'$clustering_method'_sum.txt'
        !biom summarize-table -i $out_path$otu_clust_path'/otu_table_denovo.biom' -o $out_path'/biom_summarize/otu_table_'$clustering_method'_sumq.txt'  --qualitative
        
    elif clustering_method == "open" or clustering_method == "Open":
        !pick_open_reference_otus.py -i $out_path'/seqs_nochimeras_filtered.fna' -o $out_path'/OTUclustering_openreference_usearch' --reference_fp $otufasta97 -f -m usearch61 -p $out_path'/qiime_parameters.txt'
        otu_clust_path = '/OTUclustering_openreference_usearch'
        
    elif clustering_method == "closed" or clustering_method == "Closed":  
        !pick_closed_reference_otus.py -i $out_path'/seqs_nochimeras_filtered.fna' -o $out_path'/OTUclustering_closedreference' --reference_fp $otufasta97 -f -m usearch61 -p $out_path'/qiime_parameters.txt'
        otu_clust_path = '/OTUclustering_closedreference'
    
    if clustering_method != "denovo" or clustering_method != "Denovo":
        !biom convert -i $out_path$otu_clust_path'/otu_table_mc2_w_tax.biom' -o $out_path'/otu_table_'$clustering_method'.txt' --table-type="OTU table" --to-tsv 

        !biom convert -i $out_path$otu_clust_path'/otu_table_mc2_w_tax.biom' -o $out_path'/otu_table_'$clustering_method'.json' --table-type="OTU table" --to-json

        # Summarize tables
        !mkdir $out_path'/biom_summarize' 
        !biom summarize-table -i $out_path$otu_clust_path'/otu_table_mc2_w_tax.biom' -o $out_path'/biom_summarize/otu_table_'$clustering_method'_sum.txt'
        !biom summarize-table -i $out_path$otu_clust_path'/otu_table_mc2_w_tax.biom' -o $out_path'/biom_summarize/otu_table_'$clustering_method'_sumq.txt'  --qualitative
    
    #======================================================================================================================
    #PICRUST
    #======================================================================================================================
    


In [4]:
# Hard-coded answers to the questions main() would ask, for running the
# pipeline from a notebook without interactive prompts.

# User profile name; outputs are written under this user's home folder.
username = 'user'

# Name of the output folder to create.
output_folder = 'try_prac_files_open_3'

# Path to macqiime, including the 'macqiime' folder.
qiime_path = '/macqiime'

# Create the output folder on first use.
out_path = "/".join(["/Users", username, output_folder])
if not os.path.exists(out_path):
    os.makedirs(out_path)

# Path to the original sequencing input, including the file name.
orig_seqs = '/Users/user/Desktop/practice_sequencing_files'

# Path to the converted sequencing data.
seqs_final = det_seq_file_type(orig_seqs, out_path)

# Clustering method: denovo, open or closed.
clustering_method = 'open'

# Sequencing platform: Illumina or 454.
platform_type = 'Illumina'

# gg_13_5 reference set and the two files taken from it.
gg_13_5_path = '/Users/user/Desktop/Independent_Study/gg_13_5_otus'
gg_13_5_fasta = "".join([gg_13_5_path, '/rep_set/97_otus.fasta'])
gg_13_5_txt = "".join([gg_13_5_path, '/taxonomy/97_otu_taxonomy.txt'])

# gg_13_8 reference files (97% OTUs): sequences and taxonomy.
otufasta97 = '/Users/user/Desktop/Independent_Study/gg_13_8_otus/rep_set/97_otus.fasta'
otutxt97 = '/Users/user/Desktop/Independent_Study/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt'



In [None]:
##Use the below section to if you would like to set custom parameters
##If you do not want to set custom parameters, the default QIIME parameters will be used

#Split_libraries_fastq parameters
max_bad_run_length = 3
quality_threshold = 30
sequence_max_n = 1
store_demultiplexed_fastq = True
#Pick_otus parameters
similarity = .9
# valid usearch_sort_methods include "abundance", "length", or "None"
usearch61_sort_method = "abundance"
sizeorder = True

# Write the parameters file that the QIIME calls receive through -p.
# Each line is "<script>:<option> <value>"; the `with` block closes the file
# even if one of the writes fails.
with open(out_path + "/qiime_parameters.txt", "w+") as parameter_file:
    parameter_file.write("# Parameters for defining split_libraries and pick_otus \n")
    parameter_file.write(("split_libraries_fastq:max_bad_run_length ") + str(max_bad_run_length) + " \n")
    parameter_file.write(('split_libraries_fastq:phred_quality_threshold ') + str(quality_threshold) + " \n")
    parameter_file.write(("split_libraries_fastq:sequence_max_n ") + str(sequence_max_n) + " \n")
    # The space before the value was missing here, which produced the
    # single token "store_demultiplexed_fastqTrue".
    parameter_file.write(("split_libraries_fastq:store_demultiplexed_fastq ") + str(store_demultiplexed_fastq) + " \n")
    parameter_file.write(("pick_otus:similarity ") + str(similarity) + " \n")
    parameter_file.write(("pick_otus:usearch61_sort_method ") + usearch61_sort_method + " \n")
    parameter_file.write(("pick_otus:sizeorder ") + str(sizeorder) + " \n")

In [None]:
# Run the whole pipeline with the inputs and reference files set above.
run_pipeline(
    seqs_final,
    out_path,
    username,
    clustering_method,
    otufasta97,
    otutxt97,
    gg_13_5_fasta,
    gg_13_5_txt,
)

In [None]:
# Create the folder that collects every PICRUSt-related output.
!mkdir $out_path'/PICRUST_outputs'

In [None]:
# Chimera check of the renamed split-library sequences against the gg_13_5 reference (usearch61).
!identify_chimeric_seqs.py -i $out_path'/split_data/seqs_fixed.fna' -m usearch61 -o $out_path'/PICRUST_outputs/usearch_checked_chimeras/' -r $gg_13_5_fasta

In [None]:
# Filter seqs_fixed.fna using the IDs in chimeras.txt; -n is presumably the negate switch (drop the listed IDs) — confirm against the QIIME docs.
!filter_fasta.py -f $out_path'/split_data/seqs_fixed.fna' -o $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -s $out_path'/PICRUST_outputs/usearch_checked_chimeras/chimeras.txt' -n


In [None]:
# Output subfolder (relative to out_path) for the reference-based OTU picking below.
output_otu = '/PICRUST_outputs/OTUclustering_closed'
# Pick OTUs against gg_13_5 with usearch61_ref; --suppress_new_clusters is passed so no new clusters are created.
!pick_otus.py -i $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -o $out_path$output_otu --refseqs_fp $gg_13_5_fasta -m usearch61_ref  --suppress_new_clusters


In [None]:
# Build the OTU table (.biom) from the OTU map, passing the gg_13_5 taxonomy file.
!make_otu_table.py -i $out_path$output_otu'/seqs_nochimeras_filtered_otus.txt' --taxonomy $gg_13_5_txt -o $out_path'/PICRUST_outputs/otu_table.biom'


In [None]:
# Pick a representative-sequence set (rep_set.fna) from the OTU map and the chimera-filtered sequences.
!pick_rep_set.py -i $out_path$output_otu'/seqs_nochimeras_filtered_otus.txt' -o $out_path'/PICRUST_outputs/rep_set.fna' -f $out_path"/PICRUST_outputs/seqs_nochimeras_filtered.fna"


In [None]:
# Assign taxonomy to the representative set using the gg_13_5 references.
# Note: the output goes to <out_path>/AssignTaxa, not under PICRUST_outputs.
!assign_taxonomy.py -i $out_path'/PICRUST_outputs/rep_set.fna' -o $out_path'/AssignTaxa' -r $gg_13_5_fasta -t $gg_13_5_txt


In [None]:
!align_seqs.py -i $out_path'/PICRUST_outputs/rep_set.fna' -o $out_path'/PICRUST_outputs/RepSeqAligmenment' -t $'/Users/user/Desktop/Independent_Study/gg_13_5_otus/rep_set_aligned/97_otus.fasta'


In [None]:
# Filter the alignment from the previous step; results go to PICRUST_outputs/FilterAlignment.
!filter_alignment.py -i $out_path'/PICRUST_outputs/RepSeqAligmenment/rep_set_aligned.fasta' -o $out_path'/PICRUST_outputs/FilterAlignment'


In [None]:
# Build a tree (refset.tree) from the filtered alignment.
!make_phylogeny.py -i $out_path'/PICRUST_outputs/FilterAlignment/rep_set_aligned_pfiltered.fasta' -o $out_path'/PICRUST_outputs/refset.tree'


In [None]:
# Export the OTU table as tab-separated text.
!biom convert -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/otu_table.txt' --table-type="OTU table" --to-tsv 


In [None]:
# Export the OTU table as JSON.
!biom convert -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/otu_table.json' --table-type="OTU table" --to-json

In [None]:
# Create the folder for the OTU table summaries.
!mkdir $out_path'/PICRUST_outputs/biom_summarize'


In [None]:
# Write the default summary of the OTU table.
!biom summarize-table -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/biom_summarize/otu_table_sum.txt'


In [None]:
# Write a second summary of the same table, this time with --qualitative.
!biom summarize-table -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/biom_summarize/otu_table_sumq.txt'  --qualitative 


NEED TO FIGURE THIS OUT FOR ALL COMPUTER TYPES (ONE CELL BELOW)

- Note: the required PICRUSt data files had to be placed manually in the folder /macqiime/anaconda/lib/python2.7/site-packages/PICRUSt-1.1.0-py2.7.egg/picrust/data

In [1]:
# Normalize the OTU table by copy number (PICRUSt); writes norm.biom.
# The recorded output of this cell shows the run failing on import with a
# numpy API-version RuntimeError raised while loading biom/scipy.
!normalize_by_copy_number.py -i $out_path'/PICRUST_outputs/otu_table.biom'  -o $out_path'/PICRUST_outputs/norm.biom'


RuntimeError: module compiled against API version 9 but this version of numpy is 4
Traceback (most recent call last):
  File "/macqiime/anaconda/bin/normalize_by_copy_number.py", line 4, in <module>
    __import__('pkg_resources').run_script('PICRUSt==1.1.0', 'normalize_by_copy_number.py')
  File "build/bdist.macosx-10.6-x86_64/egg/pkg_resources/__init__.py", line 744, in run_script
  File "build/bdist.macosx-10.6-x86_64/egg/pkg_resources/__init__.py", line 1499, in run_script
  File "/macqiime/anaconda/lib/python2.7/site-packages/PICRUSt-1.1.0-py2.7.egg/EGG-INFO/scripts/normalize_by_copy_number.py", line 16, in <module>
    from biom import load_table, Table
  File "/macqiime/anaconda/lib/python2.7/site-packages/biom/__init__.py", line 51, in <module>
    from .table import Table
  File "/macqiime/anaconda/lib/python2.7/site-packages/biom/table.py", line 185, in <module>
    from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, isspmatrix, vstack
  File "/macqiime/a

In [None]:
# Predict metagenomes from the normalized OTU table; --with_confidence is passed.
!predict_metagenomes.py -i $out_path'/PICRUST_outputs/norm.biom' -o $out_path'/PICRUST_outputs/metagenome_predictions.biom' --with_confidence

In [None]:
# Collapse the predicted metagenome by KEGG_Pathways at level 2.
!categorize_by_function.py -i $out_path'/PICRUST_outputs/metagenome_predictions.biom' -c KEGG_Pathways -l 2 -o $out_path'/PICRUST_outputs/predicted_metagenomes.L2.biom'


In [None]:
!mkdir $out_path'/PICRUST_outputs'

#Use the version of GG available from PICRUST gg_13_5
!identify_chimeric_seqs.py -i $out_path'/split_data/seqs_fixed.fna' -m usearch61 -o $out_path'/PICRUST_outputs/usearch_checked_chimeras/' -r $gg_13_5_fasta

!filter_fasta.py -f $out_path'/split_data/seqs_fixed.fna' -o $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -s $out_path'/PICRUST_outputs/usearch_checked_chimeras/chimeras.txt' -n


#create OTU table without chimeras for PICRUST
if clustering_method == "denovo" or clustering_method == "Denovo":
    output_otu = '/PICRUST_outputs/OTUclustering_denovo'
    !pick_otus.py -i $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -o $out_path$output_otu -m uclust
elif clustering_method == "open" or clustering_method =="Open":
    output_otu = '/PICRUST_outputs/OTUclustering_open'
    !pick_otus.py -i $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -o $out_path$output_otu --refseqs_fp $gg_13_5_fasta -m usearch61_ref  --suppress_new_clusters
elif clustering_method == "closed" or clustering_method =="Closed":
    output_otu = '/PICRUST_outputs/OTUclustering_closed'
    !pick_otus.py -i $out_path'/PICRUST_outputs/seqs_nochimeras_filtered.fna' -o $out_path$output_otu --refseqs_fp $gg_13_5_fasta -m usearch61_ref  --suppress_new_clusters


!make_otu_table.py -i $out_path$output_otu'/seqs_nochimeras_filtered_otus.txt' --taxonomy $gg_13_5_txt -o $out_path'/PICRUST_outputs/otu_table.biom'

!pick_rep_set.py -i $out_path$output_otu'/seqs_nochimeras_filtered_otus.txt' -o $out_path'/PICRUST_outputs/rep_set.fna' -f $out_path"/PICRUST_outputs/seqs_nochimeras_filtered.fna"

!assign_taxonomy.py -i $out_path'/PICRUST_outputs/rep_set.fna' -o $out_path'/AssignTaxa' -r $gg_13_5_fasta -t $gg_13_5_txt

#Align sequences in Qiime (Takes long for large datasets) 
!align_seqs.py -i $out_path'/PICRUST_outputs/rep_set.fna' -o $out_path'/PICRUST_outputs/RepSeqAligmenment' -t $gg_13_5_txt

#Filter alignments 
!filter_alignment.py -i $out_path'/PICRUST_outputs/RepSeqAligmenment/rep_set_aligned.fasta' -o $out_path'/PICRUST_outputs/FilterAligment'

#Make phylogeny -Same results with 85 than 97
!make_phylogeny.py -i $out_path'/PICRUST_outputs/FilterAligment/rep_set_aligned_pfiltered.fasta' -o $out_path'PICRUST_outputs/refset.tree'

!biom convert -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/otu_table.txt' --table-type="OTU table" --to-tsv 

!biom convert -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/otu_table.json' --table-type="OTU table" --to-json

# Summarize tables
!mkdir $out_path'/PICRUST_outputs/biom_summarize'
!biom summarize-table -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/biom_summarize/otu_table_sum.txt'
!biom summarize-table -i $out_path'/PICRUST_outputs/otu_table.biom' -o $out_path'/PICRUST_outputs/biom_summarize/otu_table_sumq.txt'  --qualitative 

#Normalize by copy number
# This part is done locally
!normalize_by_copy_number.py -i $out_path'/PICRUST_outputs/otu_table.biom'  -o $out_path'/PICRUST_outputs/norm.biom'

#Predict metagenomes
!predict_metagenomes.py -i $out_path'/PICRUST_outputs/norm.biom' -o $out_path'/PICRUST_outputs/metagenome_predictions.biom' --with_confidence

#Categorize by function with level 2 in KEGG
!categorize_by_function.py -i $out_path'/PICRUST_outputs/metagenome_predictions.biom' -c KEGG_Pathways -l 2 -o $out_path'/PICRUST_outputs/predicted_metagenomes.L2.biom'

#Categorize by function with level 2 in KEGG
!categorize_by_function.py -i $out_path'/PICRUST_outputs/predicted_metagenomes.L2.biom' -c KEGG_Pathways -l 2 -o $out_path'/PICRUST_outputs/predicted_metagenomes.L2.txt' -f 

#Categorize by function with level 3 in KEGG
!categorize_by_function.py -i $out_path'/PICRUST_outputs/predicted_metagenomes.L2.biom' -c KEGG_Pathways -l 3 -o $out_path'/PICRUST_outputs/predicted_metagenomes.L3.biom'

#Categorize by function with level 3 in KEGG
!categorize_by_function.py -i $out_path'/PICRUST_outputs/metagenome_predictions.biom' -c KEGG_Pathways -l 3 -o $out_path'/PICRUST_outputs/predicted_metagenomes.L3.txt' -f

#Prediction of functions by OTUs
!metagenome_contributions.py -i $out_path'/PICRUST_outputs/predicted_metagenomes.L3.biom'  -o $out_path'/PICRUST_outputs/ko_metagenome_contributions.tab'

#Summarize metagenome by plots
!summarize_taxa_through_plots.py -i $out_path'/PICRUST_outputs/predicted_metagenomes.L2.biom' -p qiime_params.txt -o $out_path'/PICRUST_outputs/plots_at_level2'

