In [1]:
import pandas as pd
import biom

In [3]:
def process_tax_table(filepath, tax_system="SILVA", output_path=None):
    '''Rewrite a phyloseq taxonomy table with SILVA- or Greengenes-style rank prefixes.

    The input is a tab-separated file with the columns ``OTUID``, ``domain``,
    ``phylum``, ``class``, ``order``, ``family``, ``genus`` and ``species``.
    The output is a tab-separated file with two columns, ``# OTUID`` and
    ``taxonomy``; each taxonomy cell holds the seven prefixed rank labels in
    the order domain, phylum, class, order, family, genus, species.

    Parameters
    ----------
    filepath : str
        Path of the taxonomy table to read.
    tax_system : str
        ``"SILVA"`` (prefixes ``D_0__`` .. ``D_6__``) or ``"Greengenes"``
        (prefixes ``k__``, ``p__``, ``c__``, ``o__``, ``f__``, ``g__``, ``s__``).
    output_path : str, optional
        Where to write the result. Defaults to ``filepath``, i.e. the input
        file is overwritten. After an in-place rewrite the file no longer has
        the rank columns, so it cannot be processed a second time.

    Raises
    ------
    ValueError
        If ``tax_system`` is not one of the supported names. This is checked
        before the file is read.
    KeyError
        If the table lacks one of the required columns.

    NOTE(review): the taxonomy column is written as the text of a Python list
    (``['k__Bacteria', 'p__...', ...]``). If downstream tools expect a
    ``"; "``-joined string, join the labels before writing -- confirm.
    '''
    ## ranks from broadest to most specific: class comes before order
    rank_columns = ["domain", "phylum", "class", "order", "family", "genus", "species"]
    prefixes_by_system = {
        "SILVA": ["D_0__", "D_1__", "D_2__", "D_3__", "D_4__", "D_5__", "D_6__"],
        "Greengenes": ["k__", "p__", "c__", "o__", "f__", "g__", "s__"],
    }

    ## fail fast on a bad taxonomy system, before touching the file
    if tax_system not in prefixes_by_system:
        raise ValueError('Unknown taxonomy system used. Use either SILVA or Greengenes taxonomy system.')
    prefixes = prefixes_by_system[tax_system]

    ## import the taxonomy table from phyloseq
    tax_table = pd.read_csv(filepath, sep="\t")

    ## build one list of prefixed labels per OTU; a missing rank (NaN after
    ## read_csv) becomes the bare prefix instead of raising on str + float
    taxonomy = [
        [
            prefix + ("" if pd.isna(value) else str(value))
            for prefix, value in zip(prefixes, ranks)
        ]
        for ranks in tax_table[rank_columns].itertuples(index=False, name=None)
    ]

    ## assemble the two-column table with its final column names
    new_tax_table = pd.DataFrame({"# OTUID": tax_table["OTUID"], "taxonomy": taxonomy})

    ## save the reformatted taxonomy table as TSV
    destination = filepath if output_path is None else output_path
    new_tax_table.to_csv(destination, index=False, sep="\t")

In [5]:
## deliberately pass an unsupported taxonomy system to show the validation error;
## catch and print it so the notebook still runs from top to bottom
try:
    process_tax_table("lemay_otu_metadata.txt", "Sam")
except ValueError as err:
    print("ValueError: {}".format(err))

ValueError: Unknown taxonomy system used. Use either SILVA or Greengenes taxonomy system.

In [2]:
## read the tab-delimited taxonomy table exported from phyloseq
tax_table = pd.read_csv("lemay_otu_metadata.txt", delimiter="\t")

In [4]:
## start with an empty list that will collect the reformatted taxonomy rows
green_tax = list()

In [5]:
## create the list of correctly formatted taxonomic classifications
## (prefix, source column) pairs from broadest to most specific rank;
## class comes before order
greengenes_ranks = [
    ("k__", "domain"),
    ("p__", "phylum"),
    ("c__", "class"),
    ("o__", "order"),
    ("f__", "family"),
    ("g__", "genus"),
    ("s__", "species"),
]
for _, otu in tax_table.iterrows():
    ## a missing rank (NaN after read_csv) becomes the bare prefix instead of
    ## raising TypeError on str + float
    tax = [
        prefix + ("" if pd.isna(otu[column]) else str(otu[column]))
        for prefix, column in greengenes_ranks
    ]
    green_tax.append((otu["OTUID"], tax))

In [6]:
## turn the collected (OTU id, taxonomy) tuples into a two-column DataFrame
green_tax = pd.DataFrame(data=green_tax)

In [7]:
## give the two positional columns their final names
column_labels = {0: "OTUID", 1: "taxonomy"}
green_tax = green_tax.rename(columns=column_labels)

In [8]:
## overwrite the metadata file with the reformatted table (tab-separated, no index column)
green_tax.to_csv("lemay_otu_metadata.txt", sep="\t", index=None)

In [4]:
## load the BIOM table from disk
func_biom_path = "lemay_func.biom"
func_table = biom.load_table(func_biom_path)