# Merge sequence tables from multiple runs and create phyloseq object

In [2]:
ncores = 20
OutFolder = "~/Hyphosphere/data/MiSeq/merged/dada2_intermediates"

#location of taxonomy database
TrainingSet = '~/databases/silva_nr_v132_train_set.fa.gz'
SpeciesTraining = '~/databases/silva_species_assignment_v132.fa.gz'

#Metadata file
SamData = '~/Hyphosphere/3exp_metadata_final.txt'

In [3]:
library(dada2)
library(tidyr)
library(dplyr)
library(phyloseq)
library(ggplot2)

Loading required package: Rcpp
“multiple methods tables found for ‘colMeans’”
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

“package ‘ggplot2’ was built under R version 3.6.1”

In [4]:

                   # Merge multiple runs (if necessary)
st1 <- readRDS("~/Hyphosphere/data/Exp1/DADA2Files/seqtab-nochim.rds")
st2 <- readRDS("~/Hyphosphere/data/Exp10/DADA2Files/seqtab-nochim.rds")
st3 <- readRDS("~/Hyphosphere/data/Exp11/DADA2Files/seqtab-nochim.rds")
st.all <- mergeSequenceTables(st1, st2, st3)

In [5]:
rownames(st1)

In [6]:
dim(st.all)

In [7]:
st1["HCBP3_S212",1:25]

In [96]:
st.all.2 = collapseNoMismatch(st.all)


In [97]:
dim(st.all.2)

*Note that summed total of sequences in all three experiments is 77,639 so approximately 16k sequences are shared between expriments

In [98]:
outFile = file.path(OutFolder,"seqtab-merged.rds")

saveRDS(st.all.2, outFile)

# Checkpoint
* read merged seqtab

In [17]:
CheckPoint = file.path(OutFolder,"seqtab-merged.rds")

seqtab.nochim = readRDS(CheckPoint)

# Assign taxonomy

In [100]:
taxa <- assignTaxonomy(seqtab.nochim, TrainingSet, multithread=ncores)


In [None]:
outFile = file.path(OutFolder,'taxa.rds')
outFile
saveRDS(taxa, outFile)

# Checkpoint read taxa file

In [9]:
CheckPoint = file.path(OutFolder,"taxa.rds")



In [10]:
taxa = readRDS(CheckPoint)

In [11]:
dim(taxa)

In [12]:
#inspect taxonomic assignment

taxa.print <- taxa # Removing sequence rownames for display only
rownames(taxa.print) <- NULL
head(taxa.print)


Kingdom,Phylum,Class,Order,Family,Genus
Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium
Bacteria,,,,,
Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas
Bacteria,,,,,
Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter
Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio


# hand off to phyloseq

In [18]:
# read sample data
samdf = read.delim(file = SamData, header = TRUE, sep = '\t', row.names = "SampleID")
head(samdf)
rownames(samdf)[1:4]

Unnamed: 0,Sample,Experiment,Plant,Fungus,Soil,SampleType,Treatment,TimePoint,DAI,Rep,Concentration_ng.ul
HCBN1_S211,HCBN1,1,N1,Gv,Dryden,CS,HN,,81,1,6.08
HCBN2_S223,HCBN2,1,N2,Gv,Dryden,CS,HN,,81,2,7.58
HCBN3_S235,HCBN3,1,N3,Gv,Dryden,CS,HN,,81,3,7.05
HCBF1_S247,HCBF1,1,F1,Gv,Florence,CS,HN,,81,1,2.32
HCBF2_S259,HCBF2,1,F2,Gv,Florence,CS,HN,,81,2,2.08
HCBF3_S271,HCBF3,1,F3,Gv,Florence,CS,HN,,81,3,1.89


In [19]:
dim(samdf)

In [20]:
#rename taxa
taxa = cbind(taxa, row.names(taxa))
colnames(taxa)[7] = "Seq"

In [21]:
dim(taxa)
head(taxa)

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus,Seq,Unnamed: 8
TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG,Bacteria,,,,,,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG
TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG,Bacteria,,,,,,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG
TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG


In [22]:
rownames(seqtab.nochim)[1:50]

In [23]:
rownames(seqtab.nochim) %>% length()

In [15]:
rownames(samdf) %>% length

In [24]:
toremove = setdiff(rownames(seqtab.nochim),rownames(samdf))

In [25]:
toremove

In [26]:

allsamples = rownames(samdf)
allsamples = allsamples[!(allsamples %in% toremove)]
seqtab.nochim2 = seqtab.nochim[allsamples,]
seqtab.nochim2 %>% dim()

## Create phyloseq object

In [27]:
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), 
               sample_data(samdf), 
               tax_table(taxa))
ps

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 61130 taxa and 434 samples ]
sample_data() Sample Data:       [ 434 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 61130 taxa by 8 taxonomic ranks ]

In [28]:
CheckOTU = taxa_names(ps)[150]

In [29]:
CheckOTU

In [30]:
ps %>% prune_taxa(CheckOTU,.) %>% otu_table()

Unnamed: 0,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCGCCTAAGTCAGTTGTGAAATACCCCGGCTCAACCGGGGGGGTGCGATTGATACTGGGCGGCTCGAGTAACAGCGAGGTTGGCGGAATTGACGGTGTAGCGGTGAAATGCATAGATATCGTCAAGAACACCGATAGCGAAGGCAGCCAACTAGGGGTTAACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG
10xBLS156_S303,0
2xBLS143_S297,0
5xBLS120_S300,0
BaseA_S310,0
BaseB_S291,0
BaseC_S294,0
HCBF1_S247,0
HCBF2_S259,0
HCBF3_S271,0
HCBN1_S211,0


In [31]:
sample_names(ps)[1:55]

## rename taxa

* 'TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCGCCTAAGTCAGTTGTGAAATACCCCGGCTCAACCGGGGGGGTGCGATTGATACTGGGCGGCTCGAGTAACAGCGAGGTTGGCGGAATTGACGGTGTAGCGGTGAAATGCATAGATATCGTCAAGAACACCGATAGCGAAGGCAGCCAACTAGGGGTTAACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG' should have 14 reads in sample 84_RIN7HN1-RH

In [32]:
new.names <- paste0("ASV", seq(ntaxa(ps))) # Define new names ASV1, ASV2,
seqs <- taxa_names(ps) # Store sequences
names(seqs) <- new.names # Make map from ASV1 to full sequence
taxa_names(ps) <- new.names # Rename to human-friendly format
taxa_names(ps)[1:10]

In [33]:
subset_taxa(ps, Seq == CheckOTU, TRUE) %>% 
    prune_samples("84_RIN7HN1-RH",.) %>%
    otu_table()
    

Unnamed: 0,ASV150
84_RIN7HN1-RH,14


* taxa names assigned properly

In [34]:
taxa_sums(ps)[1:10]

## save full phyloseq object

In [35]:
saveRDS(ps, file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_full.rds')

In [36]:
ps = readRDS(file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_full.rds')

## Threshold to remove minor sequences

In [37]:
tax_table(ps) %>% head
ps.thresh = filter_taxa(ps, function(x) sum(x > 2) > 2, TRUE)
ps.thresh

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus,Seq,Unnamed: 8
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV2,Bacteria,,,,,,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV4,Bacteria,,,,,,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 19559 taxa and 434 samples ]
sample_data() Sample Data:       [ 434 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 19559 taxa by 8 taxonomic ranks ]

In [38]:
tax_table(ps.thresh)[1:18]

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus,Seq,Unnamed: 8
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV2,Bacteria,,,,,,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV4,Bacteria,,,,,,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV7,Bacteria,Proteobacteria,Alphaproteobacteria,Rickettsiales,Mitochondria,,GACGGGGGGGGCAAGTGTTCTTCGGAATGACTGGGCGTAAAGGGCACGTAGGCGGTGAATCGGGTTGAAAGTGAAAGTCGCCAAAAAGTGGCGGAATGCTCTCGAAACCAATTCACTTGAGTGAGACAGAGGAGAGTGGAATTTCGTGTGTAGGGGTGAAATCCGTAGATCTACGAAGGAACGCCAAAAGCGAAGGCAGCTCTCTGGGTCCCTACCGACGCTGGGGTGCGAAAGCATGGGGAGCGAACAGG,GACGGGGGGGGCAAGTGTTCTTCGGAATGACTGGGCGTAAAGGGCACGTAGGCGGTGAATCGGGTTGAAAGTGAAAGTCGCCAAAAAGTGGCGGAATGCTCTCGAAACCAATTCACTTGAGTGAGACAGAGGAGAGTGGAATTTCGTGTGTAGGGGTGAAATCCGTAGATCTACGAAGGAACGCCAAAAGCGAAGGCAGCTCTCTGGGTCCCTACCGACGCTGGGGTGCGAAAGCATGGGGAGCGAACAGG
ASV8,Bacteria,Proteobacteria,Alphaproteobacteria,Caulobacterales,Caulobacteraceae,Asticcacaulis,TACGAAGGGGGCTAGCGTTGCTCGGAATTACTGGGCGTAAAGGGAGCGTAGGCGGGTTATCAAGTTGGAGGTGAAAGCCCAGGGCTCAACCTTGGAATTGCCTTCAAAACTGATAACCTAGAGGATGATAGAGGTAAGTGGAACTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGACTTACTGGATCATTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGG,TACGAAGGGGGCTAGCGTTGCTCGGAATTACTGGGCGTAAAGGGAGCGTAGGCGGGTTATCAAGTTGGAGGTGAAAGCCCAGGGCTCAACCTTGGAATTGCCTTCAAAACTGATAACCTAGAGGATGATAGAGGTAAGTGGAACTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGACTTACTGGATCATTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGG
ASV9,Bacteria,Proteobacteria,Alphaproteobacteria,Rickettsiales,Mitochondria,,TACGGGAGGAGCGAGCATTATTCGGAATGATTAGGCGTAAAGGGTTTGTAGGTGGTTTTTTAAGTTGAAAAAAACAGATTAAAGCTCAACTTTATAAATTTTTTCAAAACTGAGAAACTTGAGTATAAATAGAGGATAATGGAATTTCTATTGGAGGGATAAAATACGTTGATAATAGAAGGAAGGCCTAAAGCGAAGGCAATTATCTGGGTATATACTGACACTGAGAAACGAAAGCTTGGGTAGCAAACGGG,TACGGGAGGAGCGAGCATTATTCGGAATGATTAGGCGTAAAGGGTTTGTAGGTGGTTTTTTAAGTTGAAAAAAACAGATTAAAGCTCAACTTTATAAATTTTTTCAAAACTGAGAAACTTGAGTATAAATAGAGGATAATGGAATTTCTATTGGAGGGATAAAATACGTTGATAATAGAAGGAAGGCCTAAAGCGAAGGCAATTATCTGGGTATATACTGACACTGAGAAACGAAAGCTTGGGTAGCAAACGGG
ASV10,Bacteria,Cyanobacteria,Oxyphotobacteria,Chloroplast,,,GACAGAGGATGCAAGCGTTATCCGGAATGATTGGGCGTAAAGCGTCTGTAGGTGGCTTTTCAAGTCCGCCGTCAAATCCCAGGGCTCAACCCTGGACAGGCGGTGGAAACTACCAAGCTGGAGTACGGTAGGGGCAGAGGGAATTTCCGGTGGAGCGGTGAAATGCATTGAGATCGGAAAGAACACCAACGGCGAAAGCACTCTGCTGGGCCGACACTGACACTGAGAGACGAAAGCTAGGGGAGCAAATGGG,GACAGAGGATGCAAGCGTTATCCGGAATGATTGGGCGTAAAGCGTCTGTAGGTGGCTTTTCAAGTCCGCCGTCAAATCCCAGGGCTCAACCCTGGACAGGCGGTGGAAACTACCAAGCTGGAGTACGGTAGGGGCAGAGGGAATTTCCGGTGGAGCGGTGAAATGCATTGAGATCGGAAAGAACACCAACGGCGAAAGCACTCTGCTGGGCCGACACTGACACTGAGAGACGAAAGCTAGGGGAGCAAATGGG


In [39]:
rm(ps)

## Remove and save sequences from tax_table
*removing sequences will greatly speed up psmelt and subsequent operations

In [40]:
Seqs_df = cbind(rownames(tax_table(ps.thresh)), tax_table(ps.thresh)[,'Seq'])

In [41]:
colnames(Seqs_df)[1:2] = c("ASV", "Seq")
head(Seqs_df)


Unnamed: 0,ASV,Seq
ASV1,ASV1,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV2,ASV2,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG
ASV3,ASV3,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV4,ASV4,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG
ASV5,ASV5,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG
ASV6,ASV6,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG


In [42]:
taxa_df = tax_table(ps.thresh)[,1:6]
head(taxa_df)


Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium
ASV2,Bacteria,,,,,
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas
ASV4,Bacteria,,,,,
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio


In [43]:
#save table of seqs
write.table(Seqs_df, file = '~/Hyphosphere/taxa_seqs.txt', sep = '\t')

In [45]:
# save fasta file of seqs
outfile = '~/Hyphosphere/seqs_thresh.fasta'

SeqNames = Seqs_df[,'ASV'] %>%
    as.list()
SeqNames[1:4]
seqs = Seqs_df[,'Seq'] %>% as.list()
seqs[1:4]
seqinr::write.fasta(sequences = as.list(seqs), names = SeqNames, file.out = outfile)

## Save thresholded phyloseq with simplified taxa_table

In [46]:
head(taxa_df)

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium
ASV2,Bacteria,,,,,
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas
ASV4,Bacteria,,,,,
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio


In [47]:
tax_table(ps.thresh) %>% head

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus,Seq,Unnamed: 8
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCTTTGATACTGGTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV2,Bacteria,,,,,,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG,GACATAGGTGGCAAACATTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCATGATAAGTTGCTGGTGGGAAATCAAGGCTCAACCTTGTGGAAGCTAGCAATACTGTCAAGCTAGAGGGCAGAAGAGGTTAACGGAACTCTATGTGGAGCGGTAAAATGTGTAGATATATAGAAGAACATCAATAAAGGCGAAGGCAGTTAACTAGTCTGTCCCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAACGG
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTCAGAGGTGAAAGCCTGGAGCTCAACTCCAGAACTGCCTTTGAGACTGCATCGCTTGAATCCAGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGGCTCACTGGACTGGTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGG
ASV4,Bacteria,,,,,,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG,GACATAGGTGGCGAACGTTATCCGGAATTATTGGGCGTAAAGGATGCGTAGATGGCAGAGTAAGTTACTGGTTGATGTCAAACTCAATTTGACGGAAGCTGGTAATACTGTTTTGCTAGAGGACAGGAGAGGTTGATGGAATTCTGTGTGGAGCGGTGAAATGCGTTGATCTACAGAGGAACACCAAAAAAGGCGAAGGCAGTCAACTATCCTGTTCCTGACATTGAGGCATGAAAGCGTGGGGAGCAAACCGG
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG,TACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGG
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGTAGGTGGTTTGTTAAGCTAGCTGTGAAATCCCCGGGCTCAACCTGGGCACTGCAGTTAGAACTGGCAAGCTAGAGTAGGGTAGAGGGGTGTGGAATTCCAGGTGTAGCGGTGAAATGCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGACACCCTGGACTCATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACAGG


In [48]:
tax_table(ps.thresh) = taxa_df
head(tax_table(ps.thresh))

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus
ASV1,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Rhizobiaceae,Phyllobacterium
ASV2,Bacteria,,,,,
ASV3,Bacteria,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingomonas
ASV4,Bacteria,,,,,
ASV5,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter
ASV6,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio


In [49]:
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 19559 taxa and 434 samples ]
sample_data() Sample Data:       [ 434 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 19559 taxa by 6 taxonomic ranks ]

In [50]:
saveRDS(ps.thresh, file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh.rds')

# Next steps: sequences will be used to build a tree in a python script and then tree will be re-united with phyloseq object in subsequent notebook

In [51]:
sessionInfo()

R version 3.6.0 (2019-04-26)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Ubuntu 16.04.6 LTS

Matrix products: default
BLAS/LAPACK: /data/home/be68/anaconda3/envs/MyR/lib/R/lib/libRblas.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.2.1   phyloseq_1.28.0 dplyr_1.0.0     tidyr_1.1.0    
[5] dada2_1.10.0    Rcpp_1.0.2     

loaded via a namespace (and not attached):
 [1] Biobase_2.44.0              splines_3.6.0              
 [3] jsonlite_1.6                foreach_1.4.7              
 [5] RcppParallel_4.4.2          sta