# Finalize merged dataset
* Incorporate phylogenetic tree into phyloseq object
* Save master and split versions

In [1]:
library(dada2)
library(tidyr)
library(dplyr)
library(phyloseq)
library(ggplot2)

Loading required package: Rcpp

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [12]:
# Load the thresholded phyloseq object and display its summary.
ps.thresh <- readRDS(file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh.rds')
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 14615 taxa and 382 samples ]
sample_data() Sample Data:       [ 382 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 14615 taxa by 6 taxonomic ranks ]

In [13]:
# Read the master tree and attach it to the phyloseq object.
# The printed summaries show fewer taxa after the merge (14615 -> 14457),
# i.e. taxa without a tip in the tree are not retained.
thresh.tree <- read_tree('~/Hyphosphere/data/3Exp/Fasttree/Master.tree')
ps.thresh <- merge_phyloseq(ps.thresh, thresh.tree)

In [14]:
# Display the merged object to check taxa, sample and tree tip counts.
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 14457 taxa and 382 samples ]
sample_data() Sample Data:       [ 382 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 14457 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 14457 tips and 14423 internal nodes ]

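* Note: the merged object has fewer taxa than the input (14615 → 14457). Some sequences are lost at the alignment stage because they are not predicted to align to any model; they are therefore absent from the tree and are dropped when the tree is merged.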

In [15]:
# Persist the phyloseq object that now includes the phylogenetic tree.
saveRDS(
  object = ps.thresh,
  file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree.rds'
)


In [17]:
# Reload the saved object (with tree) and display its summary.
ps.thresh <- readRDS(file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree.rds')
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 14457 taxa and 382 samples ]
sample_data() Sample Data:       [ 382 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 14457 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 14457 tips and 14423 internal nodes ]

In [18]:
# Preview the first rows of the taxonomy table.
head(tax_table(ps.thresh))

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus
ASV6971,Bacteria,,,,,
ASV5742,Bacteria,,,,,
ASV2480,Bacteria,,,,,
ASV15180,Bacteria,Acidobacteria,Subgroup_20,,,
ASV9985,Bacteria,Acidobacteria,Subgroup_20,,,
ASV22021,Bacteria,Acidobacteria,Subgroup_20,,,


# Global preprocessing

In [19]:
# Collect the taxa whose Family is "Mitochondria" to see how many there are.
mitochondria <- ps.thresh %>%
  subset_taxa(Family == "Mitochondria")
mitochondria


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 20 taxa and 382 samples ]
sample_data() Sample Data:       [ 382 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 20 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 20 tips and 19 internal nodes ]

In [21]:
# Drop mitochondrial taxa. Taxa with no Family assignment (NA) are kept
# explicitly, because `Family != "Mitochondria"` evaluates to NA for them.
ps.thresh2 <- ps.thresh %>%
  subset_taxa(is.na(Family) | Family != "Mitochondria")
ps.thresh2

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 14437 taxa and 382 samples ]
sample_data() Sample Data:       [ 382 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 14437 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 14437 tips and 14403 internal nodes ]

# Save subsets

In [35]:
# Exp11 (experiment 3): keep only samples from experiment "3", then prune
# taxa that have no positive count in any of the remaining samples.
ps.exp3 <- ps.thresh2 %>%
  subset_samples(Experiment == "3") %>%
  filter_taxa(function(x) sum(x > 0) > 0, prune = TRUE)
ps.exp3

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 12983 taxa and 192 samples ]
sample_data() Sample Data:       [ 192 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 12983 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 12983 tips and 12953 internal nodes ]

In [36]:
# Write the experiment 3 phyloseq object to the Exp11 directory.
saveRDS(
  object = ps.exp3,
  file = '~/Hyphosphere/Exp11/Exp11_phyloseq.rds'
)