# Finalize merged dataset
* Incorporate phylogenetic tree into phyloseq object
* Save master and split versions

In [1]:
library(dada2)
library(tidyr)
library(dplyr)
library(phyloseq)
library(ggplot2)

Loading required package: Rcpp

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Load the saved 3Exp phyloseq object (the `_thresh` version) and echo it so
# the taxa and sample counts are visible in the notebook.
ps.thresh <- readRDS(file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh.rds')
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 15605 taxa and 435 samples ]
sample_data() Sample Data:       [ 435 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 15605 taxa by 6 taxonomic ranks ]

In [3]:
# Read the master tree and attach it to the phyloseq object.
# errorIfNULL = TRUE: by default read_tree() returns NULL when the file cannot
# be read or parsed, which would only surface later as an obscure merge problem
# (or a silently missing phy_tree slot). Fail here with a clear error instead.
thresh.tree <- read_tree(
  treefile = '~/Hyphosphere/data/3Exp/Fasttree/Master.tree',
  errorIfNULL = TRUE
)
# merge_phyloseq() keeps only the taxa shared by all components, so taxa
# without a tip in the tree are dropped from the OTU and taxonomy tables.
ps.thresh <- merge_phyloseq(ps.thresh, thresh.tree)

In [4]:
# Display the merged object to confirm the phy_tree() slot is present and to
# see how many taxa remain after merging with the tree.
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 15434 taxa and 435 samples ]
sample_data() Sample Data:       [ 435 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 15434 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 15434 tips and 15394 internal nodes ]

* Note: 171 taxa (15605 → 15434) are dropped when the tree is merged in. These sequences were lost at the alignment stage because they were not predicted to align to any model, so they have no tip in the tree.

In [5]:
# Persist the tree-bearing phyloseq object (the "master" version).
saveRDS(
  object = ps.thresh,
  file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree.rds'
)


In [6]:
# Reload the master object written by the saveRDS() call above so the notebook
# can be resumed from this point without re-running the tree merge.
# The path previously pointed at '~/Hyphosphere3/...', which does not match the
# location the file was saved to (or any other path in this notebook); read
# back the file that was actually written.
ps.thresh <- readRDS(file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree.rds')
ps.thresh

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 15434 taxa and 435 samples ]
sample_data() Sample Data:       [ 435 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 15434 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 15434 tips and 15394 internal nodes ]

In [7]:
# Peek at the first rows of the taxonomy table.
head(tax_table(ps.thresh))

Unnamed: 0,Kingdom,Phylum,Class,Order,Family,Genus
ASV5742,Bacteria,,,,,
ASV6971,Bacteria,,,,,
ASV2480,Bacteria,,,,,
ASV15180,Bacteria,Acidobacteria,Subgroup_20,,,
ASV9985,Bacteria,Acidobacteria,Subgroup_20,,,
ASV22021,Bacteria,Acidobacteria,Subgroup_20,,,


# Global preprocessing

In [8]:
# Pull out the taxa whose Family is annotated as "Mitochondria" to see how
# many of them are in the dataset before removing them.
mitochondria <- subset_taxa(ps.thresh, Family == "Mitochondria")
mitochondria


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 20 taxa and 435 samples ]
sample_data() Sample Data:       [ 435 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 20 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 20 tips and 19 internal nodes ]

In [9]:
# Remove mitochondria.
# The is.na() term is required: `Family != "Mitochondria"` evaluates to NA for
# taxa with no Family assignment, and subset_taxa() would drop those rows too.
ps.thresh2 <- subset_taxa(ps.thresh, is.na(Family) | Family != "Mitochondria")
ps.thresh2

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 15414 taxa and 435 samples ]
sample_data() Sample Data:       [ 435 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 15414 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 15414 tips and 15374 internal nodes ]

# Save subsets

In [10]:
# Exp11 (experiment 3): keep only its samples, then discard taxa that have no
# positive count in any of the remaining samples.
ps.exp3 <- subset_samples(ps.thresh2, Experiment == "3")
ps.exp3 <- filter_taxa(
  ps.exp3,
  function(counts) sum(counts > 0) > 0,
  prune = TRUE
)
ps.exp3

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 13325 taxa and 192 samples ]
sample_data() Sample Data:       [ 192 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 13325 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 13325 tips and 13292 internal nodes ]

In [11]:
# Write the experiment 3 subset to the Exp11 project folder.
saveRDS(
  object = ps.exp3,
  file = '~/Hyphosphere/Exp11/Exp11_phyloseq.rds'
)

In [12]:
# List the distinct values of the Sample column.
# (Swap "Sample" for 'Experiment' to list the experiment labels instead.)
unique(sample_data(ps.thresh2)[, "Sample"])

Unnamed: 0,Sample
10xBLS156_S303,10xBLS156
2xBLS143_S297,2xBLS143
5xBLS120_S300,5xBLS120
BaseA_S310,BaseA
BaseB_S291,BaseB
BaseC_S294,BaseC
HCBF1_S247,HCBF1
HCBF2_S259,HCBF2
HCBF3_S271,HCBF3
HCBN1_S211,HCBN1


In [13]:
# Exp1 (experiment 1): keep only its samples, then discard taxa that have no
# positive count in any of the remaining samples.
ps.exp1 <- subset_samples(ps.thresh2, Experiment == "1")
ps.exp1 <- filter_taxa(
  ps.exp1,
  function(counts) sum(counts > 0) > 0,
  prune = TRUE
)
ps.exp1

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4705 taxa and 51 samples ]
sample_data() Sample Data:       [ 51 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 4705 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4705 tips and 4701 internal nodes ]

In [14]:
# Write the experiment 1 subset to the Exp1 project folder.
saveRDS(
  object = ps.exp1,
  file = '~/Hyphosphere/Exp1/Exp1_phyloseq.rds'
)

# Save Exp10 (experiment 2)

In [15]:
# Experiment 2: keep only its samples, then discard taxa that have no positive
# count in any of the remaining samples.
ps.exp2 <- subset_samples(ps.thresh2, Experiment == "2")
ps.exp2 <- filter_taxa(
  ps.exp2,
  function(counts) sum(counts > 0) > 0,
  prune = TRUE
)
ps.exp2

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 10702 taxa and 192 samples ]
sample_data() Sample Data:       [ 192 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 10702 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 10702 tips and 10680 internal nodes ]

In [16]:
# Print the first two sample-data columns to check that only the intended
# samples were retained in this subset.
print(sample_data(ps.exp2)[, 1:2])

                             Sample Experiment
1_3.1RH                       3.1RH          2
10_2.1RH                      2.1RH          2
100_5.5BP                     5.5BP          2
101_5.5CS                     5.5CS          2
102_5.5BS                     5.5BS          2
103_6.5BP                     6.5BP          2
104_6.5CS                     6.5CS          2
105_6.5BS                     6.5BS          2
106_6.1BP                     6.1BP          2
107_5.1BP                     5.1BP          2
108_6.1RT                     6.1RT          2
109_5.1RT                     5.1RT          2
11_1.1CS                      1.1CS          2
110_4.1RT                     4.1RT          2
111_4.2BP                     4.2BP          2
112_4.2BS                     4.2BS          2
113_4.2CS                     4.2CS          2
114_3.2CS                     3.2CS          2
115_5.2BS                     5.2BS          2
116_5.2BP                     5.2BP          2
117_6.2BP    

In [17]:
# Write the experiment 2 subset to the Exp10 project folder.
saveRDS(
  object = ps.exp2,
  file = '~/Hyphosphere/Exp10/Exp10_phyloseq.rds'
)