## Part 1: Get the ITS feature table for correlation analysis (ITS only). Split the two materials and compare relative vs. quantitative abundance

In [1]:
source("./ITS-PhyloseqObjects.R")

“input string 1 is invalid in this locale”

In [2]:
# Print the summary of the starting phyloseq object (presumably created by the
# sourced ITS-PhyloseqObjects.R script; it is not defined in this notebook).
physeq.23s

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 138 taxa and 23 samples ]
sample_data() Sample Data:       [ 23 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 138 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 138 tips and 137 internal nodes ]

In [14]:
# Agglomerate taxa at the Genus rank, then print the resulting object summary.
physeq.23s.genus <- tax_glom(physeq.23s, taxrank = "Genus")
physeq.23s.genus

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 57 taxa and 23 samples ]
sample_data() Sample Data:       [ 23 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 57 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 57 tips and 56 internal nodes ]

In [15]:
# Remove taxa whose total abundance over all samples is zero, i.e. taxa that
# do not occur in any sample of this object.
physeq.23s.genus <- prune_taxa(taxa = taxa_sums(physeq.23s.genus) > 0,
                               x = physeq.23s.genus)
physeq.23s.genus

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 36 taxa and 23 samples ]
sample_data() Sample Data:       [ 23 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 36 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 36 tips and 35 internal nodes ]

In [16]:
# Rescale every sample's counts to percentages of that sample's total.
p.23s.genus.rel <- transform_sample_counts(
    physeq.23s.genus,
    function(counts) 100 * counts/sum(counts)
)
# Per-sample totals, displayed as a sanity check on the transform.
sample_sums(p.23s.genus.rel)

### Relative MDF

In [17]:
# Short alias: rel.mdf is p.23s.genus.rel restricted to the MDF samples.
rel.mdf <- subset_samples(p.23s.genus.rel, Material == "MDF")
# Drop taxa with zero total abundance in the MDF samples (absent from all of them).
rel.mdf <- prune_taxa(taxa = taxa_sums(rel.mdf) > 0, x = rel.mdf)
# Drop taxa whose Genus is the "g__unidentified" placeholder.
rel.mdf <- subset_taxa(rel.mdf, Genus != "g__unidentified")
rel.mdf

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 16 taxa and 14 samples ]
sample_data() Sample Data:       [ 14 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 16 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 16 tips and 15 internal nodes ]

In [18]:
# Export the MDF relative-abundance table and its taxonomy table.
# write.csv() does not create missing directories, so make sure the output
# folder exists first; this is a silent no-op when it is already there.
dir.create("./Correlation", showWarnings = FALSE, recursive = TRUE)
write.csv(otu_table(rel.mdf), "./Correlation/rel-mdf-otu.csv")
write.csv(tax_table(rel.mdf), "./Correlation/mdf-taxa.csv")

### Quantitative MDF

In [19]:
# Derive the quantitative MDF object from the relative one.
# The "Counts" column of the sample metadata is handed to rel_to_quan(), which
# is defined outside this notebook (presumably in the sourced script), so how
# it combines the two inputs is not visible here.
count.mdf <- as.data.frame(sample_data(rel.mdf))$Counts
# Work on a copy so rel.mdf keeps its relative abundances; only the copy's
# OTU table is replaced.
quan.mdf <- rel.mdf
otu_table(quan.mdf) <- rel_to_quan(quan.mdf, count.mdf)

In [20]:
# Display the converted MDF OTU table for a visual check.
otu_table(quan.mdf)

Unnamed: 0,T1R1,T1R4,T2R1,T2R4,T2R5,T3R1,T3R4,T3R5,T4R1,T4R4,T4R5,T5R1,T5R4,T5R5
SH1726759.08FU_AF444541_refs,0.0,0.0,0.0,0.0,0.0,0.10865936,0.0,0.01166622,0.0,0.0,0.0,0.0,0.0,0.0
SH1725251.08FU_KJ706901_reps,0.0,0.0,2.869668,0.0,0.0,1.81616353,0.0,0.0,0.37489962,0.0,0.0,2.872101,0.0,0.0
SH1674913.08FU_AJ244232_refs,0.0,0.0,8.271395,0.0,0.0,0.43463743,0.0,0.0,0.04439601,0.0,0.0,0.0,0.0,0.0
SH1676198.08FU_KF984792_refs,0.0,0.0,0.0,0.0,0.8033863,0.0,0.0,0.01166622,0.0,0.05230945,0.0,0.0,0.1545583,0.04981254
SH1694434.08FU_EU037063_reps,0.0,0.0,0.0,0.0,0.0,0.03104553,0.0,0.0,0.02466445,0.0,0.02555106,0.0,0.0,0.0
SH1692811.08FU_FJ430773_reps,2398.786,0.0,1622.375033,1243.48836,1291.8450976,174.16542575,32.3081536,13.21083059,50.35987152,76.63987835,41.3160635,840.0097608,119.5438405,52.96318826
SH1695393.08FU_U65307_reps,0.0,157.6667,721.130594,1361.03391,576.8313309,132.95248409,49.7346021,56.6068458,95.27876563,84.97015776,248.39036813,408.7159279,95.4327415,332.69798672
SH1685996.08FU_AJ292429_refs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02466445,0.0,0.0,0.3191223,0.0,0.0
SH1738712.08FU_MG372138_reps,0.0,0.0,0.0,144.38071,63.3068369,0.0,2.4774816,0.30098855,0.0,0.92849268,8.18485612,0.0,0.8430454,2.46572097
SH1744621.08FU_KC181928_reps,0.0,0.0,70.728867,88.44264,237.8023314,3.1666441,0.9809033,6.85740571,0.28610761,1.61505417,2.10370391,6.4622272,2.2200196,5.09333271


In [21]:
# Export the quantitative MDF OTU table.
write.csv(x = otu_table(quan.mdf), file = "./Correlation/quan-mdf-otu.csv")

### Relative Gypsum

In [22]:
# Short alias: rel.gyp is p.23s.genus.rel restricted to the Gypsum samples.
rel.gyp <- subset_samples(p.23s.genus.rel, Material == "Gypsum")
# Drop taxa with zero total abundance in the Gypsum samples (absent from all of them).
rel.gyp <- prune_taxa(taxa = taxa_sums(rel.gyp) > 0, x = rel.gyp)
# Drop taxa whose Genus is the "g__unidentified" placeholder.
rel.gyp <- subset_taxa(rel.gyp, Genus != "g__unidentified")
rel.gyp

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 22 taxa and 9 samples ]
sample_data() Sample Data:       [ 9 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 22 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 22 tips and 21 internal nodes ]

In [23]:
# Export the Gypsum relative-abundance table and its taxonomy table.
write.csv(x = otu_table(rel.gyp), file = "./Correlation/rel-gyp-otu.csv")
write.csv(x = tax_table(rel.gyp), file = "./Correlation/gyp-taxa.csv")

### Quantitative Gypsum

In [24]:
# Derive the quantitative Gypsum object from the relative one.
# The "Counts" column of the sample metadata is handed to rel_to_quan(), which
# is defined outside this notebook (presumably in the sourced script), so how
# it combines the two inputs is not visible here.
count.gyp <- as.data.frame(sample_data(rel.gyp))$Counts
# Work on a copy so rel.gyp keeps its relative abundances; only the copy's
# OTU table is replaced.
quan.gyp <- rel.gyp
otu_table(quan.gyp) <- rel_to_quan(quan.gyp, count.gyp)

In [25]:
# Display the converted Gypsum OTU table for a visual check.
otu_table(quan.gyp)

Unnamed: 0,T1R7,T2R7,T2R9,T3R7,T3R9,T4R7,T4R9,T5R7,T5R9
SH1726759.08FU_AF444541_refs,1018.769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SH1715510.08FU_KU204769_reps,0.0,269.6135,314.3633678,1052.313,0.0,0.0,702.37312,0.0,0.0
SH1676198.08FU_KF984792_refs,0.0,0.0,2.2060587,0.0,41.10314,0.0,884.46985,0.0,20905.413
SH1694434.08FU_EU037063_reps,0.0,0.0,354.2930307,0.0,335.67568,0.0,65.03455,0.0,0.0
SH1692811.08FU_FJ430773_reps,0.0,6654.14946,2328.0537689,83850.183,8432.99518,231093.209,2913.54774,223494.7772,78300.275
SH1695393.08FU_U65307_reps,0.0,16886.2026,370.6178652,302252.883,3062.18428,580828.584,897.47676,436293.8921,12163.149
SH1679963.08FU_KX664387_reps,0.0,0.0,0.0,0.0,13.70105,0.0,0.0,0.0,0.0
SH1711465.08FU_AY554212_refs,0.0,0.0,0.4412117,0.0,0.0,0.0,0.0,0.0,0.0
SH1685996.08FU_AJ292429_refs,0.0,0.0,11.9127171,239.162,3665.0304,0.0,58283.9617,0.0,557604.384
SH1729543.08FU_FR682361_reps,0.0,0.0,0.0,0.0,0.0,2526.99,0.0,0.0,0.0


In [26]:
# Export the quantitative Gypsum OTU table.
write.csv(x = otu_table(quan.gyp), file = "./Correlation/quan-gyp-otu.csv")

## Part 2: Get the ITS feature table for 16S and ITS co-occurrence. Only include taxa that are present in at least 10% of all samples. Split the two materials and compare relative vs. quantitative abundance

In [5]:
# Build a per-taxon prevalence table used for filtering:
#   Prevalence     - number of samples in which the taxon has a non-zero value
#   TotalAbundance - the taxon's summed abundance over all samples
# An OTU table can be stored with taxa as rows or as columns, so the apply()
# margin is taken from taxa_are_rows() instead of being hard-coded to 1; with
# the wrong margin the tally would be per sample rather than per taxon.
prevalencedf = apply(X = otu_table(physeq.23s.genus),
                     MARGIN = if (taxa_are_rows(physeq.23s.genus)) 1 else 2,
                     FUN = function(x){sum(x > 0)})

# Row names (taxon IDs) come from the named vectors supplied here.
prevalencedf = data.frame(Prevalence = prevalencedf,
                          TotalAbundance = taxa_sums(physeq.23s.genus))

In [6]:
# Select taxa that are present in at least 10% of all samples.
prevalenceThreshold <- 0.10 * nsamples(physeq.23s.genus)
prevalenceThreshold
# Taxon IDs are the row names of the prevalence table.
keepTaxa <- row.names(prevalencedf)[prevalencedf[["Prevalence"]] >= prevalenceThreshold]
length(keepTaxa)

In [7]:
# Keep only the taxa that passed the prevalence filter, then print the summary.
physeq.23s.genus.prevalent <- prune_taxa(taxa = keepTaxa, x = physeq.23s.genus)
physeq.23s.genus.prevalent

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 26 taxa and 23 samples ]
sample_data() Sample Data:       [ 23 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 26 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 26 tips and 25 internal nodes ]

### Relative

In [8]:
# Rescale every sample's counts to percentages of that sample's total,
# computed over the prevalence-filtered taxa only.
physeq.23s.genus.prevalent.rel <- transform_sample_counts(
    physeq.23s.genus.prevalent,
    function(counts) 100 * counts/sum(counts)
)
# Per-sample totals, displayed as a sanity check on the transform.
sample_sums(physeq.23s.genus.prevalent.rel)

In [9]:
# Short alias: p.rel.mdf is physeq.23s.genus.prevalent.rel restricted to the MDF samples.
p.rel.mdf <- subset_samples(physeq.23s.genus.prevalent.rel, Material == "MDF")
# Drop taxa with zero total abundance in the MDF samples (absent from all of them).
p.rel.mdf <- prune_taxa(taxa = taxa_sums(p.rel.mdf) > 0, x = p.rel.mdf)
# Drop taxa whose Genus is the "g__unidentified" placeholder.
p.rel.mdf <- subset_taxa(p.rel.mdf, Genus != "g__unidentified")
p.rel.mdf

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 16 taxa and 14 samples ]
sample_data() Sample Data:       [ 14 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 16 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 16 tips and 15 internal nodes ]

In [10]:
# Display the prevalence-filtered MDF relative-abundance table for a visual check.
otu_table(p.rel.mdf)

Unnamed: 0,T1R1,T1R4,T2R1,T2R4,T2R5,T3R1,T3R4,T3R5,T4R1,T4R4,T4R5,T5R1,T5R4,T5R5
SH1726759.08FU_AF444541_refs,0,0,0.0,0.0,0.0,0.031587022,0.0,0.01479859,0.0,0.0,0.0,0.0,0.0,0.0
SH1725251.08FU_KJ706901_reps,0,0,0.1112274,0.0,0.0,0.527954515,0.0,0.0,0.2179649,0.0,0.0,0.14520227,0.0,0.0
SH1674913.08FU_AJ244232_refs,0,0,0.3205967,0.0,0.0,0.126348089,0.0,0.0,0.02581163,0.0,0.0,0.0,0.0,0.0
SH1676198.08FU_KF984792_refs,0,0,0.0,0.0,0.03575771,0.0,0.0,0.01479859,0.0,0.03041247,0.0,0.0,0.05992265,0.01263743
SH1694434.08FU_EU037063_reps,0,0,0.0,0.0,0.0,0.009024863,0.0,0.0,0.0143398,0.0,0.008488724,0.0,0.0,0.0
SH1692811.08FU_FJ430773_reps,100,0,62.8827532,43.16169236,57.4983909,50.629484229,37.5676204,16.75792465,29.27899507,44.55806881,13.726266942,42.46763199,46.34744239,13.43674965
SH1695393.08FU_U65307_reps,0,100,27.9507982,47.2417186,25.67403275,38.648977934,57.8309327,71.8057241,55.39463118,49.40125451,82.521716986,20.66309039,36.99950972,84.40540882
SH1685996.08FU_AJ292429_refs,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0143398,0.0,0.0,0.01613359,0.0,0.0
SH1738712.08FU_MG372138_reps,0,0,0.0,5.01147917,2.81770722,0.0,2.8807925,0.38180365,0.0,0.53982133,2.719221301,0.0,0.32685079,0.62555289
SH1744621.08FU_KC181928_reps,0,0,2.7414289,3.06985897,10.58428091,0.920536077,1.1405853,8.69861189,0.16634163,0.93898498,0.698904955,0.32670512,0.86070709,1.29217743


In [11]:
# Export the prevalence-filtered MDF relative table and its taxonomy
# (written to the current working directory).
write.csv(x = otu_table(p.rel.mdf), file = "ITS-rel-mdf.csv")
write.csv(x = tax_table(p.rel.mdf), file = "ITS-mdf-taxa.csv")

In [12]:
# Restrict the prevalence-filtered relative object to the Gypsum samples.
p.rel.gyp <- subset_samples(physeq.23s.genus.prevalent.rel, Material == "Gypsum")
# Drop taxa with zero total abundance in the Gypsum samples (absent from all of them).
p.rel.gyp <- prune_taxa(taxa = taxa_sums(p.rel.gyp) > 0, x = p.rel.gyp)
# Drop taxa whose Genus is the "g__unidentified" placeholder.
p.rel.gyp <- subset_taxa(p.rel.gyp, Genus != "g__unidentified")
p.rel.gyp

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 18 taxa and 9 samples ]
sample_data() Sample Data:       [ 9 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 18 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 18 tips and 17 internal nodes ]

In [13]:
# Display the prevalence-filtered Gypsum relative-abundance table for a visual check.
otu_table(p.rel.gyp)

Unnamed: 0,T1R7,T2R7,T2R9,T3R7,T3R9,T4R7,T4R9,T5R7,T5R9
SH1726759.08FU_AF444541_refs,100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SH1715510.08FU_KU204769_reps,0,1.0668999,6.33981403,0.10066346,0.0,0.0,0.57148905,0.0,0.0
SH1676198.08FU_KF984792_refs,0,0.0,0.04448992,0.0,0.10223207,0.0,0.71965287,0.0,1.216276
SH1694434.08FU_EU037063_reps,0,0.0,7.14508164,0.0,0.83489521,0.0,0.05291565,0.0,0.0
SH1692811.08FU_FJ430773_reps,0,26.3314386,46.95021578,8.02104782,20.97461237,7.4827149,2.37062123,6.51256412,4.5555064
SH1695393.08FU_U65307_reps,0,66.8211631,7.47430707,28.91329215,7.61628898,18.8070204,0.730236,12.71346016,0.7076515
SH1685996.08FU_AJ292429_refs,0,0.0,0.24024558,0.02287806,9.11569262,0.0,47.42300773,0.0,32.4413976
SH1706045.08FU_GU054033_reps,0,0.0,0.02669395,0.0,1.00528199,0.1145522,0.96306487,0.25972339,0.4201681
SH1744621.08FU_KC181928_reps,0,1.7665063,5.22311696,0.5719515,3.27142614,1.1455222,0.82548418,0.53892604,0.619195
SH1744611.08FU_EF679363_refs,0,1.6353301,7.55883792,0.41638069,5.62276367,0.5073027,2.38120436,0.44802286,2.034498


In [51]:
# Export the prevalence-filtered Gypsum relative table and its taxonomy
# (written to the current working directory).
# NOTE(review): the taxonomy file name follows a different pattern from the MDF
# one ("ITS-mdf-taxa.csv"); left unchanged in case other scripts read it.
write.csv(x = otu_table(p.rel.gyp), file = "ITS-rel-gyp.csv")
write.csv(x = tax_table(p.rel.gyp), file = "ITS-taxonomy-gyp.csv")

### Quantitative

In [52]:
# Derive the quantitative MDF object from the prevalence-filtered relative one.
# The "Counts" column of the sample metadata is handed to rel_to_quan(), which
# is defined outside this notebook (presumably in the sourced script).
# NOTE(review): this reassigns count.mdf, which the Part 1 cells also set.
count.mdf <- as.data.frame(sample_data(p.rel.mdf))$Counts
# Work on a copy so p.rel.mdf keeps its relative abundances.
p.quan.mdf <- p.rel.mdf
otu_table(p.quan.mdf) <- rel_to_quan(p.quan.mdf, count.mdf)

In [53]:
# Export the prevalence-filtered quantitative MDF table.
write.csv(x = otu_table(p.quan.mdf), file = "ITS-quan-mdf.csv")

In [55]:
# Derive the quantitative Gypsum object from the prevalence-filtered relative one.
# The "Counts" column of the sample metadata is handed to rel_to_quan(), which
# is defined outside this notebook (presumably in the sourced script).
# NOTE(review): this reassigns count.gyp, which the Part 1 cells also set.
count.gyp <- as.data.frame(sample_data(p.rel.gyp))$Counts
# Work on a copy so p.rel.gyp keeps its relative abundances.
p.quan.gyp <- p.rel.gyp
otu_table(p.quan.gyp) <- rel_to_quan(p.quan.gyp, count.gyp)

In [56]:
# Export the prevalence-filtered quantitative Gypsum table.
write.csv(x = otu_table(p.quan.gyp), file = "ITS-quan-gyp.csv")

In [57]:
# Display the Gypsum "Counts" values that were passed to rel_to_quan().
count.gyp