# Prep for iTOL

* This script uses the differential abundance outputs and the phylogenetic tree to generate output files for import into the tree of life

In [1]:
out_Dir = '~/Hyphosphere/data/3Exp/For_iTOL'

work_dir = '~/Hyphosphere/data/3Exp/DESeq'

# phyloseq - full object with tree
physeq_file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree_experimental_woContam.rds'

In [51]:
library(RColorBrewer)
library(plyr)
library(tidyr)
library(dplyr)
library(reshape2)
library(phyloseq)

In [6]:
ps = readRDS(physeq_file) %>% subset_taxa(., !is.na(Phylum)) %>%
    # remove non-experimental controls
    
    subset_samples(., SampleType != "GV" & Experiment %in% c(1,2) & SampleType %in% c("CS","CH") & Soil != "Sand") %>%

     # re-normalize after subsetting taxa
    transform_sample_counts(., function(x) x/sum(x))

ps


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 18707 taxa and 74 samples ]
sample_data() Sample Data:       [ 74 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 18707 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 18707 tips and 18706 internal nodes ]

In [7]:
#Filter to the 200 most abundant OTUs
physeq.thresh <- prune_taxa(names(sort(taxa_sums(ps),TRUE)[1:200]), ps)

#physeq.thresh = filter_taxa(physeq.Full, 
#                 function(x) sum(x > 3) > 0, TRUE)
physeq.thresh


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 200 taxa and 74 samples ]
sample_data() Sample Data:       [ 74 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 200 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 200 tips and 199 internal nodes ]

In [8]:
outFile = file.path(out_Dir,'3ExpTree_200.tree')
MyTree =  physeq.thresh %>% phy_tree
ape::write.tree(MyTree, file = outFile)
rm(MyTree)

In [17]:
TreeTax = taxa_names(physeq.thresh)

# Create annotation files for iTOL

## Hyphal ASVs

In [13]:
readfile = file.path(work_dir, 'dfl2fc-Exp1.txt')
df.l2fc.Exp1 = read.table(readfile, header = TRUE, sep= '\t')

readfile = file.path(work_dir, 'dfl2fc-Exp2.txt')
df.l2fc.Exp2 = read.table(readfile, header = TRUE, sep= '\t')

df.l2fc = bind_rows(df.l2fc.Exp1, df.l2fc.Exp2)

“binding character and factor vector, coercing into character vector”

In [15]:
head(df.l2fc)
tail(df.l2fc)

baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,OTU,Subset,Term,Contrast,DAI,Kingdom,Phylum,Class,Order,Family,Genus,Experiment
331.72029,-2.716566,0.5217389,-5.206754,1.92173e-07,1.885675e-05,ASV5,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,1
136.95264,4.977612,1.1905977,4.180767,2.905274e-05,0.001192425,ASV6,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,1
0.0,,,,,,ASV8,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Alphaproteobacteria,Caulobacterales,Caulobacteraceae,Asticcacaulis,1
0.0,,,,,,ASV13,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Luteimonas,1
26.70389,8.43,1.7857267,4.720767,2.349571e-06,0.0001478663,ASV14,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Deltaproteobacteria,Myxococcales,Sandaracinaceae,,1
0.0,,,,,,ASV15,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Betaproteobacteriales,Burkholderiaceae,,1


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,OTU,Subset,Term,Contrast,DAI,Kingdom,Phylum,Class,Order,Family,Genus,Experiment
641779,0.0,,,,,,ASV36849,Lansing-RI,SampleType,BH_BS,27,Bacteria,Patescibacteria,Microgenomatia,Candidatus_Roizmanbacteria,,,2
641780,0.1757976,0.9586283,3.053845,0.3139086,0.7535904,0.9999796,ASV36865,Lansing-RI,SampleType,BH_BS,27,Bacteria,Dependentiae,Babeliae,Babeliales,,,2
641781,0.0,,,,,,ASV36871,Lansing-RI,SampleType,BH_BS,27,Bacteria,Bacteroidetes,Bacteroidia,Sphingobacteriales,AKYH767,,2
641782,0.0,,,,,,ASV36896,Lansing-RI,SampleType,BH_BS,27,Bacteria,Acidobacteria,Blastocatellia_(Subgroup_4),Blastocatellales,Blastocatellaceae,JGI_0001001-H03,2
641783,0.0,,,,,,ASV37791,Lansing-RI,SampleType,BH_BS,27,Archaea,Nanoarchaeaeota,Woesearchaeia,,,,2
641784,0.0,,,,,,ASV38128,Lansing-RI,SampleType,BH_BS,27,Bacteria,Patescibacteria,WWE3,,,,2


In [39]:
df.l2fc$Subset %>% unique

In [None]:
#ffffcc - Lansing 1
#ffeda0 - Lansing 2
#fed976
#feb24c - Florence 1
#fd8d3c - Florence 2
#fc4e2a
#e31a1c
#bd0026 - Pendelton 1
#800026 - Pendelton 2

In [30]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Lansing-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#ffffcc", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Lansing1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Lansing-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#ffeda0", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Lansing2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

In [40]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Florence-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#feb24c", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Florence1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Florence-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#fd8d3c", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Florence2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

In [41]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Pendelton-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#bd0026", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Pendelton1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Pendelton-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#800026", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Pendelton2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

### Leaf labels

In [71]:
taxa = physeq.thresh %>% tax_table %>% as.data.frame
taxa$OTU = tax_table(physeq.thresh) %>% row.names

# taxa = mutate(taxa, label = paste(Family, Genus, sep = "_")) %>%
#     select(OTU, label)
# head(taxa)

taxa = taxa %>%
    mutate(GenusLabel = ifelse(!is.na(Genus), paste(Genus), 
                        ifelse(!is.na(Family), paste('Unclassified ', Family, sep = ""), 
                        ifelse(!is.na(Order), paste('Unclassified ', Order, sep = ""),
                        ifelse(!is.na(Class), paste('Unclassified ', Class, sep = ""), 
                               paste("Unclassified ", Phylum, sep = "")))))) %>%
    select(OTU, GenusLabel)
head(taxa)

write.table(taxa, file = file.path(out_Dir, 'taxa_labels.txt'), sep = '\t',
            row.names = FALSE, quote = FALSE)

Unnamed: 0,OTU,GenusLabel
ASV212,ASV212,Unclassified Subgroup_7
ASV109,ASV109,Unclassified Pedosphaeraceae
ASV37,ASV37,Candidatus_Udaeobacter
ASV55,ASV55,Candidatus_Udaeobacter
ASV108,ASV108,Candidatus_Udaeobacter
ASV230,ASV230,Candidatus_Udaeobacter


In [45]:
dim(taxa)

## Relative abundance data

### Mean of two experiments both sample types

In [46]:
mdf = psmelt(physeq.thresh)
head(mdf)

“The sample variables: 
Sample
 have been renamed to: 
sample_Sample
to avoid conflicts with special phyloseq plot attribute names.”

Unnamed: 0,OTU,Sample,Abundance,sample_Sample,Experiment,Plant,Fungus,Soil,SampleType,Treatment,TimePoint,DAI,Rep,Concentration_ng.ul,Kingdom,Phylum,Class,Order,Family,Genus
11734,ASV57,GVN7HN3-CH,0.1996501,GVN7HN3-CH,2,GVN7HN3,Gv,Lansing,CH,HN,,27,3,,Bacteria,Proteobacteria,Gammaproteobacteria,Betaproteobacteriales,Burkholderiaceae,
11920,ASV6,GVCLHN4-CH,0.1869506,GVCLHN4-CH,2,GVCLHN4,Gv,Pendelton,CH,HN,,27,4,,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio
1275,ASV118,GVN7HP3-CH,0.1715024,GVN7HP3-CH,2,GVN7HP3,Gv,Lansing,CH,HP,,27,3,,Bacteria,Proteobacteria,Gammaproteobacteria,Betaproteobacteriales,Burkholderiaceae,Noviherbaspirillum
6414,ASV246,GVFLHN1-CH,0.1610611,GVFLHN1-CH,2,GVFLHN1,Gv,Florence,CH,HN,,27,1,,Bacteria,Cyanobacteria,Oxyphotobacteria,Nostocales,Coleofasciculaceae,
11953,ASV6,GVN7HP3-CH,0.1594556,GVN7HP3-CH,2,GVN7HP3,Gv,Lansing,CH,HP,,27,3,,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio
12142,ASV61,GVN7HP2-CH,0.1479419,GVN7HP2-CH,2,GVN7HP2,Gv,Lansing,CH,HP,,27,2,,Bacteria,Proteobacteria,Gammaproteobacteria,Betaproteobacteriales,Burkholderiaceae,


In [53]:

# split dataframe by day and OTU, sumarize mean abundance
mdf.byOTU = ddply(mdf, .(Soil, SampleType, OTU), summarize, mean_Abundance = mean(Abundance))
head(mdf.byOTU)

Soil,SampleType,OTU,mean_Abundance
Florence,CH,ASV100,0.001304324
Florence,CH,ASV101,0.0005555983
Florence,CH,ASV102,0.0
Florence,CH,ASV103,0.0023316966
Florence,CH,ASV104,0.0014897948
Florence,CH,ASV106,0.0


In [59]:
#cast by Plant
dfc = dcast(mdf.byOTU, OTU ~ Soil + SampleType) %>%
    select(OTU, Lansing_CS, Lansing_CH, Florence_CS, Florence_CH, Pendelton_CS, Pendelton_CH)
head(dfc)

Using mean_Abundance as value column: use value.var to override.


OTU,Lansing_CS,Lansing_CH,Florence_CS,Florence_CH,Pendelton_CS,Pendelton_CH
ASV100,0.003019926,0.0027452123,0.001567567,0.001304324,0.0009879816,0.0009181631
ASV101,0.002068219,0.0009525206,0.002963502,0.0005555983,0.0012679631,0.0003717635
ASV102,9.782109e-05,0.0156217216,0.0,0.0,0.0,0.0
ASV103,0.004116493,0.0016980743,0.008537221,0.0023316966,0.0095266452,0.0026123151
ASV104,0.002542281,0.0002285653,0.001733103,0.0014897948,0.0042357781,0.0014481912
ASV106,0.004124973,0.001216435,0.0,0.0,0.0,0.0


In [60]:
write.table(dfc, file.path(out_Dir,'CSCH_Abund.txt'), sep = "\t", 
            row.names = FALSE, quote = FALSE)

### Hyphal samples only stacked by experiment

In [63]:
mdf.byOTU = ddply(mdf, .(Soil, SampleType, OTU, Experiment), summarize, mean_Abundance = mean(Abundance))

mdf.F.byOTU = filter(mdf.byOTU, Soil == "Florence" & SampleType == 'CH')
mdf.L.byOTU = filter(mdf.byOTU, Soil == "Lansing" & SampleType == 'CH')
mdf.P.byOTU = filter(mdf.byOTU, Soil == "Pendelton" & SampleType == 'CH')
head(mdf.F.byOTU)

Soil,SampleType,OTU,Experiment,mean_Abundance
Florence,CH,ASV100,1,0.0007582158
Florence,CH,ASV100,2,0.0015091146
Florence,CH,ASV101,1,0.0008770968
Florence,CH,ASV101,2,0.0004350364
Florence,CH,ASV102,1,0.0
Florence,CH,ASV102,2,0.0


In [68]:
dfc.F = dcast(mdf.F.byOTU, OTU ~ Experiment) 
dfc.L = dcast(mdf.L.byOTU, OTU ~ Experiment)    
dfc.P = dcast(mdf.P.byOTU, OTU ~ Experiment) 

Using mean_Abundance as value column: use value.var to override.
Using mean_Abundance as value column: use value.var to override.
Using mean_Abundance as value column: use value.var to override.


In [69]:
write.table(dfc.F, file.path(out_Dir,'Florence_Abund.txt'), sep = "\t", 
            row.names = FALSE, quote = FALSE)
write.table(dfc.L, file.path(out_Dir,'Lansing_Abund.txt'), sep = "\t", 
            row.names = FALSE, quote = FALSE)
write.table(dfc.P, file.path(out_Dir,'Pendelton_Abund.txt'), sep = "\t", 
            row.names = FALSE, quote = FALSE)