# Prep for iTOL

* This script uses the differential abundance outputs and the phylogenetic tree to generate output files for import into the tree of life

In [1]:
out_Dir = '~/Hyphosphere/data/3Exp/For_iTOL'

work_dir = '~/Hyphosphere/data/3Exp/DESeq'

# phyloseq - full object with tree
physeq_file = '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_wtree_experimental_woContam.rds'

In [2]:
library(RColorBrewer)
library(tidyr)
library(dplyr)
library(reshape2)
library(phyloseq)

“package ‘RColorBrewer’ was built under R version 3.6.1”Registered S3 method overwritten by 'dplyr':
  method               from  
  as.data.frame.tbl_df tibble

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

“package ‘reshape2’ was built under R version 3.6.1”
Attaching package: ‘reshape2’

The following object is masked from ‘package:tidyr’:

    smiths

“package ‘phyloseq’ was built under R version 3.6.1”

In [6]:
ps = readRDS(physeq_file) %>% subset_taxa(., !is.na(Phylum)) %>%
    # remove non-experimental controls
    
    subset_samples(., SampleType != "GV" & Experiment %in% c(1,2) & SampleType %in% c("CS","CH") & Soil != "Sand") %>%

     # re-normalize after subsetting taxa
    transform_sample_counts(., function(x) x/sum(x))

ps


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 18707 taxa and 74 samples ]
sample_data() Sample Data:       [ 74 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 18707 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 18707 tips and 18706 internal nodes ]

In [7]:
#Filter to the 200 most abundant OTUs
physeq.thresh <- prune_taxa(names(sort(taxa_sums(ps),TRUE)[1:200]), ps)

#physeq.thresh = filter_taxa(physeq.Full, 
#                 function(x) sum(x > 3) > 0, TRUE)
physeq.thresh


phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 200 taxa and 74 samples ]
sample_data() Sample Data:       [ 74 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 200 taxa by 6 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 200 tips and 199 internal nodes ]

In [8]:
outFile = file.path(out_Dir,'3ExpTree_200.tree')
MyTree =  physeq.thresh %>% phy_tree
ape::write.tree(MyTree, file = outFile)
rm(MyTree)

In [17]:
TreeTax = taxa_names(physeq.thresh)

# Create annotation files for iTOL

## Hyphal ASVs

In [13]:
readfile = file.path(work_dir, 'dfl2fc-Exp1.txt')
df.l2fc.Exp1 = read.table(readfile, header = TRUE, sep= '\t')

readfile = file.path(work_dir, 'dfl2fc-Exp2.txt')
df.l2fc.Exp2 = read.table(readfile, header = TRUE, sep= '\t')

df.l2fc = bind_rows(df.l2fc.Exp1, df.l2fc.Exp2)

“binding character and factor vector, coercing into character vector”

In [15]:
head(df.l2fc)
tail(df.l2fc)

baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,OTU,Subset,Term,Contrast,DAI,Kingdom,Phylum,Class,Order,Family,Genus,Experiment
331.72029,-2.716566,0.5217389,-5.206754,1.92173e-07,1.885675e-05,ASV5,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Micrococcaceae,Pseudarthrobacter,1
136.95264,4.977612,1.1905977,4.180767,2.905274e-05,0.001192425,ASV6,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,1
0.0,,,,,,ASV8,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Alphaproteobacteria,Caulobacterales,Caulobacteraceae,Asticcacaulis,1
0.0,,,,,,ASV13,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Xanthomonadales,Xanthomonadaceae,Luteimonas,1
26.70389,8.43,1.7857267,4.720767,2.349571e-06,0.0001478663,ASV14,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Deltaproteobacteria,Myxococcales,Sandaracinaceae,,1
0.0,,,,,,ASV15,Lansing-Gv,SampleType,CH_CS,81,Bacteria,Proteobacteria,Gammaproteobacteria,Betaproteobacteriales,Burkholderiaceae,,1


Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,OTU,Subset,Term,Contrast,DAI,Kingdom,Phylum,Class,Order,Family,Genus,Experiment
641779,0.0,,,,,,ASV36849,Lansing-RI,SampleType,BH_BS,27,Bacteria,Patescibacteria,Microgenomatia,Candidatus_Roizmanbacteria,,,2
641780,0.1757976,0.9586283,3.053845,0.3139086,0.7535904,0.9999796,ASV36865,Lansing-RI,SampleType,BH_BS,27,Bacteria,Dependentiae,Babeliae,Babeliales,,,2
641781,0.0,,,,,,ASV36871,Lansing-RI,SampleType,BH_BS,27,Bacteria,Bacteroidetes,Bacteroidia,Sphingobacteriales,AKYH767,,2
641782,0.0,,,,,,ASV36896,Lansing-RI,SampleType,BH_BS,27,Bacteria,Acidobacteria,Blastocatellia_(Subgroup_4),Blastocatellales,Blastocatellaceae,JGI_0001001-H03,2
641783,0.0,,,,,,ASV37791,Lansing-RI,SampleType,BH_BS,27,Archaea,Nanoarchaeaeota,Woesearchaeia,,,,2
641784,0.0,,,,,,ASV38128,Lansing-RI,SampleType,BH_BS,27,Bacteria,Patescibacteria,WWE3,,,,2


In [39]:
df.l2fc$Subset %>% unique

In [None]:
#ffffcc - Lansing 1
#ffeda0 - Lansing 2
#fed976
#feb24c - Florence 1
#fd8d3c - Florence 2
#fc4e2a
#e31a1c
#bd0026 - Pendelton 1
#800026 - Pendelton 2

In [30]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Lansing-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#ffffcc", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Lansing1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Lansing-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#ffeda0", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Lansing2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

In [40]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Florence-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#feb24c", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Florence1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Florence-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#fd8d3c", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Florence2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

In [41]:
tmp = df.l2fc %>% 
    filter(Experiment == 1 & Subset == 'Pendelton-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#bd0026", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Pendelton1.txt"), sep = "\t", row.names = FALSE, quote = FALSE)

tmp = df.l2fc %>% 
    filter(Experiment == 2 & Subset == 'Pendelton-Gv') %>%
    filter(OTU %in% TreeTax & Contrast == "CH_CS") %>% 
    mutate(Color = ifelse(padj < 0.05 & log2FoldChange > 0, "#800026", "#ffffff")) %>%
    mutate(Color = ifelse(!is.na(Color), Color, "#ffffff")) %>%
    select(OTU, Color)
write.table(tmp, file = file.path(out_Dir, "HA-Pendelton2.txt"), sep = "\t", row.names = FALSE, quote = FALSE)