# Differential abundance analysis using DESeq2
* Test for differential abundance between sample types or time points.  
* Test for correlations between MRE abundance and hyphal ASVs in bulk hyphosphere samples

In [None]:
# Directory where figures (and some summary tables) are written.
figDir <- '~/Hyphosphere/data/3Exp/figures'

# Directory where the DESeq2 log2 fold change tables are written.
work_dir <- '~/Hyphosphere/data/3Exp/DESeq'

# phyloseq - full object without tree (includes archaea); MREs must be removed
physeq_file <- '~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_woTree_experimental_woContam.rds'

In [None]:
library(RColorBrewer)
library(vegan)
library(ggpubr)
library(phyloseq)
library(gridExtra)
library(ggplot2)
library(DESeq2)
library(VennDiagram)
library(tidyr)
library(dplyr)
library(reshape2)


In [None]:
# Publication-style ggplot2 theme built on theme_grey(): white panel with a
# thin black border, no grid lines, grey facet strips and white legend keys.
#   base_size   - base font size forwarded to theme_grey()
#   base_family - base font family forwarded to theme_grey()
# Returns a complete theme (complete = TRUE) that can be added to a plot.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line/rect elements; `size` is kept here unchanged - confirm the installed
# ggplot2 version before switching.
theme_pub = function (base_size = 11, base_family = "Helvetica") 
{
    thin_axis = element_line(size = .5)
    overrides = theme(
        axis.line.x = thin_axis,
        axis.line.y = thin_axis,
        panel.background = element_rect(fill = "white", colour = NA),
        panel.border = element_rect(fill = NA, color = "black", size = .5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "grey85", colour = "grey20"),
        legend.key = element_rect(fill = "white", colour = NA),
        complete = TRUE
    )
    theme_grey(base_size = base_size, base_family = base_family) %+replace% overrides
}

In [None]:
# Load the phyloseq object and display it before any filtering.
ps <- readRDS(physeq_file)
ps

# Keep only taxa with a Phylum assignment.
ps <- subset_taxa(ps, !is.na(Phylum))
# remove non-experimental controls
ps <- subset_samples(ps, SampleType != "GV")

# Disabled steps kept for reference:
#   T0R1 has all NaN for some reason
#ps <- subset_samples(ps, !Sample == "T0R1")
#   re-normalize after subsetting taxa
#ps <- transform_sample_counts(ps, function(x) x/sum(x))

# Replicate is a categorical variable in the DESeq2 designs below.
sample_data(ps)$Rep <- as.factor(sample_data(ps)$Rep)

ps


                                  


In [None]:
# Taxonomy as a data frame, with the ASV identifier (the row name) copied into
# an `ASV` column so it can be used as a join key.
taxa <- as.data.frame(tax_table(ps))
taxa <- mutate(taxa, ASV = row.names(taxa))
head(taxa)

In [None]:
sample_data(ps)[,'Soil'] %>% unique()

In [None]:
# Combined "<Soil>-<Fungus>" label (e.g. "Dryden-Gv") used to pick sample subsets.
sample_data(ps)$SoilxFungus <- paste0(sample_data(ps)$Soil, "-", sample_data(ps)$Fungus)
head(sample_data(ps))


# Experiment 2

In [None]:
tmp = ps %>% subset_samples(Experiment == 2)

## Loop script for sample type contrasts in experiment 2
* Fertilizer treatments grouped together

In [None]:
sample_data(tmp)$SampleType %>% unique

In [None]:
library("BiocParallel")
register(MulticoreParam(20))

In [None]:
sample_data(tmp)$DAI %>% unique()

In [None]:
sample_data(tmp) %>% as.data.frame() %>% filter(., Fungus == "RI") %>% .$SampleType  %>% unique()

In [None]:
# Sample-type contrasts for experiment 2.
# For every Soil-Fungus subset and every c(numerator, denominator) pair of
# sample types, a DESeq2 model (~ Rep + SampleType) is fitted on only those
# samples and the log2 fold change table for that contrast is kept.
# Reads `tmp` (experiment 2 phyloseq object carrying a SoilxFungus column) and
# produces `df.l2fc.Exp2`.

Grouplist <- c('Dryden-Gv', 'Florence-Gv', 'Pendelton-Gv')
ContrastList <- list(c("CH","CS"), c("CH", "BS"), c("CH", "BP"), c("RH", "RS"), c("BH","BS"), c("BP", "CS"), c("BP", "BS"))
# Dryden-RI is run with a shorter list that contains no "BP" contrasts.
RIContrastList <- list(c("CH","CS"), c("CH", "BS"), c("RH", "RS"), c("BH","BS"))

# Subset name -> contrasts to run. One loop now serves both the Gv subsets and
# Dryden-RI; the order is the same as in the two separate loops it replaces.
contrast_plan <- c(
    setNames(rep(list(ContrastList), length(Grouplist)), Grouplist),
    list("Dryden-RI" = RIContrastList)
)

# Result tables are collected in a list and bound once at the end instead of
# growing a data frame with rbind() on every iteration.
l2fc_parts <- list()

# NOTE(review): kept as a top-level loop with the loop variables `i` and `j`;
# subset_samples() evaluates its condition non-standardly, so wrapping this in
# a function may stop it from finding them - verify before refactoring.
for (i in names(contrast_plan)) {
    print(i)
    for (j in contrast_plan[[i]]) {
        numerator <- j[1]
        denom <- j[2]
        print(paste(numerator, denom, sep = "_"))

        phy <- subset_samples(tmp, SoilxFungus == i & SampleType %in% j)

        # make DESeq object
        diagdds <- phyloseq_to_deseq2(phy, ~ Rep + SampleType)
        diagdds <- DESeq(diagdds, fitType = "local")

        # log2 fold change of numerator relative to denom
        r <- results(diagdds, contrast = c('SampleType', numerator, denom), parallel = TRUE) %>%
            as.data.frame %>%
            mutate(OTU = row.names(.), Subset = i, Term = 'SampleType',
                   Contrast = paste0(numerator, '_', denom), DAI = '27')
        l2fc_parts[[length(l2fc_parts) + 1]] <- r
    }
}

df.l2fc.Exp2 <- bind_rows(l2fc_parts)

# cleanup
diagdds <- NULL
df.l2fc <- NULL
r <- NULL
l2fc_parts <- NULL

In [None]:
taxa %>% head() 

In [None]:
# Attach taxonomy to the experiment 2 results, tag them with the experiment
# number and set the output path used by the next cell.
out <- left_join(df.l2fc.Exp2, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 2
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp2.txt')

In [None]:
write.table(out, file = outfile, row.names = FALSE, sep = '\t')

## Get Ri CH vs Gv BPOM contrasts

In [None]:
# Ri CH vs Gv BPOM contrast in Dryden soil.
# Samples kept: every Dryden "BP" sample plus the Dryden "CH" samples whose
# Fungus is "RI". The parentheses spell out how `&` and `|` already combined.
phy <- subset_samples(tmp, Soil == "Dryden")
phy <- subset_samples(phy, (Fungus == "RI" & SampleType == "CH") | SampleType == "BP")
numerator <- "CH"
denom <- "BP"

# make DESeq object
diagdds <- DESeq(phyloseq_to_deseq2(phy, ~ Rep + SampleType), fitType = "local")

# log2 fold change of CH relative to BP, labelled as the "Dryden-RI" subset
df.l2fc <- results(diagdds, contrast = c('SampleType', numerator, denom), parallel = TRUE) %>%
    as.data.frame %>%
    mutate(OTU = row.names(.), Subset = "Dryden-RI", Term = 'SampleType',
           Contrast = paste0(numerator, '_', denom), DAI = '27')

# cleanup (df.l2fc is deliberately kept; the next cell writes it out)
diagdds <- NULL
r <- NULL

In [None]:
# Attach taxonomy to the Ri CH_BP results; they go to the same experiment 2 file.
out <- left_join(df.l2fc, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 2
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp2.txt')

In [None]:
# Append the rows of `out` to the existing file at `outfile`. No header is
# written (col.names = FALSE), so the columns of `out` must be in the same
# order as the header already in the file. Because append = TRUE, re-running
# this cell adds the same rows a second time.
write.table(out, file = outfile, row.names = FALSE, col.names = FALSE, sep = '\t', append = TRUE)

In [None]:
tmp = read.delim(outfile, header= TRUE, sep = '\t')
head(tmp)
tail(tmp)

# Experiment 1

In [None]:
tmp = ps %>% subset_samples(Experiment == 1)

In [None]:
sample_data(tmp)$DAI %>% unique

In [None]:
colnames(sample_data(tmp))

In [None]:
# Sample-type contrasts for experiment 1.
# For every Soil-Fungus subset and every c(numerator, denominator) pair of
# sample types, a DESeq2 model (~ Rep + SampleType) is fitted on only those
# samples and the log2 fold change table for that contrast is kept.
# Reads `tmp` (experiment 1 phyloseq object) and produces `df.l2fc.Exp1`.

Grouplist <- c('Dryden-Gv', 'Florence-Gv', 'Pendelton-Gv')
ContrastList <- list(c("CH","CS"), c("CH", "RT"), c("RT", "CS"))

# Result tables are collected in a pre-sized list and bound once at the end
# instead of growing a data frame with rbind() on every iteration.
l2fc_parts <- vector("list", length(Grouplist) * length(ContrastList))
n_done <- 0

# NOTE(review): kept as a top-level loop with the loop variables `i` and `j`;
# subset_samples() evaluates its condition non-standardly, so wrapping this in
# a function may stop it from finding them - verify before refactoring.
for (i in Grouplist) {
    print(i)
    for (j in ContrastList) {
        numerator <- j[1]
        denom <- j[2]
        print(paste(numerator, denom, sep = "_"))

        phy <- subset_samples(tmp, SoilxFungus == i & SampleType %in% j)

        # make DESeq object
        diagdds <- phyloseq_to_deseq2(phy, ~ Rep + SampleType)
        diagdds <- DESeq(diagdds, fitType = "local")

        # log2 fold change of numerator relative to denom
        r <- results(diagdds, contrast = c('SampleType', numerator, denom), parallel = TRUE) %>%
            as.data.frame %>%
            mutate(OTU = row.names(.), Subset = i, Term = 'SampleType',
                   Contrast = paste0(numerator, '_', denom), DAI = '81')
        n_done <- n_done + 1
        l2fc_parts[[n_done]] <- r
    }
}

df.l2fc.Exp1 <- bind_rows(l2fc_parts)

# cleanup
diagdds <- NULL
df.l2fc <- NULL
r <- NULL
l2fc_parts <- NULL

In [None]:
# Attach taxonomy to the experiment 1 results, tag them with the experiment
# number and set the output path used by the next cell.
out <- left_join(df.l2fc.Exp1, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 1
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp1.txt')

In [None]:
write.table(out, file = outfile, row.names = FALSE, sep = '\t')

# Experiment 3 sample type

In [None]:
tmp = ps %>% subset_samples(Experiment == 3)
sample_data(tmp)$DAI %>% unique
sample_data(tmp)$SampleType %>% unique()
colnames(sample_data(tmp))

In [None]:
# Sample-type contrasts for experiment 3, run separately for each sampling date.
# For every date and every c(numerator, denominator) pair of sample types, a
# DESeq2 model (~ Rep + SampleType) is fitted on only those samples and the
# log2 fold change table for that contrast is kept.
# Reads `tmp` (experiment 3 phyloseq object) and produces `df.l2fc.Exp3`.
sample_data(tmp)$SoilxFungus <- paste(sample_data(tmp)$Soil, sample_data(tmp)$Fungus, sep = '-')
sample_data(tmp)$Rep <- factor(sample_data(tmp)$Rep)
sample_data(tmp)$DAI <- factor(sample_data(tmp)$DAI)

Grouplist <- c('Dryden-Gv')

# Contrasts for the first four sampling dates
ContrastList <- list(c("CH","CS"), c("CH", "BS"), c("CH", "BP"), c("RH", "RT"), c("RH","CS"), c("BP", "CS"), c("BP", "BS"))
# Date 65 does not have root hyphae samples, so the RH contrasts are left out
ContrastList65 <- list(c("CH","CS"), c("CH", "BS"), c("CH", "BP"), c("BP", "CS"), c("BP", "BS"))

# Sampling date -> contrasts to run on that date. One loop now covers all five
# dates; the order is the same as in the two separate loops it replaces.
contrasts_by_date <- list(
    "14" = ContrastList,
    "24" = ContrastList,
    "35" = ContrastList,
    "45" = ContrastList,
    "65" = ContrastList65
)

# Result tables are collected in a list and bound once at the end instead of
# growing a data frame with rbind() on every iteration.
l2fc_parts <- list()

# NOTE(review): kept as a top-level loop with the loop variables `i`, `j` and
# `k`; subset_samples() evaluates its condition non-standardly, so wrapping
# this in a function may stop it from finding them - verify before refactoring.
for (i in Grouplist) {
    print(i)
    for (date_label in names(contrasts_by_date)) {
        k <- as.numeric(date_label)
        print(k)
        for (j in contrasts_by_date[[date_label]]) {
            print(j)
            numerator <- j[1]
            denom <- j[2]
            print(paste(numerator, denom, sep = "_"))

            phy <- subset_samples(tmp, SoilxFungus == i & SampleType %in% j & DAI == k)

            # make DESeq object
            diagdds <- phyloseq_to_deseq2(phy, ~ Rep + SampleType)
            diagdds <- DESeq(diagdds, fitType = "local", parallel = TRUE)

            # log2 fold change of numerator relative to denom on date k
            r <- results(diagdds, contrast = c('SampleType', numerator, denom), parallel = TRUE) %>%
                as.data.frame %>%
                mutate(OTU = row.names(.), Subset = i, Term = 'SampleType',
                       Contrast = paste0(numerator, '_', denom), DAI = k)
            l2fc_parts[[length(l2fc_parts) + 1]] <- r
        }
    }
}

df.l2fc.Exp3 <- bind_rows(l2fc_parts)

# cleanup
diagdds <- NULL
df.l2fc <- NULL
r <- NULL
l2fc_parts <- NULL

In [None]:
# Attach taxonomy to the experiment 3 sample-type results, tag them with the
# experiment number and set the output path used by the next cell.
out <- left_join(df.l2fc.Exp3, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 3
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp3.txt')

In [None]:
write.table(out, file = outfile, row.names = FALSE, sep = '\t')

# Experiment 3 time

In [None]:
tmp = ps %>% subset_samples(Experiment == 3)
sample_data(tmp)$DAI %>% unique
sample_data(tmp)$SampleType %>% unique()
colnames(sample_data(tmp))

In [None]:
# Time-point contrasts for experiment 3, run separately for each sample type.
# For every sample type and every c(later, earlier) pair of sampling dates, a
# DESeq2 model (~ Rep + DAI) is fitted on only those samples and the log2 fold
# change table for that contrast is kept.
# Reads `tmp` (experiment 3 phyloseq object) and produces `df.l2fc.Exp3.time`.
sample_data(tmp)$SoilxFungus <- paste(sample_data(tmp)$Soil, sample_data(tmp)$Fungus, sep = '-')
sample_data(tmp)$Rep <- factor(sample_data(tmp)$Rep)
sample_data(tmp)$DAI <- factor(sample_data(tmp)$DAI)

Grouplist <- c("BS", "CS", "RH", "CH", "BP")

# Every later date is compared against day 14.
ContrastList <- list(c("24","14"), c("35","14"), c("45","14"), c("65","14"))

# Used to check which dates each sample type was actually sampled on.
sample_meta <- as(sample_data(tmp), "data.frame")

# Result tables are collected in a list and bound once at the end instead of
# growing a data frame with rbind() on every iteration.
l2fc_parts <- list()

# NOTE(review): kept as a top-level loop with the loop variables `i` and `j`;
# subset_samples() evaluates its condition non-standardly, so wrapping this in
# a function may stop it from finding them - verify before refactoring.
for (i in Grouplist) {
    print(i)
    dates_present <- unique(as.character(sample_meta$DAI[sample_meta$SampleType == i]))
    for (j in ContrastList) {
        print(j)
        numerator <- j[1]
        denom <- j[2]
        print(paste(numerator, denom, sep = "_"))

        # Date 65 does not have root hyphae samples, so a contrast can ask for a
        # date this sample type was never sampled on. The model cannot be
        # fitted in that case, so skip the contrast instead of aborting the run.
        if (!all(j %in% dates_present)) {
            message("Skipping ", i, " ", numerator, "_", denom,
                    ": no samples for at least one of the two dates")
            next
        }

        phy <- subset_samples(tmp, SampleType == i & DAI %in% j)

        # make DESeq object
        diagdds <- phyloseq_to_deseq2(phy, ~ Rep + DAI)
        diagdds <- DESeq(diagdds, fitType = "local", parallel = TRUE)

        # log2 fold change of the later date relative to the earlier one
        r <- results(diagdds, contrast = c('DAI', numerator, denom), parallel = TRUE) %>%
            as.data.frame %>%
            mutate(OTU = row.names(.), Subset = "Dryden-Gv", SampleType = i,
                   Term = 'TimePoint', Contrast = paste0(numerator, '_', denom))
        l2fc_parts[[length(l2fc_parts) + 1]] <- r
    }
}

df.l2fc.Exp3.time <- bind_rows(l2fc_parts)

# cleanup
diagdds <- NULL
df.l2fc <- NULL
r <- NULL
l2fc_parts <- NULL
sample_meta <- NULL

In [None]:
# Attach taxonomy to the experiment 3 time-point results, tag them with the
# experiment number and set the output path used by the next cell.
out <- left_join(df.l2fc.Exp3.time, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 3
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp3-time.txt')

In [None]:
write.table(out, file = outfile, row.names = FALSE, sep = '\t')

# Experiment 2 Nutrient Treatment

In [None]:
tmp = ps %>% subset_samples(Experiment == 2 & SampleType == "CH" & Soil %in% c("Dryden", "Florence"))

In [None]:
sample_data(tmp)$Treatment %>% unique()

In [None]:
# Nutrient treatment contrast (HP vs HN) for experiment 2, run separately for
# each Soil-Fungus subset. `tmp` already holds only CH samples from the Dryden
# and Florence soils. Produces `df.l2fc.Exp2.treatment`.
sample_data(tmp)$SoilxFungus <- paste(sample_data(tmp)$Soil, sample_data(tmp)$Fungus, sep = '-')
sample_data(tmp)$Rep <- factor(sample_data(tmp)$Rep)
sample_data(tmp)$DAI <- factor(sample_data(tmp)$DAI)
df.l2fc <- NULL

SoilFungusList <- c("Dryden-Gv", "Dryden-RI", "Florence-Gv")

for (i in SoilFungusList) {
    print(i)

    phy <- subset_samples(tmp, SoilxFungus == i)
    numerator <- "HP"
    denom <- "HN"
    print(paste(numerator, denom, sep = "_"))

    # make DESeq object
    diagdds <- DESeq(phyloseq_to_deseq2(phy, ~ Rep + Treatment),
                     fitType = "local", parallel = TRUE)

    # log2 fold change of HP relative to HN within this subset
    r <- results(diagdds, contrast = c('Treatment', numerator, denom),
                 parallel = TRUE, independentFiltering = TRUE) %>%
        as.data.frame %>%
        mutate(OTU = row.names(.), Subset = i, SampleType = "CH",
               Term = 'Treatment', Contrast = paste0(numerator, '_', denom))
    df.l2fc <- rbind(df.l2fc, r)
}

df.l2fc.Exp2.treatment <- df.l2fc

# cleanup
diagdds <- NULL
df.l2fc <- NULL
r <- NULL

In [None]:
# Attach taxonomy to the experiment 2 treatment results, tag them with the
# experiment number and set the output path used by the next cell.
out <- left_join(df.l2fc.Exp2.treatment, taxa, by = c("OTU" = "ASV"))
out$Experiment <- 2
head(out)
outfile <- file.path(work_dir, 'dfl2fc-Exp2-treatment.txt')

In [None]:
write.table(out, file = outfile, row.names = FALSE, sep = '\t')

# Checkpoint
read back in log2foldchange files

In [None]:
# Re-load the saved log2 fold change tables so the analysis below can start
# from here without re-running DESeq2. (The treatment table is not re-read.)

# Experiment 1, sample-type contrasts
readfile <- file.path(work_dir, 'dfl2fc-Exp1.txt')
df.l2fc.Exp1 <- read.table(readfile, header = TRUE, sep = '\t')

# Experiment 2, sample-type contrasts
readfile <- file.path(work_dir, 'dfl2fc-Exp2.txt')
df.l2fc.Exp2 <- read.table(readfile, header = TRUE, sep = '\t')

# Experiment 3, sample-type contrasts per date
readfile <- file.path(work_dir, 'dfl2fc-Exp3.txt')
df.l2fc.Exp3 <- read.table(readfile, header = TRUE, sep = '\t')

# Experiment 3, time-point contrasts per sample type
readfile <- file.path(work_dir, 'dfl2fc-Exp3-time.txt')
df.l2fc.Exp3.time <- read.table(readfile, header = TRUE, sep = '\t')

head(df.l2fc.Exp1)

In [None]:
# Contrasts and subsets present in the experiment 1 and 2 tables.
# unique() is used instead of levels(): levels() returns NULL for character
# columns, which is what read.table() produces unless stringsAsFactors = TRUE
# (no longer the default since R 4.0). unique() works for both column types.
unique(df.l2fc.Exp1$Contrast)
unique(df.l2fc.Exp1$Subset)

unique(df.l2fc.Exp2$Contrast)
unique(df.l2fc.Exp2$Subset)

## Identify how many ASVs are enriched in multiple soils

In [None]:
# Experiment 1: for ASVs significantly enriched in CH_CS (padj < 0.05,
# log2FoldChange > 0), count the rows each ASV is significant in (nSig), then
# count how many ASVs share each nSig value (nGroup).
tmp <- df.l2fc.Exp1 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0) %>%
    group_by(OTU) %>%
    summarize(nSig = n()) %>%
    group_by(nSig) %>%
    summarize(nGroup = n())
head(tmp)
range(tmp$nSig)

In [None]:
# Experiment 1: ASVs (with taxonomy) significantly enriched in CH_CS in more
# than one row of the table, i.e. in more than one subset.
tmp <- df.l2fc.Exp1 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0) %>%
    group_by(OTU, Phylum, Class, Order, Family, Genus) %>%
    summarize(nSig = n()) %>%
    filter(nSig > 1)
tmp

In [None]:
# Experiment 2 (Dryden-RI excluded): for ASVs significantly enriched in CH_CS,
# count the rows each ASV is significant in (nSig), then count how many ASVs
# share each nSig value (nGroup).
tmp <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset != "Dryden-RI") %>%
    group_by(OTU) %>%
    summarize(nSig = n()) %>%
    group_by(nSig) %>%
    summarize(nGroup = n())
head(tmp)
range(tmp$nSig)

In [None]:
# Experiment 2 (Dryden-RI excluded): ASVs (with taxonomy) significantly
# enriched in CH_CS in more than one row of the table, i.e. in more than one
# subset.
tmp <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset != "Dryden-RI") %>%
    group_by(OTU, Phylum, Class, Order, Family, Genus) %>%
    summarize(nSig = length(padj)) %>%
    filter(nSig > 1)
tmp
# range() must be given the numeric column: calling it on the whole data frame
# fails because the taxonomy columns are not numeric.
range(tmp$nSig)

## Number of ASVs enriched in any soil, either experiment

In [None]:
# ASVs significantly enriched in CH_CS (padj < 0.05, log2FoldChange > 0) in any
# subset other than Dryden-RI, per experiment and across both experiments.
HA.exp1 <- df.l2fc.Exp1 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset != "Dryden-RI") %>%
    .$OTU %>%
    unique()

paste(length(HA.exp1), "ASVs enriched in hyphal samples in any soil in experiment 1")

HA.exp2 <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset != "Dryden-RI") %>%
    .$OTU %>%
    unique()

paste(length(HA.exp2), "ASVs enriched in hyphal samples in any soil in experiment 2")

# Size of the union of the two ASV sets.
paste(length(union(HA.exp1, HA.exp2)),
      "ASVs enriched in hyphal samples in a particular soil in at least one of the two experiments")

### Hyphal ASVs enriched in other comparisons

In [None]:
# Experiment 2 (Dryden-RI excluded): number of distinct ASVs significantly
# enriched in the RH_RS contrast and in the CH_BS contrast.
rh_rs_hits <- filter(df.l2fc.Exp2, Contrast == "RH_RS", padj < 0.05,
                     log2FoldChange > 0, Subset != "Dryden-RI")
paste(length(unique(rh_rs_hits$OTU)),
      "hyphal ASVs enriched in hyphae of the root compartment compared to the root slurry")

ch_bs_hits <- filter(df.l2fc.Exp2, Contrast == "CH_BS", padj < 0.05,
                     log2FoldChange > 0, Subset != "Dryden-RI")
paste(length(unique(ch_bs_hits$OTU)),
      "hyphal ASVs enriched in hyphae compared to bulk soil")

## Which ASVs are shared between Ri And Gv

In [None]:
# ASVs significantly enriched in CH_CS (padj < 0.05, log2FoldChange > 0) in
# Dryden soil, separately for the Dryden-RI and Dryden-Gv subsets, their
# overlap, and a two-set Venn diagram shown on screen and saved as a PDF.
RIASVs <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset %in% c("Dryden-RI")) %>%
    .$OTU %>%
    unique()
paste(length(RIASVs), "ASVs enriched in Ri hyphal samples")

GVASVs <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset %in% c("Dryden-Gv")) %>%
    .$OTU %>%
    unique()
paste(length(GVASVs), "ASVs enriched in Gv hyphal samples")

sharedASVs <- intersect(RIASVs, GVASVs)

# paste() inserts the separating space itself; with sep = '' the count was
# glued straight onto the text.
paste(length(sharedASVs), 'ASVs enriched in both Ri and Gv hyphal samples in Dryden soil')

# One argument list for both drawings so the on-screen and PDF figures cannot
# drift apart.
venn_args <- list(
    area1 = length(RIASVs), area2 = length(GVASVs), cross.area = length(sharedASVs),
    category = c("R. irregularis", "G. versiforme"), lty = rep(2, 2),
    fill = c("blue",  "red"), cex = 2, cat.cex = 2, scaled = TRUE,
    rotation.degree = 180, euler.d = TRUE, cat.pos = c(0,0)
)

# on-screen copy
do.call(draw.pairwise.venn, venn_args)

# PDF copy
outfile <- file.path(figDir,'Ri_GV_Dryden_Venn.pdf')
pdf(file = outfile, width = 3, height = 3)
do.call(draw.pairwise.venn, venn_args)
dev.off()

## Which ASVs are enriched in both experiments in the same soil

In [None]:
# Get lists of experiment 1 and three lists of experiment 2, see which match
HA1.Dryden = df.l2fc.Exp1 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Dryden-Gv")
HA2.Dryden = df.l2fc.Exp2 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Dryden-Gv")

ConsistentDrydenASVs = inner_join(HA1.Dryden, HA2.Dryden, by = "OTU")
ConsistentDrydenASVs

In [None]:
# Get lists of experiment 1 and three lists of experiment 2, see which match
HA1.Florence = df.l2fc.Exp1 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Florence-Gv")
HA2.Florence = df.l2fc.Exp2 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Florence-Gv")

ConsistentFlorenceASVs  = inner_join(HA1.Florence, HA2.Florence, by = "OTU")
ConsistentFlorenceASVs

In [None]:
# Get lists of experiment 1 and three lists of experiment 2, see which match
HA1.Pendelton = df.l2fc.Exp1 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Pendelton-Gv")
HA2.Pendelton = df.l2fc.Exp2 %>% 
    filter(Contrast == "CH_CS" & padj < 0.05 & log2FoldChange > 0 & Subset == "Pendelton-Gv")

ConsistentPendeltonASVs = inner_join(HA1.Pendelton, HA2.Pendelton, by = "OTU")

ConsistentPendeltonASVs

In [None]:
# ASVs present in all three per-soil "consistent" tables.
# The ".x.x" columns are the taxonomy of the first table (Dryden) as it came
# from its own first input (experiment 1); joining two already-suffixed tables
# adds the second ".x".
SuperConsistentASVs <- ConsistentDrydenASVs %>%
    inner_join(ConsistentFlorenceASVs, by = "OTU") %>%
    select(OTU, Phylum.x.x, Class.x.x, Order.x.x, Family.x.x, Genus.x.x) %>%
    # Keep only the OTUs that also occur in the Pendelton table.
    inner_join(ConsistentPendeltonASVs, by = "OTU") %>%
    select(OTU, Phylum.x.x, Class.x.x, Order.x.x, Family.x.x, Genus.x.x)
SuperConsistentASVs

## Review ASV found enriched in both experiments in all soils for signature of contamination
* Sandaracinaceae ASV14 and ASV18 are enriched in all soils in all experiments

In [None]:
# Switch to a different phyloseq file (the name lacks the "experimental" tag of
# the one loaded at the top) and replace `ps` with it.
# NOTE(review): presumably this object still holds the non-experimental samples
# such as "T0" used below - confirm.
physeq_file <- "~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_woTree_woContam.rds"

# Keep only taxa with a Phylum assignment.
ps <- subset_taxa(readRDS(physeq_file), !is.na(Phylum))

# Disabled steps kept for reference:
#   T0R1 has all NaN for some reason
#ps <- subset_samples(ps, !Sample == "T0R1")
#   re-normalize after subsetting taxa
#ps <- transform_sample_counts(ps, function(x) x/sum(x))

sample_data(ps)$Rep <- as.factor(sample_data(ps)$Rep)

ps


In [None]:
# Relative abundance of ASV14 and ASV18 in the "T0" samples. Counts are turned
# into proportions before the other taxa are dropped, so the values are
# fractions of each sample's total.
ps %>%
    subset_samples(SampleType == "T0") %>%
    transform_sample_counts(function(x) x/sum(x)) %>%
    prune_taxa(c('ASV14', "ASV18"), .) %>%
    otu_table()

In [None]:
# log2 relative abundance of ASV18 by sample type, one row of panels per soil,
# for experiment 2 samples with Fungus == "Gv". A small constant is added
# before the log so zero abundances stay finite.
ps %>%
    subset_samples(Experiment == 2 & SampleType %in% c("BS", "BH", "CH", "CS", "RH", "RS") & Fungus == "Gv") %>%
    transform_sample_counts(function(x) x/sum(x)) %>%
    prune_taxa("ASV18", .) %>%
    psmelt() %>%
    mutate(AbundScale = log2(Abundance + .000001)) %>%
    ggplot(aes(x = SampleType, y = AbundScale)) +
    geom_boxplot() +
    geom_point() +
    facet_grid(Soil ~ SampleType, scales = 'free_x') +
    ylab('Relative abundance (log2)')

* ASV18 is detected in bulk hyphal samples, which are not otherwise low in biomass or prone to contamination. In contrast, it is absent from all bulk soil samples without hyphal influence, which suggests a clear hyphal signal. It is also less abundant in the root compartment slurry samples, which had the lowest DNA yields of any sample type.

In [None]:
# log2 relative abundance of ASV18 by sample type, one row of panels per soil,
# for experiment 2 samples with Fungus == "RI". A small constant is added
# before the log so zero abundances stay finite.
ps %>%
    subset_samples(Experiment == 2 & SampleType %in% c("BS", "BH", "CH", "CS", "RH", "RS") & Fungus == "RI") %>%
    transform_sample_counts(function(x) x/sum(x)) %>%
    prune_taxa("ASV18", .) %>%
    psmelt() %>%
    mutate(AbundScale = log2(Abundance + .000001)) %>%
    ggplot(aes(x = SampleType, y = AbundScale)) +
    # Boxes first, points on top (same layer order as the Gv plot); with the
    # points drawn first the boxes hid them.
    geom_boxplot() +
    geom_point() +
    facet_grid(Soil ~ SampleType, scales = 'free_x') +
    ylab('Relative abundance (log2)')

* Ri would not have carried ASV14 or ASV18 in on spores, suggesting that they may have come from the soil, or at least from cross-contamination within the experiment. Root slurry is still lower than root hyphae, and RH had higher DNA yields, so this is not consistent with contamination.


# Which ASVs are time specific

In [None]:
# Experiment 3, Dryden-Gv: how many ASVs are significantly enriched in CH_CS
# (padj < 0.05, log2FoldChange > 0) at one or more time points.

# Every significant CH_CS row; the table has one row per ASV per date.
exp3_hyphal_hits <- df.l2fc.Exp3 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset == "Dryden-Gv")

# Number of significant rows (time points) per ASV, with taxonomy.
exp3_times_enriched <- exp3_hyphal_hits %>%
    group_by(OTU, Phylum, Class, Order, Family, Genus) %>%
    summarize(nSig = length(padj))

paste(length(unique(exp3_hyphal_hits$OTU)), "ASVs enriched in at least one timepoint")

paste(length(unique(filter(exp3_times_enriched, nSig > 1)$OTU)),
      'ASVs enriched in more than one time point')

paste(length(unique(filter(exp3_times_enriched, nSig > 2)$OTU)),
      'ASVs enriched in more than two time points')

paste(length(unique(filter(exp3_times_enriched, nSig > 3)$OTU)),
      'ASVs enriched in more than three time points')

tmp <- filter(exp3_times_enriched, nSig > 4)

paste(length(unique(tmp$OTU)), 'ASVs enriched in more than four time points')

tmp


## How many hyphal ASVs at each time point

In [None]:
# Experiment 3, Dryden-Gv: number of distinct ASVs significantly enriched in
# CH_CS (padj < 0.05, log2FoldChange > 0) on each sampling date.
exp3_hits_by_day <- df.l2fc.Exp3 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset == "Dryden-Gv")

paste(length(unique(filter(exp3_hits_by_day, DAI == 14)$OTU)), "ASVs enriched at 14 days")

paste(length(unique(filter(exp3_hits_by_day, DAI == 24)$OTU)), "ASVs enriched at 24 days")

paste(length(unique(filter(exp3_hits_by_day, DAI == 35)$OTU)), "ASVs enriched at 35 days")

paste(length(unique(filter(exp3_hits_by_day, DAI == 45)$OTU)), "ASVs enriched at 45 days")

paste(length(unique(filter(exp3_hits_by_day, DAI == 65)$OTU)), "ASVs enriched at 65 days")

In [None]:
## what proportion of reads are hyphal ASVs over time

In [None]:
# Identifiers (as character) of every ASV significantly enriched in CH_CS in
# the Dryden-Gv subset of experiment 3, on any date.
exp3_sig_rows <- filter(df.l2fc.Exp3, Contrast == "CH_CS", padj < 0.05,
                        log2FoldChange > 0, Subset == "Dryden-Gv")
Exp3ASVs <- as.character(unique(exp3_sig_rows$OTU))

paste(length(Exp3ASVs), " ASVs enriched in hyphal samples in Experiment 3")

In [None]:
# Long-format relative abundances of the experiment 3 hyphal ASVs in the CH
# samples of experiment 3. Counts become proportions before the taxa are
# pruned, so abundances are fractions of each sample's total.
Exp3.HASV.m <- ps %>%
    transform_sample_counts(function(x) x/sum(x)) %>%
    prune_taxa(Exp3ASVs, .) %>%
    subset_samples(SampleType == "CH" & Experiment == 3) %>%
    psmelt()

head(Exp3.HASV.m)

In [None]:
# Summed relative abundance of the hyphal ASVs per date and replicate, then the
# mean and standard error of that sum across replicates for each date.
Exp3.HASV.m %>%
    group_by(DAI, Rep) %>%
    summarize(sAbund = sum(Abundance)) %>%
    group_by(DAI) %>%
    summarize(
        HASV_abund = mean(sAbund),
        HASV_abund.se = sd(sAbund) / sqrt(n())
    )

# Which Hyphal ASVs change with time

In [None]:
# Hyphal ASVs of experiment 3 (significantly enriched in CH_CS, Dryden-Gv) ...
exp3_ch_cs_sig <- filter(df.l2fc.Exp3, Contrast == "CH_CS", padj < 0.05,
                         log2FoldChange > 0, Subset == "Dryden-Gv")
HASVs <- unique(exp3_ch_cs_sig$OTU)
length(HASVs)

# ... and the time-point contrasts in CH samples in which those ASVs change
# significantly (padj < 0.05, either direction).
df.THASVs <- filter(df.l2fc.Exp3.time, SampleType == "CH", padj < 0.05, OTU %in% HASVs)
dim(df.THASVs)

In [None]:
df.THASVs %>% head()

In [None]:
# Wide table of the hyphal ASVs that change over time: one row per ASV, with
# the log2 fold change and adjusted p-value of each time contrast, followed by
# the taxonomy. Only significant contrasts are in df.THASVs, so the remaining
# cells are NA.

# Taxonomy lookup. df.THASVs has one row per ASV per significant contrast, so
# distinct() is needed; without it the final left_join repeats an ASV's row
# once for every contrast it is significant in.
tmp <- df.THASVs %>%
    select(OTU, Phylum, Class, Order, Family, Genus) %>%
    distinct()

# Adjusted p-values, one column per contrast ("<contrast>.padj").
dft.CH.padj <- df.THASVs %>%
    select(OTU, padj, Contrast) %>%
    dcast(OTU ~ Contrast, value.var = "padj")
colnames(dft.CH.padj)[-1] <- paste(colnames(dft.CH.padj)[-1], 'padj', sep = '.')

# log2 fold changes, one column per contrast ("<contrast>.l2fc").
dft.CH.l2fc <- df.THASVs %>%
    select(OTU, log2FoldChange, Contrast) %>%
    dcast(OTU ~ Contrast, value.var = "log2FoldChange")
colnames(dft.CH.l2fc)[-1] <- paste(colnames(dft.CH.l2fc)[-1], 'l2fc', sep = '.')

# NOTE(review): the 24_14 contrast is not selected here, and select() fails if
# any listed contrast has no significant ASV - confirm both are intended.
dft <- full_join(dft.CH.padj, dft.CH.l2fc, by = "OTU") %>%
    select(OTU, "35_14.l2fc", "35_14.padj", "45_14.l2fc", "45_14.padj", "65_14.l2fc", "65_14.padj") %>%
    left_join(., tmp, by = "OTU")
head(dft)

### write file of logfold change for HASVs over time

In [None]:
outfile = file.path(figDir, 'HASVs_l2fcTime.txt')
write.table(dft, file = outfile, sep = '\t', row.names = FALSE)

# Which hyphal ASVs are associated with POM

### Gv

In [None]:
# Gv, experiment 2: overlap between ASVs enriched in hyphal samples (CH_CS) and
# ASVs enriched in BP samples (BP_CS), each significant (padj < 0.05,
# log2FoldChange > 0) in at least one of the three Gv subsets.
gv_subsets <- c("Dryden-Gv", "Florence-Gv", "Pendelton-Gv")

HASVs <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_CS", padj < 0.05, log2FoldChange > 0, Subset %in% gv_subsets) %>%
    .$OTU %>%
    unique()
paste(length(HASVs), 'enriched in hyphal samples compared to core slurry in at least one of three soils')

#Currently cannot check Ri as BPOM was not collected in those mesocosms.
#Could run Deseq model against Gv BPOM in Dryden soil since it was from the bulk compartment anyway

BPASVs <- df.l2fc.Exp2 %>%
    filter(Contrast == "BP_CS", padj < 0.05, log2FoldChange > 0, Subset %in% gv_subsets) %>%
    .$OTU %>%
    unique()
paste(length(BPASVs), 'enriched in BP samples compared to CS in at least one of three soils')

NonSpecificASVs <- intersect(HASVs, BPASVs)
NonSpecificASVs %>% length() %>% paste(., "Gv ASVs also enriched BPOM samples compared to CS")

# CH_BP rows for the ASVs that are in both sets. The filter previously used
# `GvASVs`, which is not defined anywhere in this notebook (only `GVASVs`,
# Dryden-Gv only, exists); the Gv hyphal set built above is used instead.
# NOTE(review): confirm the three-soil HASVs set, not Dryden-only GVASVs, is
# the intended one.
tmp <- df.l2fc.Exp2 %>%
    filter(Contrast == "CH_BP" & OTU %in% HASVs) %>%
    filter(OTU %in% BPASVs)

dim(tmp)

# Number of those rows per taxonomic Order.
tmp %>%
    group_by(Order) %>%
    summarise(count = length(Order))

tmp
    

In [None]:
library(VennDiagram)

In [None]:
# Venn diagram of Gv hyphal ASVs vs POM ASVs, shown on screen and saved as PDF.
#   area1      = ASVs enriched in hyphae (HASVs)
#   area2      = ASVs enriched in POM (BPASVs)
#   cross.area = ASVs in both sets (NonSpecificASVs)
# The areas come from the sets built in the preceding Gv cell rather than from
# typed-in counts, so the figure follows the data. Run this right after that
# cell: the Ri section reuses the same variable names.
venn_args <- list(
    area1 = length(HASVs), area2 = length(BPASVs), cross.area = length(NonSpecificASVs),
    category = c("Hyphae", "POM"), lty = rep(2, 2),
    fill = c("blue",  "red"), cex = 2, cat.cex = 2, scaled = TRUE,
    rotation.degree = 180, euler.d = TRUE, cat.pos = c(0,0)
)

# on-screen copy
do.call(draw.pairwise.venn, venn_args)

# PDF copy
outfile <- file.path(figDir,'CH-BP_Venn_Gv.pdf')
pdf(file = outfile, width = 3, height = 3)
do.call(draw.pairwise.venn, venn_args)
dev.off()


### Ri

In [None]:
# Ri, experiment 2: overlap between ASVs enriched in Ri hyphal samples (CH_CS,
# Dryden-RI) and ASVs enriched in BP samples (BP_CS, Dryden-Gv), each
# significant at padj < 0.05 with log2FoldChange > 0.
ri_ch_cs_sig <- filter(df.l2fc.Exp2, Contrast == "CH_CS", padj < 0.05,
                       log2FoldChange > 0, Subset %in% c("Dryden-RI"))
HASVs <- unique(ri_ch_cs_sig$OTU)
paste(length(HASVs), 'enriched in Ri hyphal samples compared to core slurry in Dryden soils')

#Currently cannot check Ri as BPOM was not collected in those mesocosms.
#Could run Deseq model against Gv BPOM in Dryden soil since it was from the bulk compartment anyway

gv_bp_cs_sig <- filter(df.l2fc.Exp2, Contrast == "BP_CS", padj < 0.05,
                       log2FoldChange > 0, Subset %in% c("Dryden-Gv"))
BPASVs <- unique(gv_bp_cs_sig$OTU)
paste(length(BPASVs), 'enriched in BP samples compared to CS in Dryden soils')

NonSpecificASVs <- intersect(HASVs, BPASVs)
paste(length(NonSpecificASVs), "Ri ASVs also enriched BPOM samples compared to CS")

In [None]:
# Venn diagram: Ri hyphal-enriched ASVs vs. POM-enriched ASVs in Dryden soil.
#   area1      = hyphal-enriched ASVs (HASVs)
#   area2      = POM-enriched ASVs (BPASVs)
#   cross.area = ASVs enriched in both (NonSpecificASVs)
# The set sizes are taken from the vectors computed in the preceding cell rather
# than typed in by hand (previously 236 / 191 / 35), so the figure always
# reflects the current DESeq2 results.
draw.pairwise.venn(area1 = length(HASVs), area2 = length(BPASVs),
                   cross.area = length(NonSpecificASVs),
                   category = c("Hyphae", "POM"), lty = rep(2, 2),
                   fill = c("blue", "red"), cex = 2, cat.cex = 2, scaled = TRUE,
                   rotation.degree = 0, euler.d = TRUE, cat.pos = c(0, 0))

# Same diagram again, this time drawn into a PDF in the figure directory.
outfile = file.path(figDir,'CH-BP_Venn_Ri.pdf')
pdf(file = outfile, width = 3, height = 3)

draw.pairwise.venn(area1 = length(HASVs), area2 = length(BPASVs),
                   cross.area = length(NonSpecificASVs),
                   category = c("Hyphae", "POM"), lty = rep(2, 2),
                   fill = c("blue", "red"), cex = 2, cat.cex = 2, scaled = TRUE,
                   rotation.degree = 0, euler.d = TRUE, cat.pos = c(0, 0))
dev.off()


## Overlap between enriched ASVs in hyphal compartment and root compartment

### Gv

In [None]:
# Gv ASVs enriched in core hyphae (CH) over core slurry (CS) in any of the three soils.
HASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-Gv", "Florence-Gv", "Pendelton-Gv")
    ) %>%
    pull(OTU) %>%
    unique()
paste(length(HASVs), 'enriched in hyphal samples compared to core slurry in at least one of three soils')

# Gv ASVs enriched in root hyphae (RH) over root sand slurry (RS) in any of the three soils.
RHASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "RH_RS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-Gv", "Florence-Gv", "Pendelton-Gv")
    ) %>%
    pull(OTU) %>%
    unique()
paste(length(RHASVs), 'enriched in root hyphal samples compared to root sand slurry in at least one of three soils')

# ASVs enriched in the hyphal samples of both compartments.
SharedASVs <- intersect(HASVs, RHASVs)
paste(length(SharedASVs), "Gv ASVs also enriched in root hyphal samples compared to root sand slurry")


# Venn diagram sized directly from the three sets above.
draw.pairwise.venn(
    area1 = length(HASVs),
    area2 = length(RHASVs),
    cross.area = length(SharedASVs),
    category = c("Core Hyphae", "Root Hyphae"),
    lty = rep(2, 2),
    fill = c("blue", "red"),
    cex = 2,
    cat.cex = 2,
    scaled = TRUE,
    rotation.degree = 0,
    euler.d = TRUE,
    cat.pos = c(0, 0)
)


# Taxonomy (one row per distinct combination) of the root-hyphae-enriched ASVs.
df.l2fc.Exp2 %>%
    filter(OTU %in% RHASVs) %>%
    select(OTU, Phylum, Class, Order, Family, Genus) %>%
    unique()
#outfile = file.path(figDir,'CH-BP_Venn_Ri.pdf')
# pdf(file = outfile, width = 3, height = 3)

# draw.pairwise.venn(area1 = 236, area2 = 191, 
# cross.area = 35, category = c("Hyphae", "POM"), lty = rep(2, 2), 
#                  fill = c("blue",  "red"), cex = 2, cat.cex = 2, scaled = TRUE, rotation.degree = 0, 
#                    euler.d = TRUE, cat.pos = c(0,0))
# dev.off()

### Ri

In [None]:
# Ri ASVs enriched in core hyphae (CH) over core slurry (CS) in Dryden soil.
HASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-RI")
    ) %>%
    pull(OTU) %>%
    unique()
paste(length(HASVs), 'enriched in RI hyphal samples from Dryden')

# Ri ASVs enriched in root hyphae (RH) over root sand slurry (RS) in Dryden soil.
RHASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "RH_RS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-RI")
    ) %>%
    pull(OTU) %>%
    unique()
paste(length(RHASVs), 'enriched in root hyphal samples compared to root sand slurry from Dryden')

# ASVs enriched in the hyphal samples of both compartments.
SharedASVs <- intersect(HASVs, RHASVs)
paste(length(SharedASVs), "ASVs enriched in hyphal samples of both root and soil compartments")


# Venn diagram sized directly from the three sets above.
draw.pairwise.venn(
    area1 = length(HASVs),
    area2 = length(RHASVs),
    cross.area = length(SharedASVs),
    category = c("Core Hyphae", "Root Hyphae"),
    lty = rep(2, 2),
    fill = c("blue", "red"),
    cex = 2,
    cat.cex = 2,
    scaled = TRUE,
    rotation.degree = 0,
    euler.d = TRUE,
    cat.pos = c(0, 0)
)

# Check hyphal ASVs can be detected in targeted hypothesis test

In [None]:
# Hyphal-enriched ASVs (CH over CS, padj < 0.05, positive fold change) in
# Dryden soil, listed separately for each fungus.
RiASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-RI")
    ) %>%
    pull(OTU) %>%
    unique()
length(RiASVs)

GvASVs <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-Gv")
    ) %>%
    pull(OTU) %>%
    unique()
length(GvASVs)

In [None]:
# BH vs BS results for the Ri hyphal ASVs in the Dryden-RI subset, smallest
# p-value first. No significance cut-off is applied here.
df.l2fc.Exp2 %>%
    filter(
        Contrast == "BH_BS",
        OTU %in% RiASVs,
        Subset %in% c("Dryden-RI")
    ) %>%
    arrange(pvalue)

In [None]:
# Gv hyphal ASVs that are also significantly enriched in BH over BS, smallest
# p-value first.
# NOTE(review): unlike the Ri query, there is no Subset restriction here, so
# rows from every subset are returned -- confirm this is intended.
df.l2fc.Exp2 %>%
    filter(
        Contrast == "BH_BS",
        OTU %in% GvASVs,
        padj < 0.05,
        log2FoldChange > 0
    ) %>%
    arrange(pvalue)

# Check against abundance of MREs in BH samples

### Get MRE table from original phyloseq object

In [None]:
# Reload the phyloseq object, keep the Experiment 2 Gv samples, and reduce it
# to ASV2, ASV4 and ASV11 (presumably the MRE ASVs).
# Counts are converted to proportions *before* pruning, so each value is the
# ASV's share of the whole sample rather than of the three retained ASVs.
ps.MRE <- readRDS("~/Hyphosphere/data/3Exp/phyloseq/3Exp_phyloseq_thresh_woTree_experimental_woContam.rds") %>%
    subset_samples(Fungus == "Gv" & Experiment == 2) %>%
    transform_sample_counts(function(x) x / sum(x)) %>%
    prune_taxa(c("ASV2", "ASV4", "ASV11"), .)

# Taxonomy of the retained ASVs.
tax_table(ps.MRE)
        

In [None]:
# Summed MRE relative abundance per sample on a log2 scale, one panel per
# Soil x SampleType combination. log2(0) is -Inf, so a sample whose summed
# abundance is zero has no finite point to draw.
ps.MRE %>%
    psmelt() %>%
    # filter(Soil %in% c("Dryden", "Pendelton", "Florence") & Fungus == "Gv") %>%
    group_by(sample_Sample, Soil, SampleType, Treatment, Rep) %>%
    summarise(sAbund = sum(Abundance)) %>%
    ggplot(aes(x = interaction(Treatment, Rep), y = log2(sAbund))) +
    geom_point() +
    facet_grid(Soil ~ SampleType, scales = "free_x") +
    labs(title = "MREs", y = "Relative abundance (log2)") +
    theme(axis.text.x = element_blank())


In [None]:
# Summed MRE relative abundance per sample, restricted to the bulk soil (BS)
# and bulk hyphosphere (BH) samples of Experiment 2 Gv mesocosms (no sand).
MREtab <- ps.MRE %>%
    psmelt() %>%
    filter(
        Soil != "Sand",
        Experiment == 2,
        SampleType %in% c("BS", "BH"),
        Fungus == "Gv"
    ) %>%
    group_by(sample_Sample, Soil) %>%
    summarise(sAbund = sum(Abundance)) %>%
    # summarise() only peels off the last grouping level; drop the remaining
    # grouping so this table (and anything joined from it) is not silently
    # grouped by sample.
    ungroup()

### Get groups - Myxococcales, Fibrobacteres, Burkholderiaceae, Chloroflexaceae, Oli

In [None]:
# Orders ranked by the summed log2 fold change of their hyphal-enriched ASVs
# (CH over CS, padj < 0.05, positive fold change, any of the three Gv soils).
# ASVs without an Order assignment are excluded from the ranking.
TopHASVOrderList <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-Gv", "Florence-Gv", "Pendelton-Gv")
    ) %>%
    group_by(Order) %>%
    summarize(sL2FC = sum(log2FoldChange), n = n()) %>%
    arrange(desc(sL2FC)) %>%
    filter(!is.na(Order)) %>%
    pull(Order) %>%
    as.character()

# head() rather than [1:10]: indexing past the end would pad the output with NA
# when fewer than ten orders are present.
head(TopHASVOrderList, 10)

## Get taxa relative abundance from phyloseq object without MREs

In [None]:
# Per-sample relative abundance: every count divided by its sample total.
ps.rel <- transform_sample_counts(ps, function(x) x / sum(x))



In [None]:
# Hyphal-enriched ASVs (CH over CS in any Gv soil) that belong to the five
# top-ranked orders.
TaxList <- df.l2fc.Exp2 %>%
    filter(
        Contrast == "CH_CS",
        padj < 0.05,
        log2FoldChange > 0,
        Subset %in% c("Dryden-Gv", "Florence-Gv", "Pendelton-Gv"),
        # head() rather than [1:5]: indexing past the end pads with NA, and
        # `%in%` would then also match ASVs whose Order is NA.
        Order %in% head(TopHASVOrderList, 5)
    ) %>%
    pull(OTU) %>%
    as.character() %>%
    unique()


# Summed relative abundance of those ASVs per sample and Order, for the bulk
# soil (BS) and bulk hyphosphere (BH) samples of Experiment 2 Gv mesocosms.
Taxtab <- ps.rel %>%
    prune_taxa(TaxList, .) %>%
    subset_samples(Experiment == 2 & SampleType %in% c("BS", "BH") & Fungus == "Gv") %>%
    psmelt() %>%
    group_by(sample_Sample, Soil, SampleType, Order) %>%
    summarise(sAbund = sum(Abundance)) %>%
    # summarise() only removes the last grouping level (Order); drop the rest so
    # downstream joins and filters work on a plain table.
    ungroup()
head(Taxtab)

In [None]:
# Three-colour palette used for the fill scale in the scatter plot.
cs <- brewer.pal(3, "Set2")
# First look at the join on sample ID alone. Soil is present in both tables
# and is not a join key here, so it comes back as two suffixed columns; the
# following cell re-joins on both keys and overwrites df.
df <- MREtab %>% full_join(Taxtab, by = "sample_Sample")
head(df)

In [None]:
# Combine MRE and hyphal-taxa abundances per sample and soil, then give the two
# abundance columns descriptive names (x = MREtab, y = Taxtab).
df <- MREtab %>%
    full_join(Taxtab, by = c("sample_Sample", "Soil")) %>%
    rename(MRE_rel = sAbund.x, HyphalASVs_rel = sAbund.y)

head(df)
#tmp = df %>% filter(SampleType == "BH")

In [None]:
# Hyphal-associated taxa vs. MRE relative abundance, one panel per Order, with
# a dashed per-panel linear fit and no confidence band. Points are filled by
# Soil and shaped by SampleType.
# Options tried and left disabled: filtering to ASV14/ASV18, log2 axes
# (scale_x/y_continuous(trans = 'log2')), and an annotate() equation label.
p1 <- ggplot(df, aes(x = MRE_rel, y = HyphalASVs_rel)) +
    geom_jitter(aes(fill = Soil, shape = SampleType)) +
    # the fit inherits x and y from the plot-level mapping
    geom_smooth(method = "lm", se = FALSE, lty = 2) +
    facet_wrap(~Order, scales = "free_y", ncol = 1) +
    scale_shape_manual(values = c(21, 22)) +
    scale_fill_manual(values = cs, guide = guide_legend(ncol = 3)) +
    labs(
        x = "MRE relative abundance",
        y = "Hyphal associated taxa relative abundance"
    ) +
    theme_pub() +
    theme(legend.position = "bottom") +
    # draw the fill legend keys as filled circles so the colours are visible
    guides(fill = guide_legend(override.aes = list(shape = 21, fill = cs), ncol = 3))



p1

In [None]:
# Betaproteobacteriales: linear model of hyphal-taxa abundance on soil, sample
# type and MRE abundance; the ANOVA table (rounded to 2 significant digits)
# is rendered as a text table for the figure.
tmp <- filter(df, Order == "Betaproteobacteriales")
m1 <- lm(HyphalASVs_rel ~ Soil + SampleType + MRE_rel, data = tmp)
tmp <- as.data.frame(anova(m1))
tmp[, 2:5] <- signif(tmp[, 2:5], digits = 2)
summary(m1)
Bm.df <- ggtexttable(tmp, theme = ttheme("blank", base_size = 8))

In [None]:
# Myxococcales: linear model of hyphal-taxa abundance on soil, sample type and
# MRE abundance; the ANOVA table (rounded to 2 significant digits) is rendered
# as a text table for the figure.
tmp <- filter(df, Order == "Myxococcales")
m2 <- lm(HyphalASVs_rel ~ Soil + SampleType + MRE_rel, data = tmp)
tmp <- as.data.frame(anova(m2))
tmp[, 2:5] <- signif(tmp[, 2:5], digits = 2)
summary(m2)
Mm.df <- ggtexttable(tmp, theme = ttheme("blank", base_size = 8))

In [None]:
# Fibrobacterales: linear model of hyphal-taxa abundance on soil, sample type
# and MRE abundance; the ANOVA table (rounded to 2 significant digits) is
# rendered as a text table for the figure.
tmp <- filter(df, Order == "Fibrobacterales")
m3 <- lm(HyphalASVs_rel ~ Soil + SampleType + MRE_rel, data = tmp)
tmp <- as.data.frame(anova(m3))
tmp[, 2:5] <- signif(tmp[, 2:5], digits = 2)
summary(m3)
Fm.df <- ggtexttable(tmp, theme = ttheme("blank", base_size = 8))

In [None]:
# Chloroflexales: linear model of hyphal-taxa abundance on soil, sample type
# and MRE abundance; the ANOVA table (rounded to 2 significant digits) is
# rendered as a text table for the figure.
tmp <- filter(df, Order == "Chloroflexales")
m4 <- lm(HyphalASVs_rel ~ Soil + SampleType + MRE_rel, data = tmp)
tmp <- as.data.frame(anova(m4))
tmp[, 2:5] <- signif(tmp[, 2:5], digits = 2)
summary(m4)
Cm.df <- ggtexttable(tmp, theme = ttheme("blank", base_size = 8))

In [None]:
# Rhizobiales: linear model of hyphal-taxa abundance on soil, sample type and
# MRE abundance; the ANOVA table (rounded to 2 significant digits) is rendered
# as a text table for the figure.
tmp <- filter(df, Order == "Rhizobiales")
m5 <- lm(HyphalASVs_rel ~ Soil + SampleType + MRE_rel, data = tmp)
tmp <- as.data.frame(anova(m5))
tmp[, 2:5] <- signif(tmp[, 2:5], digits = 2)
summary(m5)
Rm.df <- ggtexttable(tmp, theme = ttheme("blank", base_size = 8))
#Rm.df = table_cell_font(Rm.df, row = .8, column = .8,
 #                     face = "bold")

In [None]:
# Stack the five ANOVA text tables in one column, with an empty grob as bottom
# padding, and place that column to the right of the scatter plot.
# NOTE(review): the table order (B, C, F, M, R) presumably mirrors the panel
# order of p1 -- confirm if the set of orders changes.
ModelsForGraph <- ggarrange(Bm.df, Cm.df, Fm.df, Mm.df, Rm.df, nullGrob(),
                            ncol = 1, heights = c(1, 1, 1, 1, 1, .8))
PlotAndTables <- ggarrange(p1, ModelsForGraph, ncol = 2)
PlotAndTables

filename <- file.path(figDir, "HASVs_vs_MRE.pdf")

pdf(file = filename, width = 6.5, height = 8, useDingbats = FALSE)
# Explicit print(): a bare object name only draws through top-level autoprint,
# so the PDF would come out empty if this code were run via source() or moved
# inside a function or loop.
print(PlotAndTables)
dev.off()


In [None]:
# ANOVA tables of the first two models stacked into a single data frame.
bind_rows(
    as.data.frame(anova(m1)),
    as.data.frame(anova(m2))
)
    #rbind(.,cbind(,,,,,))

In [None]:
# Stack the log2 fold change tables of all three experiments, keep only the
# CH vs CS contrast, and write the result as a tab-separated file in work_dir.
dfout <- bind_rows(df.l2fc.Exp1, df.l2fc.Exp2, df.l2fc.Exp3) %>%
    filter(Contrast == "CH_CS")
fileout <- file.path(work_dir, "CombinedCH_CS_l2fc.txt")
write.table(dfout, file = fileout, sep = "\t", row.names = FALSE)

In [None]:
# Show the last rows of the combined CH_CS table.
tail(dfout)

In [None]:
# Record the R version and loaded package versions for reproducibility.
sessionInfo()

In [None]:
# Print the citation for R itself (no package argument is given).
citation()