In [None]:
library(tidyverse)
library(dplyr)
library(stringr)
library(microViz)
library(metagMisc)
library(phyloseq)
library(phylosignal)
library(ggplot2)
library(ggsci)
library(viridis)
library(vegan)

#Plants results

###Import kraken results - Plants

In [None]:
# Load the combined Bracken table for the plants dataset (one row per taxon).
kraken.plants <- read_tsv(
  "/ebio/abt3_scratch/aborbon/taxonomy_profiles/plants/llmgp/llmgp/output_plants/kraken/all-combined-bracken.tsv",
  col_names = TRUE
)
kraken.plants

# Relative abundance: keep the taxon identifier columns plus every column
# ending in "frac", then strip the trailing "_frac" suffix from the names
# (presumably leaving the bare sample IDs -- confirm against the mapping file).
kraken.plants.frac <- kraken.plants %>%
  select(name, taxonomy_id, taxonomy, ends_with("frac"))
colnames(kraken.plants.frac) <- sub("_frac$", "", colnames(kraken.plants.frac))
# Show the table that was just built (the raw table is already printed above).
kraken.plants.frac

# Counts: same layout, built from the columns ending in "num".
kraken.plants.num <- kraken.plants %>%
  select(name, taxonomy_id, taxonomy, ends_with("num"))
# Derive the new names from the count table itself. The previous code took
# them from kraken.plants.frac, which is only correct if both tables happen to
# list the samples in exactly the same order.
colnames(kraken.plants.num) <- sub("_num$", "", colnames(kraken.plants.num))

###Create OTU table from kraken results

In [None]:
# OTU table, built from the relative-abundance ("frac") columns.
# Drop the two taxonomy columns; `name` stays as the first column so it can be
# used for the row names below.
kraken_plants_otus <- select(kraken.plants.frac, -c(taxonomy_id, taxonomy))

otus_kraken.plants <- otu_table(
  kraken_plants_otus[, 2:ncol(kraken_plants_otus)],
  taxa_are_rows = TRUE
)
rownames(otus_kraken.plants) <- kraken_plants_otus$name
otus_kraken.plants

# Taxonomy table: a one-column matrix holding the `taxonomy` string, with the
# taxon `name` as row names so it lines up with the OTU table.
tax.kraken.plants <- select(kraken.plants, name, taxonomy)

tax.kraken.plants <- tax_table(
  as.matrix(tax.kraken.plants[, 2:ncol(tax.kraken.plants)])
)
rownames(tax.kraken.plants) <- kraken.plants$name


# Sample metadata: rows of mappingAll (defined outside this notebook section)
# whose biome_2 is "Plants", indexed by their `Sample` column.
View(mappingAll)
#ps_plants=ps_filter(filt.ps.normAll, biome_2 == "Plants")
mapping_ps_plants <- mappingAll %>%
  filter(biome_2 == "Plants") %>%
  sample_data()
rownames(mapping_ps_plants) <- mappingAll %>%
  filter(biome_2 == "Plants") %>%
  pull(Sample)


# Assemble the phyloseq object for the plants Kraken profile.
ps.kraken.plants <- phyloseq(
  otus_kraken.plants,
  tax.kraken.plants,
  mapping_ps_plants
)
ps.kraken.plants

In [None]:

# Prevalence filter (prev.trh = 0.05, no abundance threshold).
filter_taxa_All.plants <- phyloseq_filter_prevalence(
  ps.kraken.plants,
  prev.trh = 0.05,
  abund.trh = NULL,
  threshold_condition = "OR",
  abund.type = "total"
)

# Drop samples left with a zero total after the taxa filter.
filter_taxa_All2.plants <- prune_samples(
  sample_sums(filter_taxa_All.plants) > 0,
  filter_taxa_All.plants
)
filter_taxa_All2.plants

# Rescale every sample to sum to 1, then remove the bioreactor samples and
# the samples with no `family` annotation in the metadata.
filt.ps.relab.normAll.plants <- filter_taxa_All2.plants %>%
  transform_sample_counts(function(x) (x / sum(x))) %>%
  ps_filter(biome_3 != "bioreactor metagenome") %>%
  ps_filter(!is.na(family))

filt.ps.relab.normAll.plants@sam_data

In [None]:
# Within-group dissimilarities (between_groups = FALSE), samples grouped by
# biome_3, for two non-phylogenetic metrics.
diss.jacc.plants <- phyloseq_group_dissimilarity(
  filt.ps.relab.normAll.plants,
  group = "biome_3",
  method = "jaccard",
  between_groups = FALSE
) +
  ggtitle("Jaccard") +
  geom_point() +
  scale_fill_lancet(alpha = 0.7)

diss.bray.plants <- phyloseq_group_dissimilarity(
  filt.ps.relab.normAll.plants,
  group = "biome_3",
  method = "bray",
  between_groups = FALSE
) +
  ggtitle("Bray-Curtis") +
  geom_point() +
  scale_fill_lancet(alpha = 0.7)

# Disabled variants kept for reference (family grouping, UniFrac metrics).
#diss.bray.plants.fam=phyloseq_group_dissimilarity(filt.ps.relab.normAll.plants,group="family",method="bray",between_groups = FALSE)+ggtitle("Bray-Curtis")+geom_point()

# diss.unifrac1=phyloseq_group_dissimilarity(filt.ps.relab.normAll.vert,group="biome_1",method="unifrac",between_groups = FALSE)+ggtitle("uUniFrac")
# diss.wunifrac1=phyloseq_group_dissimilarity(filt.ps.relab.normAll.vert,group="biome_1",method="wunifrac",between_groups = FALSE)+ggtitle("wUniFrac")

diss.jacc.plants
diss.bray.plants

#Ordination plots with non-phylogenetic metrics - microbiome

In [None]:
# PCoA of the plants Kraken profile on Jaccard distances, coloured by family.
plot_ordination(
  physeq = filt.ps.relab.normAll.plants,
  ordinate(filt.ps.relab.normAll.plants, distance = "jaccard", method = "PCoA"),
  type = "samples",
  color = "family"
) +
  theme_bw() +
  geom_point(size = 2) +
  scale_color_lancet() +
  ggtitle("Jaccard")

# Same ordination on Bray-Curtis distances.
plot_ordination(
  physeq = filt.ps.relab.normAll.plants,
  ordinate(filt.ps.relab.normAll.plants, distance = "bray", method = "PCoA"),
  type = "samples",
  color = "family"
) +
  theme_bw() +
  geom_point(size = 2) +
  scale_color_lancet() +
  ggtitle("BrayCurtis")

In [None]:
# Within-family Bray-Curtis dissimilarity of the Kraken profile, with the
# Asparagaceae samples excluded beforehand.
diss.plants.ubiom.bray.fam <- phyloseq_group_dissimilarity(
  (ps_filter(filt.ps.relab.normAll.plants, family != "Asparagaceae")),
  group = "family",
  method = "bray",
  between_groups = FALSE,
  notch = FALSE
) +
  ggtitle("uBiome-Bray-Curtis")
diss.plants.ubiom.bray.fam

#Mantel test for Plants results

In [None]:
# Subset the flagellome results (filt.ps.relab.normAll, created outside this
# section) to the samples whose biome_2 is "Plants".
ps_fla.plants <- filt.ps.relab.normAll %>%
  ps_filter(biome_2 == "Plants")
ps_fla.plants

##

In [None]:
# Match samples between the flagellin results and the overall microbiome
# results: reuse the sample metadata of the filtered Kraken object.
tmp.sam.plants <- filt.ps.relab.normAll.plants@sam_data

# Flagellome phyloseq restricted to the samples present in the microbiome
# results (fewer samples).
ps.filtab.plants <- phyloseq(
  filt.ps.relab.normAll@otu_table,
  filt.ps.relab.normAll@tax_table,
  tmp.sam.plants
)

# Bray-Curtis distance matrices for both data sets.
bray.plants.fla <- distance(ps.filtab.plants, method = "bray")
bray.plants.ubiom <- distance(filt.ps.relab.normAll.plants, method = "bray")

# mantel() pairs the two matrices by position, not by label, so both must
# hold exactly the same samples in the same order. The two phyloseq objects
# come from different OTU tables, so align them on the shared sample names
# before testing.
shared.samples.plants <- intersect(
  labels(bray.plants.ubiom),
  labels(bray.plants.fla)
)
if (length(shared.samples.plants) < 3) {
  stop(
    "Fewer than 3 samples are shared between the flagellome and microbiome ",
    "distance matrices; cannot run the Mantel test.",
    call. = FALSE
  )
}
bray.plants.fla <- as.dist(
  as.matrix(bray.plants.fla)[shared.samples.plants, shared.samples.plants]
)
bray.plants.ubiom <- as.dist(
  as.matrix(bray.plants.ubiom)[shared.samples.plants, shared.samples.plants]
)

# Spearman-based Mantel correlation between the two distance matrices.
mantel.plants <- mantel(
  bray.plants.fla,
  bray.plants.ubiom,
  method = "spearman",
  permutations = 9999
)
mantel.plants

#Overall microbiome composition in plants dataset

In [None]:
ps.filtab.plants

# Bray-Curtis PCoA of the Kraken-derived profile
# (filt.ps.relab.normAll.plants), coloured by biome_3.
# NOTE(review): this cell was labelled "Flagellome" but ordinates the Kraken
# object, not ps.filtab.plants -- confirm which data set was intended.
plot_ordination(
  physeq = filt.ps.relab.normAll.plants,
  ordinate(filt.ps.relab.normAll.plants, distance = "bray", method = "PCoA"),
  type = "samples",
  color = "biome_3"
) +
  theme_bw() +
  geom_point(size = 2) +
  scale_color_viridis_d() +
  ggtitle("BrayCurtis")

# Within-family Bray-Curtis dissimilarity of the plants flagellome object.
# This call previously used `ps.filt.ab.amato`, an object that belongs to a
# different data set and is not created anywhere in this notebook.
phyloseq_group_dissimilarity(
  ps.filtab.plants,
  group = "family",
  method = "bray",
  between_groups = FALSE
) +
  ggtitle("Bray-Curtis")

#Alpha diversity with absolute counts

In [None]:
# OTU table built from the count ("num") columns; `name` stays as the first
# column so it can be used for the row names.
kraken_plants_otus.num <- select(kraken.plants.num, -c(taxonomy_id, taxonomy))

otus_kraken.plants.num <- otu_table(
  kraken_plants_otus.num[, 2:ncol(kraken_plants_otus.num)],
  taxa_are_rows = TRUE
)
rownames(otus_kraken.plants.num) <- kraken_plants_otus.num$name
#otus_kraken.plants

# Count-based phyloseq object, reusing the taxonomy table and sample metadata
# built earlier, minus the bioreactor samples and the samples with no
# `family` annotation.
ps.kraken.plants.num <- phyloseq(
  otus_kraken.plants.num,
  tax.kraken.plants,
  mapping_ps_plants
) %>%
  ps_filter(biome_3 != "bioreactor metagenome") %>%
  ps_filter(!is.na(family))
#ps.kraken.plants

# Prevalence filter (prev.trh = 0.05, no abundance threshold).
filter_taxa_plants.tmp1 <- phyloseq_filter_prevalence(
  ps.kraken.plants.num,
  prev.trh = 0.05,
  abund.trh = NULL,
  threshold_condition = "OR",
  abund.type = "total"
)

# Drop samples left with a zero total after the taxa filter.
filter_taxa_tmp2 <- prune_samples(
  sample_sums(filter_taxa_plants.tmp1) > 0,
  filter_taxa_plants.tmp1
)
filter_taxa_tmp2

# Rescale every sample to sum to 1.
filt.ps.relab.normAll.plants.num <- transform_sample_counts(
  filter_taxa_tmp2,
  function(x) (x / sum(x))
)



#Procrustes analysis with microbiome and flagellome matrices

In [None]:
# Alpha diversity of the count-based, prevalence-filtered object, grouped and
# coloured by biome_3.
# phyloseq matches measure names case-sensitively ("Observed", "Chao1",
# "Shannon", ...). The lowercase "observed" and "chao1" used before were
# silently dropped, so only the Shannon panel was drawn.
richness <- plot_richness(
  filter_taxa_tmp2,
  x = "biome_3",
  color = "biome_3",
  measures = c("Observed", "Chao1", "Shannon")
) +
  geom_boxplot()
richness