In [None]:
#Loading libraries
library(ggplot2)
library(vegan)
library(phyloseq)
library(RColorBrewer)
library(reshape2)
library(ggsignif)
library(ggplot2)
library(ape)
library(VennDiagram)
library(tidyverse)
library(tibble)
library(ggnewscale)
library(forcats)
library(scales)
library(UpSetR)
library(grDevices)

In [None]:
#Creating phyloseq object
#OTU count table: taxa in rows (first column holds the row names),
#one column per sample
otu <- as.matrix(read.table("hugorepset.otus", header = TRUE, row.names = 1))
OTU <- otu_table(otu, taxa_are_rows = TRUE)

#Taxonomy table, first column holds the row names
taximat <- as.matrix(read.table("tax.clean", header = TRUE, row.names = 1))
taxi <- tax_table(taximat)

#Provisional object, only used to obtain the sample names as phyloseq
#reports them
tomate_soil <- phyloseq(OTU, taxi)
otu_samples <- sample_names(tomate_soil)

#Sample metadata. NOTE: this data frame shares its name with
#phyloseq::sample_data(); calls to sample_data(...) below still reach the
#function because R skips non-function objects when resolving a call
sample_data <- read.table("metadata2_o.tsv", header = TRUE, row.names = 1,
                          sep = "\t")
head(sample_data)

#Line metadata rows up with the OTU table columns by sample name, so a
#differently ordered metadata file cannot silently mislabel samples.
#When the names do not match, keep the previous positional pairing but
#say so loudly
if (all(otu_samples %in% rownames(sample_data))) {
  metadata_aligned <- sample_data[otu_samples, , drop = FALSE]
} else {
  stopifnot(nrow(sample_data) == length(otu_samples))
  warning("Metadata row names do not match the OTU table sample names; ",
          "pairing metadata to samples by position.", call. = FALSE)
  metadata_aligned <- sample_data
}

sampledata <- sample_data(data.frame(
  kind = metadata_aligned$kind,
  tag = metadata_aligned$tag,
  type = metadata_aligned$type,
  row.names = otu_samples
))

#Final object: counts + sample metadata + taxonomy
tomate_soil <- phyloseq(OTU, sampledata, taxi)
tomate_soil

In [None]:
#Collapsing OTUs based on genus
tom_soil_genus <- tax_glom(tomate_soil, taxrank = "genus")

#Estimate relative abundance: each count divided by its sample total
tom_soil_genus_r <- transform_sample_counts(
  tom_soil_genus,
  function(x) x / sum(x)
)

#Creating Solirubrobacter subset
#Plain character vector of genus labels to keep. The previous
#list(c(...)) form only matched because it held a single name; a second
#name added to the inner vector would have matched nothing
solir_l <- c("g__Solirubrobacter")
soli <- subset_taxa(tom_soil_genus_r, genus %in% solir_l)
soli

In [None]:
#Abundance heatmap per sample#
#Obtaining the data frame that plot_bar() builds for its graph
p <- plot_bar(soli, "genus")
yx <- as.data.frame(p$data)
yx <- yx[order(-yx$Abundance), ]

#Reorder samples: fixed display order for the heatmap rows
sample_order <- c(
  "GTO1US", "GTO3US", "GTO2US", "JAL1US",
  "JAL2US", "JAL3US", "JAL4US", "JAL5US",
  "NAY2US", "NAY3US", "SIN2US", "SLP1US",
  "AGS1US", "DGO1US", "ZAC1US", "SIN1US",
  "GTO3S0", "SIN2S0", "SLP1S0", "AGS1S0",
  "DGO1S0", "ZAC1S0", "JAL1S0", "JAL2S0",
  "JAL5S0", "NAY2S0", "NAY3S0", "GTO1S0",
  "GTO2S0", "JAL3S0", "SIN1S0", "JAL4S0",
  "JAL3SF", "GTO1SF", "SIN1SF", "GTO2SF",
  "JAL1SF", "JAL2SF", "JAL4SF", "JAL5SF",
  "NAY2SF", "NAY3SF", "GTO3SF", "SLP1SF",
  "SIN2SF", "DGO1SF", "ZAC1SF", "AGS1SF",
  "DGORZ", "GTO1RZ", "GTO2RZ", "GTO3RZ",
  "JAL2RZ", "JAL3RZ", "JAL5RZ", "NAY2RZ",
  "NAY3RZ", "ZACRZ", "SIN1RZ", "JAL1RZ",
  "JAL4RZ", "SLPRZ", "SIN2RZ", "GTO1EC",
  "GTO3EC", "JAL2EC", "JAL3EC", "JAL5EC",
  "NAY2EC", "SIN1EC", "SIN2EC", "AGSEC",
  "GTO2EC", "JAL1EC", "SLPEC", "JAL2RFT",
  "GTO2RFT", "JAL1RFT", "JAL3RFT", "JAL4RFT",
  "JAL5RFT", "GTO1RFT", "SLP1RFT", "DGO1RFT",
  "ZAC1RFT", "NAY2RFT", "SIN2RFT", "SIN1RFT",
  "AGS1RFT", "GTO3RFT", "NAY3RFT", "JAL1ECT",
  "JAL4ECT", "JAL5ECT", "GTO3ECT", "NAY2ECT",
  "SIN1ECT", "GTO1ECT", "JAL2ECT", "SIN2ECT",
  "AGS1ECT", "GTO2ECT", "SLP1ECT", "ZAC1ECT",
  "DGO1ECT", "JAL3ECT", "NAY3ECT"
)

#A sample that is absent from sample_order becomes NA in the factor
#below; report it instead of letting it happen silently
unlisted <- setdiff(unique(as.character(yx$Sample)), sample_order)
if (length(unlisted) > 0) {
  warning("Samples missing from the heatmap ordering (will show as NA): ",
          paste(unlisted, collapse = ", "), call. = FALSE)
}

#Keep the ordered factor inside the data frame so it always stays
#row-aligned with the values being plotted
ord <- factor(yx$Sample, levels = sample_order)
yx$ord <- ord

#Creating heatmap
ggplot(yx, aes(x = reorder(genus, Abundance), y = ord, fill = Abundance)) +
  geom_raster() +
  theme_minimal() +
  theme(text = element_text(size = 17)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_gradient(low = "#bebebe", high = "#000000",
                      na.value = "white", trans = "log2")

#Abundance heatmap per sample type#
#Number of samples in each type; needed below to turn sums into means
n_per_type <- table(get_variable(soli, "type"))

#Collapsing samples according to sample type
#merge_samples() adds up the abundance table within each group, so the
#merged values are sums of relative abundances
merged <- merge_samples(soli, "type")
merged

#Getting data from graph
p <- plot_bar(merged, "genus")
yx <- as.data.frame(p$data)

#Convert each group's sum into a mean relative abundance; otherwise a
#type with more samples looks more abundant simply because of its size
yx$Abundance <- yx$Abundance /
  as.numeric(n_per_type[as.character(yx$Sample)])
yx <- yx[order(-yx$Abundance), ]

#Reordering samples; the factor is stored in the data frame so it stays
#row-aligned with the plotted values
ord <- factor(yx$Sample, levels = c("soil", "ruderal", "tomato"))
yx$ord <- ord

#Generating graph
heat_type <- ggplot(
  yx,
  aes(x = ord, y = reorder(genus, Abundance), fill = Abundance)
) +
  geom_raster() +
  theme_minimal() +
  theme(text = element_text(size = 30)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_gradient(low = "#bebebe", high = "#000000",
                      na.value = "white", trans = "log2")
heat_type

#Save this specific plot rather than whatever plot was built last
ggsave("soli_heatmap2.pdf", plot = heat_type,
       width = 58, height = 12, units = "cm")

In [None]:
#Generating boxplot#
#Getting data from graph (extracted once and reused for the normality
#tests and for the box plot)
p <- plot_bar(soli, "genus")
yx <- as.data.frame(p$data)
head(yx)

#Normality test (Shapiro-Wilk) on the abundances of each sample type
print("Normality test soil")
soil <- yx %>% filter(type == "soil")
soil_ra <- soil$Abundance
shapiro.test(soil_ra)

print("Normality test ruderal")
ruderal <- yx %>% filter(type == "ruderal")
ruderal_ra <- ruderal$Abundance
shapiro.test(ruderal_ra)

print("Normality test tomato")
tomato <- yx %>% filter(type == "tomato")
tomato_ra <- tomato$Abundance
shapiro.test(tomato_ra)

#Solirubrobacter abundance box plot
#Ordered sample type, stored in the data frame so it stays row-aligned
#with the abundances
orden <- factor(yx$type, levels = c("soil", "ruderal", "tomato"))
yx$orden <- orden
cajas_dob <- ggplot(data = yx, aes(x = orden, y = Abundance)) +
  geom_boxplot() +
  geom_point(size = 5) +
  theme_light()

#Using geom_signif to add significance bars and asterisks
#One Wilcoxon test per pair of sample types; y_position staggers the
#three brackets so they do not overlap
cajas_sig <- cajas_dob +
  geom_signif(test = "wilcox.test",
              comparisons = list(c("soil", "ruderal")),
              map_signif_level = TRUE, y_position = 0.055) +
  geom_signif(test = "wilcox.test",
              comparisons = list(c("soil", "tomato")),
              map_signif_level = TRUE, y_position = 0.05) +
  geom_signif(test = "wilcox.test",
              comparisons = list(c("tomato", "ruderal")),
              map_signif_level = TRUE, y_position = 0.045) +
  theme(text = element_text(size = 15)) +
  labs(title = "Solirubrobacter Relative Abundance")
cajas_sig

#Save this specific plot rather than whatever plot was built last
ggsave("Soli_boxplot.pdf", plot = cajas_sig,
       width = 25, height = 20, units = "cm")