-
Notifications
You must be signed in to change notification settings - Fork 29
Description
Hello and thanks for the package.
I have been using differential heat trees to compare the relative abundance of taxa between three land uses, combining samples taken at multiple sites from each land use. I have noticed a result on the heat tree matrix which does not match the relative abundance data contained in the dataframe created by psmelt(physeq) - treatment_1 is shown on the tree as having higher abundance than treatment_2, when relative abundance data from psmelt shows the opposite.
This is the diff_table entry for the comparison in question. Why does it display "Inf" for the log2_median_ratio for this comparison? Is this causing the inconsistency?
# A tibble: 231 × 7
taxon_id | treatment_1| treatment_2 | log2_median_ratio | median_diff | mean_diff | wilcox_p_value
bi | maize | native | Inf | 0.0302 | -0.0532 | 0.767
The taxa in question is represented by 5 OTUs in treatment_1 (combining 2 samples) and only 1 OTU from treatment_2 (from one sample) but I believed I was plotting the taxon abundance not the OTUs so think this shouldn't be an issue. All OTUs are assigned the same species. Below is the code I have used to create the metacoder plot:
#CREATE` OBJ ----
{otu_table <- (otu_table(physeq))
otu_table <- t(otu_table)
sample_data <- sample_data(physeq)
sample_data <- as.data.frame(sample_data)
names(sample_data)[names(sample_data) == "sample_name"] <- "SampleID"
# Ensure `otu_table` and `tax_table` are dataframes
otu_table_df <- as.data.frame(otu_table)
tax_table_df <- as.data.frame(tax_table)
# Add row names as a new column named 'OTU'
otu_table_df <- otu_table_df %>%
rownames_to_column(var = "OTU")
tax_table_df <- tax_table_df %>%
rownames_to_column(var = "OTU")
#add phyla to tax table
tax_table_df <- tax_table_df %>%
mutate(Phylum = "Glomeromycota")
#create taxonomy column
tax_table_df$taxonomy <- with(tax_table_df,
paste(Phylum, Class, Order, Family, Genus, Species, sep = ";"))
otu_data <- otu_table_df
tax_data <- tax_table_df
tax_data
#remove NA from all ranks except Genus and Species
tax_data <- tax_data %>%
filter(!is.na(Class) & !is.na(Order) & !is.na(Family))
tax_data
# Join
otu_data <- left_join(otu_data, tax_data, by = "OTU")
# Create a sequential OTU_ID column and place first
otu_data$OTU_ID <- seq_along(otu_data[, 1])
otu_data <- otu_data %>%
select(OTU_ID, everything())
#remove OTU sequences
otu_data <- otu_data[, !colnames(otu_data) %in% "OTU"]
#make taxmap
obj <- parse_tax_data(otu_data,
class_cols = "taxonomy",
class_sep = ";")
names(obj$data) <- "otu_counts"
#rarefy (physeq has already been rarefied)
obj$data$otu_rarefied <- rarefy_obs(obj, "otu_counts", other_cols = TRUE)
#remove 0 read OTUs
no_reads <- rowSums(obj$data$otu_rarefied[, sample_data$SampleID]) == 0
obj <- filter_obs(obj, "otu_rarefied", ! no_reads)
#convert read counts to proportions
obj$data$otu_props <- calc_obs_props(obj, "otu_counts", other_cols = TRUE)
obj$data$tax_abund <- calc_taxon_abund(obj, "otu_props")
obj$data$type_abund <- calc_group_mean(obj, "tax_abund",
cols = sample_data$SampleID,
groups = sample_data$system)
obj$data$diff_table <- compare_groups(obj, data = "tax_abund",
cols = sample_data$SampleID,
groups = sample_data$system)
print(obj$data$diff_table)
#correct for multiple comparisons
obj <- mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
dont_print <- c("NA")
jpeg(filename = "heat_tree_matrix_system_soil.jpg")
obj %>%
metacoder::filter_taxa(supertaxa = TRUE, reassign_obs = c(diff_table = FALSE)) %>%
heat_tree_matrix(data = "diff_table",
node_label = ifelse(taxon_names %in% dont_print, "", taxon_names),
node_size = n_obs,
node_color = log2_median_ratio, # difference between groups
node_color_trans = "linear",
node_color_interval = c(-3, 3), # symmetric interval
edge_color_interval = c(-3, 3), # symmetric interval
node_color_range = diverging_palette(), # diverging colors
node_color_axis_label = "Log 2 ratio of median counts",
node_size_axis_label = "Number of OTUs",
layout = "da", initial_layout = "re",
key_size = 0.6,
seed = 2)
dev.off()