# Run Mantel tests

Specify input distance files and output directory:

In [None]:
# input files: pairwise phylogenetic distances between clade founders and
# pairwise distances between clade mutation rates
phylodist_file <- "results/clade_founder_tree/clade_founders.mldist"
ratesdist_file <- "results/synonymous_mut_rates/clade_rate_distances.csv"

# output directory; `recursive = TRUE` also creates missing parent directories,
# so a missing "results" folder no longer causes a silently ignored failure
outdir <- "results/mantel_test"
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

Load packages (the random seed is set later, immediately before each Mantel test):

In [None]:
require(vegan) # for mantel test 
require(dplyr)
require(tidyverse)
require(ggplot2)
require(cowplot)
require(scales)
require(broom) # for tidying distance matrix

Read data and set up for tests:

In [None]:
# Load the square matrix of phylogenetic distances. The first line of the file
# is skipped (it is not part of the matrix) and column 1 supplies the row names.
phylodist <- read.table(
    phylodist_file,
    header = FALSE,
    skip = 1,
    row.names = 1
)

# the file has no header row, so label the columns with the row names
colnames(phylodist) <- rownames(phylodist)

head(phylodist)

In [None]:
# Load the comma-separated table of pairwise mutation-rate distances
# (the first line of the file holds the column names).
ratesdist <- read.table(
    ratesdist_file,
    sep = ",",
    header = TRUE
)

head(ratesdist)

Do analysis for each rate type:

In [None]:
# For each clade subset ("any", "Omicron", "not Omicron") and each rate type,
# build a symmetric matrix of mutation-rate distances, run a Mantel test of it
# against the square root of the phylogenetic distances, and store a scatter
# plot annotated with the test result. All panels are combined at the end.
rate_types <- unique(ratesdist$rate_type)

plot_list <- list()

for (is_omicron in c("any", "Omicron", "not Omicron")) {
    for (ratetype in rate_types) {

        # the Omicron / not-Omicron subsets are only analysed for "rates"
        if ((is_omicron != "any") && (ratetype != "rates")) {
            next
        }

        # "clr_rates" are never tested
        if (ratetype == "clr_rates") {
            next
        }

        # select just rates of interest
        ratesdist_type <- ratesdist %>%
            filter(rate_type == ratetype) %>%
            select(!rate_type)

        if (is_omicron != "any") {
            ratesdist_type <- ratesdist_type %>%
                filter(is_Omicron == is_omicron | clade_1 == clade_2)
        }

        clades <- unique(c(ratesdist_type$clade_1, ratesdist_type$clade_2))

        # self-self comparisons (distance 0) for the matrix diagonal
        self_distances_diagonal <- data.frame(
            clade_1 = clades,
            clade_2 = clades,
            mut_rate_distance = 0,
            is_Omicron = is_omicron
        )

        ratesdist_type <- bind_rows(ratesdist_type, self_distances_diagonal)

        # pivot wider: one row per clade_1, one column per clade_2
        ratesdist_matrix <- ratesdist_type %>%
            select(!is_Omicron) %>%
            pivot_wider(names_from = clade_2, values_from = mut_rate_distance) %>%
            column_to_rownames("clade_1") %>%
            as.matrix()

        # reorder columns to match order of rownames
        ratesdist_matrix <- ratesdist_matrix[, rownames(ratesdist_matrix)]

        # Make the matrix symmetric. A pair given in only one orientation
        # leaves NA in the mirrored cell after pivoting. First backfill missing
        # upper-triangle cells from the lower triangle, then mirror the upper
        # triangle onto the lower one. Mirroring alone would overwrite a value
        # that exists only in the lower triangle with NA.
        in_upper <- upper.tri(ratesdist_matrix)
        in_lower <- lower.tri(ratesdist_matrix)
        fill_upper <- in_upper & is.na(ratesdist_matrix)
        ratesdist_matrix[fill_upper] <- t(ratesdist_matrix)[fill_upper]
        ratesdist_matrix[in_lower] <- t(ratesdist_matrix)[in_lower]

        # a missing pairwise distance would make the Mantel statistic NA
        if (anyNA(ratesdist_matrix[in_lower])) {
            stop("rate distance matrix has missing pairwise distances")
        }

        # order the two matrices in the same way
        # also removes clades from the phylo matrix that aren't in the rate
        # distance matrix; finally, take square root of phylogenetic distances
        phylodist_reordered_subset <- sqrt(
            phylodist[rownames(ratesdist_matrix), colnames(ratesdist_matrix)]
        )

        # make sure row/col names are the same
        if (!identical(rownames(phylodist_reordered_subset), rownames(ratesdist_matrix))) {
            stop("something is wrong with your matrix setup")
        }

        if (!identical(colnames(phylodist_reordered_subset), colnames(ratesdist_matrix))) {
            stop("something is wrong with your matrix setup")
        }

        # run the mantel test on square root of phylogenetic distances;
        # the seed is reset before every test so each result is reproducible
        set.seed(1)
        mantelTestResults <- vegan::mantel(
            phylodist_reordered_subset,
            ratesdist_matrix,
            permutations = 100000
        ) # default method = pearson

        # data frame of results
        mantelTestResult_df <- data.frame(
            method = as.character(mantelTestResults$method),
            statistic = as.numeric(mantelTestResults$statistic),
            permCount = as.numeric(mantelTestResults$permutations),
            signif = as.numeric(mantelTestResults$signif)
        )

        # long table of phylogenetic distances for plotting; converting to a
        # `dist` object first drops self-self distances and duplicate pairs
        phylodist_reordered_subset_table <- tidy(as.dist(phylodist_reordered_subset))

        colnames(phylodist_reordered_subset_table) <- c(
            "clade_1", "clade_2", "phylogenetic_distance"
        )

        # combine; the self-self rows added above have no partner in the
        # phylogenetic table and therefore drop out of the merge
        merged_distances <- merge(
            ratesdist_type,
            phylodist_reordered_subset_table,
            by = c("clade_1", "clade_2")
        )

        # NOTE(review): the merge matches on the ordered pair
        # (clade_1, clade_2), so it relies on tidy() emitting pairs in the same
        # orientation as the rates table; warn if comparisons were lost.
        n_clades <- nrow(ratesdist_matrix)
        if (nrow(merged_distances) != n_clades * (n_clades - 1) / 2) {
            warning(
                "merged ", nrow(merged_distances), " comparisons but expected ",
                n_clades * (n_clades - 1) / 2, " for ", n_clades, " clades",
                call. = FALSE
            )
        }

        # add labels:
        merged_distances$comparison_label <- paste0(
            merged_distances$clade_1, "x", merged_distances$clade_2
        )

        # panel title: clade subset, plus the rate type when it is not "rates"
        if (is_omicron == "any") {
            title <- "all clades"
        } else {
            title <- paste0(is_omicron, " clades")
        }

        if (ratetype == "no_GtoT_rates") {
            title <- paste0(title, ", exclude G->T")
        } else if (ratetype != "rates") {
            title <- paste0(title, " ", ratetype)
        }

        # plot results, with the Mantel statistic and p-value in the top-left
        plot <- ggplot(merged_distances, aes(x=phylogenetic_distance, y=mut_rate_distance)) +
            geom_point(size=2,alpha=0.85) +
            geom_text(
                data=mantelTestResult_df,
                aes(
                    x=0,
                    hjust="left",
                    y=Inf,
                    vjust=1.1,
                    label=paste0(
                        "Pearson's r:  ", signif(statistic,2),
                        "\nP-value: ", as.character(scientific(signif,2))
                    )
                ),
                size=5
            ) +
            ggtitle(title) +
            theme_cowplot() +
            theme(plot.title=element_text(hjust=0.5)) +
            xlab("square root phylogenetic distance") +
            ylab("distance between mutation rates")

        plot_list[[length(plot_list) + 1]] <- plot

    }
}

# arrange all panels in a single grid and display it
all_plot <- plot_grid(plotlist = plot_list)

all_plot

In [None]:
# Save the combined grid of Mantel-test panels as a PDF inside `outdir`.
plotfile <- file.path(outdir, "mantel_test_plot.pdf")

# no trailing comma after the last argument: it would pass an extra empty
# argument on through `...`
save_plot(
    plotfile,
    all_plot,
    base_width = 7.5,
    base_height = 7.5
)