# Identification of putative contaminant taxa

In [1]:
setwd("/mnt/c/git_repos/blood_microbial_signatures/")
require(foreach)
require(tidyverse)
require(ggplot2)
require(data.table)
require(doParallel)
require(compositions)

Loading required package: foreach

“package ‘foreach’ was built under R version 4.1.3”
Loading required package: tidyverse

“package ‘tidyverse’ was built under R version 4.1.3”
“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0     [32m✔[39m [34mpurrr  [39m 1.0.1
[32m✔[39m [34mtibble [39m 3.1.8     [32m✔[39m [34mdplyr  [39m 1.1.0
[32m✔[39m [34mtidyr  [39m 1.3.0     [32m✔[39m [34mstringr[39m 1.5.0
[32m✔[39m [34mreadr  [39m 2.1.3     [32m✔[39m [34mforcats[39m 1.0.0
“package ‘ggplot2’ was built under R version 4.1.3”
“package ‘tibble’ was built under R version 4.1.3”
“package ‘tidyr’ was built under R version 4.1.3”
“package ‘readr’ was built under R version 4.1.3”
“package ‘purrr’ was built under R version 4.1.3”
“package ‘dplyr’ was built under R version 4.1.3”
“p

### Data processing functions

In [2]:
# Read an abundance table and derive the sample identifier.
#
# The `sample` column is split on "." into two pieces: the first piece is
# discarded (into = NA) and the second becomes `npm_research_id`.
#
# file_path: path to a delimited text file readable by fread().
# Returns:   a data.frame in which `sample` is replaced by `npm_research_id`.
load_data <- function(file_path) {
    abundance <- as.data.frame(fread(file_path))
    separate(abundance, sample, into = c(NA, "npm_research_id"), sep = "\\.")
}


# Read the sample metadata and restrict it to samples present in `df`.
#
# file_path: path to the metadata table; empty strings are read as NA.
# df:        table with an `npm_research_id` column; only metadata rows whose
#            `npm_research_id` occurs in it are kept.
# Returns:   the filtered metadata without the `removal_requested_by_supplier`
#            column and with every NA replaced by the string "unknown".
load_metadata <- function(file_path, df) {
    meta_all <- fread(file_path, na.strings = c("", NA))
    meta_matched <- filter(meta_all, npm_research_id %in% df$npm_research_id)
    meta_trimmed <- select(meta_matched, -removal_requested_by_supplier)

    # Missing values become an explicit "unknown" level.
    replace(meta_trimmed, is.na(meta_trimmed), "unknown")
}


# Down-sample the metadata to at most `n_subset` samples per supplying site.
#
# meta:     metadata with `site_supplying_sample` and `npm_research_id`.
# n_subset: maximum number of samples to keep per site. Sites with more IDs
#           than this are randomly sampled (without replacement); smaller
#           sites are kept whole.
# Returns:  the rows of `meta` (as a data.frame) whose `npm_research_id` was
#           selected.
subset_metadata <- function(meta, n_subset) {
    meta <- as.data.frame(meta)

    # One vector of retained IDs per site, visited in order of first appearance.
    ids_per_site <- lapply(unique(meta$site_supplying_sample), function(site) {
        site_ids <- meta$npm_research_id[meta$site_supplying_sample == site]
        if (length(site_ids) <= n_subset) {
            return(site_ids)
        }
        sample(site_ids, n_subset)
    })
    subset_vec <- unlist(ids_per_site)

    filter(meta, npm_research_id %in% subset_vec)
}


# Keep only the rows of `df` whose `npm_research_id` also occurs in `meta`.
retrieve_rows_from_meta <- function(df, meta) {
    filter(df, npm_research_id %in% meta$npm_research_id)
}


# Drop the columns named in `col_to_exclude` from `df`.
# all_of() makes this an error if any of the named columns is absent.
remove_cols <- function(df, col_to_exclude) {
    select(df, -all_of(col_to_exclude))
}


# Drop taxa that are detected in too small a fraction of samples.
#
# df:            abundance table with an `npm_research_id` column plus one
#                numeric column per taxon.
# frac_presence: a taxon is kept only if the fraction of samples with a value
#                > 0 is strictly greater than this.
# Returns:       `df` reduced to `npm_research_id` (first) followed by the
#                retained taxa. Also prints how many taxa were retained.
remove_low_freq_taxa <- function(df, frac_presence) {
    # Select taxa by name rather than by position so the result does not depend
    # on `npm_research_id` being the first column; drop = FALSE keeps a
    # single-taxon table two-dimensional.
    taxa_df <- df[, colnames(df) != "npm_research_id", drop = FALSE]
    n_original <- ncol(taxa_df)

    # Fraction of samples in which each taxon has a non-zero value.
    frac_present <- colMeans(taxa_df > 0)

    to_keep <- names(frac_present)[frac_present > frac_presence]
    n_new <- length(to_keep)
    print(str_glue("{n_new} / {n_original} taxa are present in {frac_presence} of samples"))

    df %>% select(all_of(c("npm_research_id", to_keep)))
}


# Convert a count table to per-sample relative abundances.
#
# df:      table with `npm_research_id` plus one count column per taxon.
# Returns: a data.frame with `npm_research_id` as the first column and every
#          taxon value divided by that row's total across taxa.
otu_to_RA <- function(df) {
    is_taxon <- colnames(df) != "npm_research_id"
    counts <- as.matrix(df[, is_taxon])

    # Dividing a matrix by a per-row vector scales each row by its own total.
    rel_abund <- as.data.frame(counts / rowSums(counts))

    add_column(rel_abund, npm_research_id = df$npm_research_id, .before = 1)
}


# Pick the metadata column names that match a regular expression.
#
# meta:       metadata table.
# meta_regex: regular expression matched against the column names of `meta`.
# to_exclude: column names to drop from the matches.
# Returns:    character vector of matching column names, in column order.
get_meta_cols <- function(meta, meta_regex, to_exclude) {
    matched <- grep(meta_regex, colnames(meta), value = TRUE)
    is_excluded <- matched %in% to_exclude
    matched[!is_excluded]
}


# Call presence/absence from read counts.
#
# df:             table with `npm_research_id` plus one count column per taxon.
# read_threshold: values <= this become 0, values > this become 1.
# Returns:        a data.frame of 0/1 values with the sample IDs as row names.
otu_to_PA <- function(df, read_threshold) {
    presence <- column_to_rownames(df, "npm_research_id")

    # Order matters: zero the sub-threshold cells first, then flag the rest.
    presence[presence <= read_threshold] <- 0
    presence[presence > read_threshold] <- 1

    presence
}


# Call presence/absence from relative abundances.
#
# RA_df:        table with `npm_research_id` plus one relative-abundance column
#               per taxon.
# PA_threshold: values <= this become 0, values > this become 1.
# Returns:      a data.frame of 0/1 values with the sample IDs as row names.
RA_to_PA <- function(RA_df, PA_threshold) {
    presence <- column_to_rownames(RA_df, "npm_research_id")

    # Order matters: zero the sub-threshold cells first, then flag the rest.
    presence[presence <= PA_threshold] <- 0
    presence[presence > PA_threshold] <- 1

    presence
}


# Apply clr() (from the compositions package) to the taxon columns.
#
# df:      table with `npm_research_id` plus one abundance column per taxon.
# Returns: a data.frame with `npm_research_id` first, followed by the
#          transformed taxon columns under their original names.
RA_to_clr <- function(df) {
    taxa_only <- df[, colnames(df) != "npm_research_id"]

    # check.names = FALSE keeps taxon names that are not syntactic R names.
    clr_values <- as.data.frame(clr(taxa_only), check.names = FALSE)
    id_column <- data.frame(npm_research_id = df$npm_research_id)

    cbind(id_column, clr_values)
}


# Keep the taxa detected in more than `presence_t` samples.
#
# prev_df:    presence/absence table (one column per taxon, no ID column).
# presence_t: a taxon is retained when its column sum is strictly greater
#             than this.
# Returns:    `prev_df` restricted to the retained taxon columns.
filter_taxa_by_presence <- function(prev_df, presence_t) {
    n_present <- colSums(prev_df)
    is_kept <- n_present > presence_t
    select(prev_df, all_of(names(n_present)[is_kept]))
}

### Decontamination functions

In [3]:
# Differential prevalence of each taxon across the levels of each batch column.
#
# dat:            presence/absence table with `npm_research_id` plus one
#                 column per taxon.
# metadat:        sample metadata with `npm_research_id` and the columns named
#                 in `meta_cols`.
# meta_cols:      names of the metadata (batch) columns to test.
# taxa_vec:       names of the taxon columns of `dat` to test.
# prev_threshold: not used by this function; retained for call compatibility.
# min_samples:    batch levels with fewer than this many rows in `metadat`
#                 are ignored.
# n_cores:        number of parallel workers (one metadata column per task).
#
# Returns a tibble with one row per (metadata column, taxon) holding the levels
# with the highest and lowest prevalence (`max_level`, `min_level`), those
# prevalences (`max_prev`, `min_prev`) and their ratio (`fold_diff`).
get_diff_prev <- function(dat, metadat, meta_cols, taxa_vec, prev_threshold = NULL,
                          min_samples, n_cores = 9) {
    cl <- makeCluster(n_cores)
    # Release the workers even if one of the tasks fails.
    on.exit(stopCluster(cl), add = TRUE)
    registerDoParallel(cl)

    # Join `dat` to `metadat` and keep only the samples whose level of `column`
    # is large enough to be compared.
    filter_batch_levels <- function(dat, metadat, column) {
        # Number of metadata rows per level.
        level_counts <- as.data.frame(metadat) %>%
            group_by(across(all_of(column))) %>%
            summarise(n = n(), .groups = "drop")

        # Levels to keep: enough samples and not the missing-value placeholder.
        # The comparison is case-insensitive because load_metadata() fills
        # missing values with lower-case "unknown".
        to_keep <- level_counts[[column]][level_counts$n >= min_samples]
        to_keep <- to_keep[!(tolower(to_keep) %in% "unknown")]
        n_levels <- length(to_keep)

        if (n_levels < 2) {
            print(str_glue("After pruning, {column} has < 2 levels"))
        } else {
            print(str_glue("After pruning, {column} has {n_levels} levels"))
        }

        dat %>%
            left_join(metadat, by = "npm_research_id") %>%
            filter(.data[[column]] %in% to_keep)
    }

    # Prevalence of one taxon within each level of `column`, summarised as a
    # single-row tibble.
    get_taxon_diff_prev <- function(dat_meta, column, taxon) {
        prev_stats <- dat_meta %>%
            group_by(across(all_of(column))) %>%
            summarise(prevalence = sum(.data[[taxon]]) / n(), .groups = "drop")

        level_names <- prev_stats[[column]]
        prevalence <- prev_stats$prevalence
        max_prev <- max(prevalence)
        min_prev <- min(prevalence)

        # On ties the first level is reported; `[1]` also yields NA (rather
        # than a zero-length value) when no level survived the pruning.
        tibble(taxa = taxon, meta_col = column,
               max_level = level_names[which(prevalence == max_prev)][1],
               min_level = level_names[which(prevalence == min_prev)][1],
               max_prev = max_prev, min_prev = min_prev,
               fold_diff = max_prev / min_prev)
    }

    morsels <- foreach(column = meta_cols, .packages = c("tidyverse", "foreach")) %dopar% {
        dat_meta <- filter_batch_levels(dat, metadat, column)

        crumbs <- foreach(taxon = taxa_vec) %do% {
            get_taxon_diff_prev(dat_meta, column, taxon)
        }

        bind_rows(crumbs)
    }

    bind_rows(morsels)
}


# Build the presence/absence table used for contaminant calling.
#
# df:             count table with `npm_research_id` plus one column per taxon.
# meta_filt:      metadata; only samples listed in it are retained.
# RA_threshold:   a taxon is present in a sample only if its relative
#                 abundance is > this ...
# read_threshold: ... and its read count is > this.
# presence_t:     taxa present in `presence_t` or fewer samples are dropped.
# human:          name of the host taxon column, removed together with
#                 "unclassified" before relative abundances are computed.
#                 Previously read from a global variable of the same name.
#
# Returns a data.frame with `npm_research_id` first and one logical column per
# retained taxon.
preprocess_data <- function(df, meta_filt, RA_threshold, read_threshold, presence_t,
                            human = "Homo sapiens") {
    df_filt <- retrieve_rows_from_meta(df, meta_filt)
    df_filt2 <- remove_cols(df_filt, c(human, "unclassified"))

    # A taxon counts as present only when it passes both thresholds.
    RA_df <- otu_to_RA(df_filt2)
    prev_RA <- RA_to_PA(RA_df, RA_threshold)
    prev_read <- otu_to_PA(df_filt2, read_threshold)
    prev_df <- as.data.frame(prev_read & prev_RA)

    # Remove taxa that are not present in enough samples
    prev_df <- filter_taxa_by_presence(prev_df, presence_t = presence_t)
    prev_df %>% rownames_to_column("npm_research_id")
}

# Run the differential-prevalence analysis on a count table.
#
# df, meta_filt, human, RA_threshold, read_threshold, presence_t:
#                 forwarded to preprocess_data().
# meta_cols:      metadata (batch) columns to test.
# min_samples:    forwarded to get_diff_prev().
# taxa_vec:       taxa to test; when left as NA (or NULL), every taxon that
#                 survives preprocessing is tested.
#
# Returns the result of get_diff_prev().
decontaminate <- function(df, meta_filt, meta_cols, human, RA_threshold, read_threshold, presence_t, min_samples, taxa_vec = NA) {
    # Preprocess df
    prev_df <- preprocess_data(df, meta_filt, RA_threshold, read_threshold, presence_t,
                               human = human)

    # Get list of taxa. The test is written to stay a single TRUE/FALSE even
    # when a vector of several taxa is supplied.
    if (is.null(taxa_vec) || all(is.na(taxa_vec))) {
        taxa_vec <- setdiff(colnames(prev_df), "npm_research_id")
    }

    # Differential prevalence
    get_diff_prev(prev_df, meta_filt, meta_cols, taxa_vec, min_samples = min_samples)
}

# Correlate every non-contaminant taxon with every contaminant taxon.
#
# dat:              table with one numeric column per taxon.
# non_contaminants: names of the candidate (non-contaminant) taxon columns.
# contaminants:     names of the contaminant taxon columns.
# meta_col, level:  batch column and level that `dat` was restricted to; they
#                   are only copied into the output.
# method:           correlation method passed to cor.test().
#
# Returns a tibble with columns meta_col, level, non_contaminant_taxon,
# contaminant_taxon and rho (the correlation estimate), one row per pair.
#
# NOTE(review): cor.test() defaults to Pearson's r, yet the estimate is stored
# in a column called `rho` and was previously held in a variable named
# `spearman_test`, which suggests Spearman was intended. The default is left
# unchanged here; confirm, and pass method = "spearman" if so.
corr_decontam <- function(dat, non_contaminants, contaminants, meta_col, level,
                          method = "pearson") {
    # Report progress once per batch level rather than once per taxon pair.
    print(str_glue("Computing correlations for {meta_col}: {level}"))

    contaminants <- as.character(contaminants)

    nc_list <- lapply(non_contaminants, function(non_contaminant_taxon) {
        rho <- vapply(contaminants, function(contaminant_taxon) {
            corr_test <- cor.test(dat[[contaminant_taxon]],
                                  dat[[non_contaminant_taxon]],
                                  method = method)
            unname(corr_test$estimate)
        }, numeric(1), USE.NAMES = FALSE)

        # One tibble per non-contaminant; with no contaminants this is a
        # zero-row tibble that still carries all five columns.
        tibble(meta_col = meta_col,
               level = level,
               non_contaminant_taxon = non_contaminant_taxon,
               contaminant_taxon = contaminants,
               rho = rho)
    })

    bind_rows(nc_list)
}


### Load data

In [4]:
# Name of the host taxon column; it is removed before prevalence calling.
human <- "Homo sapiens"

# Presence calling: a taxon is present in a sample when its read count is
# > read_threshold and its relative abundance is > RA_threshold.
read_threshold <- 10
RA_threshold <- 0.005

# Taxa present in `presence_t` or fewer samples are dropped.
presence_t <- 0

# Batch levels with fewer than this many samples are not compared.
min_samples <- 100

# Differential-prevalence cut-offs applied to the decontaminate() output.
fold_diff_t <- 2
max_prev_t <- 0.25

# Load data and remove low microbial read samples
to_retain <- fread("data/samples_above_100_microbial_reads.txt")$npm_research_id

In [5]:
# df_raw <- load_data(str_glue("data/taxonomic_profiles/07_abundance_matrix/abundance_matrix.S.pipeline2_210322.tsv")) %>% 
#     filter(npm_research_id %in% to_retain)

# meta_raw <- load_metadata("data/SG10K_Health_metadata.n10714.16March2021.parsed.csv", df_raw) %>% 
#     filter(npm_research_id %in% to_retain)

# # Sample with replacement
# df_raw2 <- df_raw

# # Make npm_research_id unique
# merged <- df_raw2 %>%
#     left_join(meta_raw) %>%
#     mutate(npm_research_id = paste0("n", seq(nrow(df_raw2))))

# meta <- merged %>%
#     select(all_of(colnames(meta_raw)))

# df <- merged %>%
#     select(all_of(colnames(df_raw2)))

# # Get metadata columns of interest
# meta_cols <- get_meta_cols(meta, 
#                            meta_regex = "kit|flow_cell|site_supplying", 
#                            to_exclude = c("hiseq_xtm_flow_cell_v2_5_id"))

# # Remove GUSTO Kids cohort
# gusto_meta <- meta %>%
#     filter(site_supplying_sample == "GUSTO")

# gusto_only <- df %>%
#     filter(npm_research_id %in% gusto_meta$npm_research_id)
# no_gusto <- df %>%
#     filter(!(npm_research_id %in% gusto_meta$npm_research_id))

# # Get abundance matrix of non-zero taxa
# df_zeroed <- preprocess_data(df, meta, RA_threshold, read_threshold, presence_t)

# # For correlation analysis
# species_df <- df %>%
#     select(all_of(colnames(df_zeroed)))

# all(colnames(species_df) == colnames(df_zeroed))

# meta_filt <- meta

# # Convert to CLR abundances
# clr_df <- RA_to_clr(otu_to_RA(species_df))
# dim(species_df)

# # Remove absent species
# species_zeroed <- species_df %>% 
#     select(all_of(colnames(df_zeroed))) %>%
#     column_to_rownames("npm_research_id")

# prev_bool_df <- df_zeroed %>%
#     column_to_rownames("npm_research_id")

# for(i in seq(ncol(species_zeroed))) {
#     species_zeroed[!prev_bool_df[, i], i] <- 0
# }
# # Diff prev filter
# decon_raw <- decontaminate(no_gusto, meta_filt = meta, meta_cols = meta_cols, 
#                            human = human, RA_threshold = RA_threshold, read_threshold = read_threshold, 
#                            presence_t = presence_t, min_samples = min_samples)
# decon_raw_gusto <- decontaminate(gusto_only, meta_filt = meta, meta_cols = meta_cols, 
#                                  human = human, RA_threshold = RA_threshold, read_threshold = read_threshold, 
#                                  presence_t = presence_t, min_samples = min_samples)

# no_gusto_contam <- decon_raw %>%
#     mutate(diff_abn = ifelse(fold_diff > fold_diff_t, T, F)) %>%
#     filter(max_prev > max_prev_t, diff_abn) %>%
#     distinct(taxa)

# gusto_contams <- decon_raw_gusto %>%
#     mutate(diff_abn = ifelse(fold_diff > fold_diff_t, T, F)) %>%
#     filter(max_prev > max_prev_t, diff_abn) %>%
#     distinct(taxa)

# contam <- bind_rows(no_gusto_contam, gusto_contams) %>%
#     distinct(taxa)

# diff_prev_contams <- contam$taxa

# nc <- colnames(df_zeroed)
# nc <- nc[!(nc %in% contam$taxa)]
# nc <- tibble(taxa = nc[nc != "npm_research_id"])
# diff_prev_contams_nc <- nc$taxa

# diff_prev <- decon_raw

# diff_filt <- diff_prev %>%
#     filter(fold_diff > 2, max_prev > 0.25)

# # Correlation filter
# iter_list <- diff_filt %>% 
#     distinct(meta_col, max_level)

# cl <- makeCluster(9)
# registerDoParallel(cl)

# morsels <- foreach(i = seq(nrow(iter_list)), .packages = c("tidyverse", "foreach")) %dopar% {
#     var_name <- iter_list[i, ]$meta_col
#     level_name <- iter_list[i, ]$max_level

#     contams <- (diff_filt %>% filter(meta_col == var_name, max_level == level_name))$taxa

#     noncontams <- colnames(clr_df)
#     noncontams <- noncontams[!(noncontams %in% c("npm_research_id", contams))]

#     batch_dat <- clr_df %>%
#         left_join(meta_filt %>% select(all_of(c("npm_research_id", var_name)))) %>%
#         filter(get(var_name) == level_name)

#     corr_decontam(batch_dat, noncontams, contams, var_name, level_name)
# }

# corr_res <- bind_rows(morsels)

# stopCluster(cl)

# corr_c <- corr_res %>% 
#     filter(rho > 0.7) %>%
#     distinct(non_contaminant_taxon) %>%
#     rename(contaminants = non_contaminant_taxon)

# corr_nc <- tibble(non_contaminant_taxon = colnames(clr_df)[!(colnames(clr_df) %in% c("npm_research_id", 
#                                                                                        corr_c$contaminants, 
#                                                                                        diff_prev_contams))])

# # Batch filter
# meta_col_filt <-  meta_cols[!(meta_cols %in% c("library_prep_kit", "hiseq_xtm_flow_cell_v2_5_id"))]
# result_df <- tibble(taxa = corr_nc$non_contaminant_taxon)

# for(column in meta_col_filt) {
#     res <- df_zeroed %>% 
#         left_join(meta %>% select(all_of(c("npm_research_id", column)))) %>%
#         select(-npm_research_id) %>%
#         pivot_longer(!column, names_to = "taxa", values_to = "presence") %>%
#         filter(presence) %>%
#         group_by(taxa) %>%
#         summarise(n_batches = n_distinct(get(column)))
#     colnames(res)[2] <- column

#     result_df <- result_df %>%
#         left_join(res, "taxa")
# }

# result_df <- result_df %>% column_to_rownames("taxa")
# result_df[result_df <= 1] <- 0
# result_df[result_df > 1] <- 1

# row_sums <- rowSums(result_df)
# parsed_df <- tibble(taxa = names(row_sums), n_cols = row_sums)

# prev_stats <- apply(df_zeroed[, 2:ncol(df_zeroed)], 2, sum) / nrow(df_zeroed)

# overall_prev <- data.frame(taxa = names(prev_stats), overall_prevalence = as.vector(prev_stats)) %>%
#     mutate(n_samples = overall_prevalence * nrow(df_zeroed))

# parsed_df %>% 
#     left_join(overall_prev) %>%
#     filter(n_cols == 7) %>%
#     arrange(desc(overall_prevalence))

# simple_nc <- parsed_df %>%
#     filter(n_cols == 7) %>%
#     select(taxa)

# # Max count filter
# batch_nc <- simple_nc$taxa
# species_filt <- species_zeroed %>% 
#     rownames_to_column("npm_research_id") %>%
#     select(all_of(c("npm_research_id", batch_nc)))

# prev_max_filt <- species_filt %>% 
#     select(-npm_research_id) %>%
#     pivot_longer(everything(), names_to = "taxa", values_to = "read_count") %>%
#     group_by(taxa) %>%
#     summarise(max_count = max(read_count)) %>%
#     arrange(desc(max_count)) %>%
#     filter(max_count > 100)

# final_nc <- prev_max_filt %>% select(taxa)

# final_nc

In [5]:
# Bootstrap the decontamination pipeline: resample the samples with
# replacement, rerun every filter and write the surviving (non-contaminant)
# taxa of each iteration to its own file.
set.seed(66)

n_bootstraps <- 100
n_cores <- 9          # workers for the correlation filter
corr_t <- 0.7         # correlation above which a taxon is called a contaminant
max_count_t <- 100    # a retained taxon needs a sample with more reads than this
# NOTE(review): 7 is presumably length(meta_col_filt), i.e. "detected in more
# than one batch for every batch column" — confirm before changing meta_cols.
n_batch_cols_required <- 7

# Create the output directory up front so a missing folder is not discovered
# only at the end of the first iteration.
out_dir <- "results/decontamination/bootstrap_out"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# The input tables are identical in every iteration, so read them once.
df_raw <- load_data("data/taxonomic_profiles/07_abundance_matrix/abundance_matrix.S.pipeline2_210322.tsv") %>%
    filter(npm_research_id %in% to_retain)

meta_raw <- load_metadata("data/SG10K_Health_metadata.n10714.16March2021.parsed.csv", df_raw) %>%
    filter(npm_research_id %in% to_retain)

# Taxa whose maximum prevalence and prevalence fold difference both exceed the
# script-level cut-offs.
flag_diff_prev <- function(diff_prev_res) {
    diff_prev_res %>%
        filter(max_prev > max_prev_t, fold_diff > fold_diff_t) %>%
        distinct(taxa)
}

for (iter_n in seq_len(n_bootstraps)) {

    # Sample with replacement
    df_raw2 <- df_raw %>%
        sample_n(nrow(df_raw), replace = TRUE)

    # Make npm_research_id unique: a sample drawn more than once must be
    # treated as separate rows in the joins below.
    merged <- df_raw2 %>%
        left_join(meta_raw, by = "npm_research_id") %>%
        mutate(npm_research_id = paste0("n", seq_len(nrow(df_raw2))))

    meta <- merged %>%
        select(all_of(colnames(meta_raw)))

    df <- merged %>%
        select(all_of(colnames(df_raw2)))

    # Get metadata columns of interest
    meta_cols <- get_meta_cols(meta,
                               meta_regex = "kit|flow_cell|site_supplying",
                               to_exclude = c("hiseq_xtm_flow_cell_v2_5_id"))

    # Split off the GUSTO cohort; it is analysed separately from the rest.
    gusto_meta <- meta %>%
        filter(site_supplying_sample == "GUSTO")

    gusto_only <- df %>%
        filter(npm_research_id %in% gusto_meta$npm_research_id)
    no_gusto <- df %>%
        filter(!(npm_research_id %in% gusto_meta$npm_research_id))

    # Presence/absence matrix of taxa detected in at least one sample
    df_zeroed <- preprocess_data(df, meta, RA_threshold, read_threshold, presence_t)

    # For correlation analysis
    species_df <- df %>%
        select(all_of(colnames(df_zeroed)))

    # The masking below pairs the two tables by position.
    stopifnot(identical(colnames(species_df), colnames(df_zeroed)))

    meta_filt <- meta

    # Convert to CLR abundances
    clr_df <- RA_to_clr(otu_to_RA(species_df))

    # Zero the read counts of taxa that were not called present
    species_zeroed <- species_df %>%
        column_to_rownames("npm_research_id")

    prev_bool_df <- df_zeroed %>%
        column_to_rownames("npm_research_id")

    species_zeroed[!as.matrix(prev_bool_df)] <- 0

    # Diff prev filter
    decon_raw <- decontaminate(no_gusto, meta_filt = meta, meta_cols = meta_cols,
                               human = human, RA_threshold = RA_threshold, read_threshold = read_threshold,
                               presence_t = presence_t, min_samples = min_samples)
    decon_raw_gusto <- decontaminate(gusto_only, meta_filt = meta, meta_cols = meta_cols,
                                     human = human, RA_threshold = RA_threshold, read_threshold = read_threshold,
                                     presence_t = presence_t, min_samples = min_samples)

    no_gusto_contam <- flag_diff_prev(decon_raw)
    gusto_contams <- flag_diff_prev(decon_raw_gusto)

    contam <- bind_rows(no_gusto_contam, gusto_contams) %>%
        distinct(taxa)

    diff_prev_contams <- contam$taxa

    # Not used further inside the loop; kept as script-level variables.
    nc <- tibble(taxa = setdiff(colnames(df_zeroed), c(contam$taxa, "npm_research_id")))
    diff_prev_contams_nc <- nc$taxa

    diff_prev <- decon_raw

    diff_filt <- diff_prev %>%
        filter(fold_diff > fold_diff_t, max_prev > max_prev_t)

    # Correlation filter: within each batch level that has contaminants,
    # correlate all remaining taxa against those contaminants.
    iter_list <- diff_filt %>%
        distinct(meta_col, max_level)

    cl <- makeCluster(n_cores)
    registerDoParallel(cl)

    # seq_len() yields no tasks when there are no contaminants, and the cluster
    # is stopped even if a worker fails.
    morsels <- tryCatch(
        foreach(i = seq_len(nrow(iter_list)), .packages = c("tidyverse", "foreach")) %dopar% {
            var_name <- iter_list[i, ]$meta_col
            level_name <- iter_list[i, ]$max_level

            contams <- (diff_filt %>% filter(meta_col == var_name, max_level == level_name))$taxa

            noncontams <- colnames(clr_df)
            noncontams <- noncontams[!(noncontams %in% c("npm_research_id", contams))]

            batch_dat <- clr_df %>%
                left_join(meta_filt %>% select(all_of(c("npm_research_id", var_name))),
                          by = "npm_research_id") %>%
                filter(.data[[var_name]] == level_name)

            corr_decontam(batch_dat, noncontams, contams, var_name, level_name)
        },
        finally = stopCluster(cl)
    )

    corr_res <- bind_rows(morsels)

    # An empty correlation table has no `rho` column to filter on.
    if (nrow(corr_res) > 0) {
        corr_c <- corr_res %>%
            filter(rho > corr_t) %>%
            distinct(non_contaminant_taxon) %>%
            rename(contaminants = non_contaminant_taxon)
    } else {
        corr_c <- tibble(contaminants = character(0))
    }

    corr_nc <- tibble(non_contaminant_taxon = setdiff(colnames(clr_df),
                                                      c("npm_research_id",
                                                        corr_c$contaminants,
                                                        diff_prev_contams)))

    # Batch filter: for every batch column, count the distinct levels in which
    # each taxon is present.
    meta_col_filt <- meta_cols[!(meta_cols %in% c("library_prep_kit", "hiseq_xtm_flow_cell_v2_5_id"))]
    result_df <- tibble(taxa = corr_nc$non_contaminant_taxon)

    for (column in meta_col_filt) {
        res <- df_zeroed %>%
            left_join(meta %>% select(all_of(c("npm_research_id", column))),
                      by = "npm_research_id") %>%
            select(-npm_research_id) %>%
            pivot_longer(!all_of(column), names_to = "taxa", values_to = "presence") %>%
            filter(presence) %>%
            group_by(taxa) %>%
            summarise(n_batches = n_distinct(.data[[column]]))
        colnames(res)[2] <- column

        result_df <- result_df %>%
            left_join(res, "taxa")
    }

    # 1 where a taxon occurs in more than one level of the column, else 0.
    result_df <- result_df %>% column_to_rownames("taxa")
    result_df[result_df <= 1] <- 0
    result_df[result_df > 1] <- 1

    row_sums <- rowSums(result_df)
    parsed_df <- tibble(taxa = names(row_sums), n_cols = row_sums)

    # Overall prevalence per taxon. Not used further inside the loop; kept as
    # script-level variables.
    taxa_presence <- df_zeroed[, colnames(df_zeroed) != "npm_research_id", drop = FALSE]
    prev_stats <- colSums(taxa_presence) / nrow(df_zeroed)

    overall_prev <- data.frame(taxa = names(prev_stats), overall_prevalence = as.vector(prev_stats)) %>%
        mutate(n_samples = overall_prevalence * nrow(df_zeroed))

    simple_nc <- parsed_df %>%
        filter(n_cols == n_batch_cols_required) %>%
        select(taxa)

    # Max count filter
    batch_nc <- simple_nc$taxa
    species_filt <- species_zeroed %>%
        rownames_to_column("npm_research_id") %>%
        select(all_of(c("npm_research_id", batch_nc)))

    prev_max_filt <- species_filt %>%
        select(-npm_research_id) %>%
        pivot_longer(everything(), names_to = "taxa", values_to = "read_count") %>%
        group_by(taxa) %>%
        summarise(max_count = max(read_count)) %>%
        arrange(desc(max_count)) %>%
        filter(max_count > max_count_t)

    final_nc <- prev_max_filt %>% select(taxa)
    fwrite(final_nc, file.path(out_dir, str_glue("noncontams.iter{iter_n}.txt")))
}

[1m[22mJoining with `by = join_by(npm_research_id)`
