In [41]:
# Load necessary libraries
library(mashr)
library(RhpcBLASctl)
library(magrittr)
library(tidyverse)
#library(ttt)

# Set number of threads for BLAS operations
blas_set_num_threads(1)

# Create a function for pairwise contrast columns
MakePairwiseContrastCols <- function(contrast_left, orig_vector) {
    orig_vector[contrast_left[1]] <- 1
    orig_vector[contrast_left[2]] <- -1
    orig_vector
}

# Function to fit contrast data
FitContrast <- function(index, orig_mean, posterior_mean, posterior_vcov) {
        population_names <- colnames(posterior_mean) %>% str_remove_all("BETA_")

        orig_mean_vector <- orig_mean[index,]
        names(orig_mean_vector) <- population_names
        orig_mean_nonzero <- as.vector(orig_mean_vector != 0)
        orig_mean_tested <- names(orig_mean_vector[orig_mean_nonzero])
        
        if(length(orig_mean_tested)>0){
            n_populations <- length(orig_mean_tested)

            pairwise_vector <- rep(0, n_populations)
            names(pairwise_vector) <- orig_mean_tested

            grouping <- grouping_all[orig_mean_tested]
            if (n_populations > 1) {
                if (n_populations > 2) {
                    #####1. deviation contrast
                    deviation_contrasts <- rep(-1, n_populations^2) %>% matrix(nrow = n_populations, ncol = n_populations)
                    diag(deviation_contrasts) <- n_populations - 1
                    rownames(deviation_contrasts) <- orig_mean_tested
                    colnames(deviation_contrasts) <- orig_mean_tested
                    deviation_contrasts_tested <- deviation_contrasts[, orig_mean_tested]

                    unique_groups <- unique(grouping)
                    for (grp in unique_groups[unique_groups > 0]) {
                       #same celltype (e.g. MIC) with different populations would get 1/n for their weight,
                        diag(deviation_contrasts_tested)[grouping == grp] <- (n_populations - 1) / length(grouping[grouping == grp])
                        deviation_contrasts_tested[grouping == grp, grouping == grp] <- (n_populations - 1) / length(grouping[grouping == grp])
                    }

                    colnames(deviation_contrasts_tested) %<>% str_c("_deviation")

                    ####2. pairwise contrast
                    two_combn <- combn(orig_mean_tested, m = 2)
                    pairwise_names <- apply(two_combn, 2, str_c, collapse = "_vs_")
                    pairwise_contrast <- apply(two_combn, 2, MakePairwiseContrastCols, pairwise_vector)

                    colnames(pairwise_contrast) <- pairwise_names

                    # Create a new matrix to store the adjusted values
                    pairwise_contrast_new <- pairwise_contrast

                    # Loop through each column to archieve such goal: e.g.
                    # microglia populations would get 1/n_Mic for their weight,
                    # and Mic vs Mic would still be 1 vs -1 to estimate the internal difference among microglia datasets
                    for (col in colnames(pairwise_contrast)) {
                      # Split column names to get group names
                      groups <- strsplit(col, "_vs_")[[1]]

                      # Get the grouping values for the two groups
                      group_values <- grouping[names(grouping) %in% groups]

                      # Identify groups with non-zero grouping values
                      relevant_groups <- names(group_values[group_values > 0])

                      # Check if there are multiple distinct groups
                      if (length(unique(group_values)) > 1 && length(relevant_groups) > 0) {
                        distinct_groups <- unique(group_values[group_values > 0])

                        for (distinct_grp in distinct_groups) {
                          # Identify rows belonging to the current group
                          rows_in_group <- names(grouping[grouping == distinct_grp])

                          # Adjust the pairwise_contrast values for each row in the group
                          pairwise_contrast_new[rows_in_group, col] <- pairwise_contrast[rows_in_group[rows_in_group %in% groups], col] / length(rows_in_group)
                        }
                      }
                    }

                    # Replace the original matrix with the new one
                    pairwise_contrast <- pairwise_contrast_new

                    #### 3. combine them
                    contrast_design <- cbind(deviation_contrasts_tested / (n_populations - 1), pairwise_contrast)

                } else {
                    pairwise_vector[orig_mean_tested[1]] <- 1
                    pairwise_vector[orig_mean_tested[2]] <- -1
                    contrast_design <- as.matrix(pairwise_vector)
                    colnames(contrast_design) <- str_c(orig_mean_tested[1], "_vs_", orig_mean_tested[2])
                }

                posterior_mean_subset <- posterior_mean[index,]
                posterior_mean_subset2 <- posterior_mean_subset[orig_mean_tested]
                posterior_vcov_subset <- posterior_vcov[,,index]
                posterior_vcov_subset2 <- posterior_vcov_subset[orig_mean_tested,orig_mean_tested]

                contrast_diff <- t(contrast_design) %*% posterior_mean_subset2
                contrast_vcov <- t(contrast_design) %*% posterior_vcov_subset2 %*% contrast_design
                contrast_se <- diag(contrast_vcov) %>% sqrt

                contrast_p <- 2 * (1 - pnorm(abs(contrast_diff) / contrast_se))

                contrast_diff_df <- t(contrast_diff) %>% as_tibble
                colnames(contrast_diff_df) %<>% str_c("mean_contrast_", .)
                contrast_se_df <- t(contrast_se) %>% as_tibble
                colnames(contrast_se_df) %<>% str_c("se_contrast_", .)
                contrast_p_df <- t(contrast_p) %>% as_tibble
                colnames(contrast_p_df) %<>% str_c("p_contrast_", .)

                contrast_df <- bind_cols(contrast_diff_df, contrast_se_df, contrast_p_df)
            } else if(grouping[orig_mean_tested][1]!=grouping[orig_mean_tested][2]){
                contrast_vector <- rep(NA, length(population_names))
                names(contrast_vector) <- str_c("mean_contrast_", population_names, "_deviation")
                contrast_df <- t(contrast_vector) %>% as_tibble
            }
         
        contrast_df <- contrast_df %>% as.data.frame
        rownames(contrast_df) <- rownames(posterior_mean)[index]
        return(contrast_df)
        }
    
    }

In [2]:
if(length("['Ast,Exc,Inh,Mic,OPC,Oli,DLPFC_pQTL,MiGA_GFM,MiGA_GTS,MiGA_SVZ,MiGA_THA']") > 0){
    # All the cells
    cells <- c("Ast,Exc,Inh,Mic,OPC,Oli,DLPFC_pQTL,MiGA_GFM,MiGA_GTS,MiGA_SVZ,MiGA_THA") %>% str_split(., ",", simplify = TRUE) %>% as.character

    # Automatically set grouping categories based on the recipe， set0 for the celltypes without multiple populations
    grouping_all <- rep(0, length(cells))
    names(grouping_all) <- cells


    # Read groupings from the recipe
    if(length("[]") > 0){
        cell_groups <- list(



        )
        if(!is.null(cell_groups)) {
          cell_groups <- map(cell_groups, ~str_split(.x, ",", simplify = TRUE) %>% as.character())
        }
    }

    if("" != ""){
        cell_groups <- readLines("")
        cell_groups <- lapply(cell_groups, function(g) strsplit(g, ",")[[1]])
    }

    if(!is.null(cell_groups)){
        for(i in seq_along(cell_groups)) {
          grouping_all[cell_groups[[i]]] <- i
        }
    }
}

In [3]:
# Read the data files
orig_data <- read_rds("/mnt/vast/hpc/csg/rf2872/Work/Multivariate/MASH/MASH_test_csg/output/RDS/ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.bed.processed_phenotype.per_chrom_ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.covariate.pca.resid.PEER.cov.chr10.norminal.cis_long_table.IDI1.rds")$bhat
posterior_data <- read_rds("/mnt/vast/hpc/csg/rf2872/Work/Multivariate/MASH/MWE/MWE_udr/cache/ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.bed.processed_phenotype.per_chrom_ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.covariate.pca.resid.PEER.cov.chr10.norminal.cis_long_table.IDI1.posterior.rds")
posterior_mean <- posterior_data$PosteriorMean
posterior_cov <- posterior_data$PosteriorCov

# Align data and clean-up NaN values
orig_data <- orig_data[, colnames(posterior_mean), drop = FALSE]
orig_data[which(is.nan(orig_data))] <- 0 # Placeholder for NaNs

In [14]:
index=390
orig_mean=orig_data
posterior_vcov= posterior_cov


In [15]:
  population_names <- colnames(posterior_mean) %>% str_remove_all("BETA_")
    orig_mean_vector <- orig_mean[index, ]
    names(orig_mean_vector) <- population_names
    orig_mean_nonzero <- as.vector(orig_mean_vector != 0)
    orig_mean_tested <- names(orig_mean_vector[orig_mean_nonzero])
    n_populations <- length(orig_mean_tested)
    pairwise_vector <- rep(0, n_populations)
    names(pairwise_vector) <- orig_mean_tested
    grouping <- grouping_all[orig_mean_tested]

In [20]:
length(orig_mean_tested)>0

In [39]:
 FitContrast(1, orig_mean, posterior_mean, posterior_vcov) 

Unnamed: 0_level_0,mean_contrast_Ast_deviation,mean_contrast_Exc_deviation,mean_contrast_Inh_deviation,mean_contrast_Mic_deviation,mean_contrast_OPC_deviation,mean_contrast_Oli_deviation,mean_contrast_Ast_vs_Exc,mean_contrast_Ast_vs_Inh,mean_contrast_Ast_vs_Mic,mean_contrast_Ast_vs_OPC,⋯,p_contrast_Exc_vs_Inh,p_contrast_Exc_vs_Mic,p_contrast_Exc_vs_OPC,p_contrast_Exc_vs_Oli,p_contrast_Inh_vs_Mic,p_contrast_Inh_vs_OPC,p_contrast_Inh_vs_Oli,p_contrast_Mic_vs_OPC,p_contrast_Mic_vs_Oli,p_contrast_OPC_vs_Oli
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr10:49321_T_C,5.654125e-05,-0.008451905,-0.004335873,0.008599621,0.0009120335,0.003219583,0.007090372,0.003660345,-0.007119233,-0.0007129102,⋯,0.8835892,0.8268903,0.8417774,0.837476,0.8596268,0.8879446,0.8758851,0.898417,0.9408837,0.9619575


In [36]:
rownames(posterior_mean)[1]

In [12]:
nrow(posterior_mean)

In [42]:
# Apply the FitContrast function and consolidate results
contrast_result <- map(1:nrow(posterior_mean), FitContrast, orig_data, posterior_mean, posterior_cov) %>% bind_rows %>%
    select(matches("mean_contrast.*deviation"), matches("mean_contrast.*_vs_"),
           matches("se_contrast.*deviation"), matches("se_contrast.*_vs_"),
           matches("p_contrast.*deviation"), matches("p_contrast.*_vs_"))

In [25]:
rownames(contrast_result) <- rownames(posterior_mean)

“Setting row names on a tibble is deprecated.”


ERROR: Error in `.rowNamesDF<-`(x, value = value): invalid 'row.names' length


In [44]:
contrast_result %>% nrow

In [None]:
write_rds(contrast_result,  '/mnt/vast/hpc/csg/rf2872/Work/Multivariate/MASH/MWE/test/ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.bed.processed_phenotype.per_chrom_ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.log2cpm.ALL_Ast_End_Exc_Inh_Mic_OPC_Oli.covariate.pca.resid.PEER.cov.chr10.norminal.cis_long_table.IDI1_posterior_contrast.rds')