In [None]:
library(data.table)
library(Matrix)
library(dplyr)
library(tidyr)
library(ggplot2)
library(rlang)
library(stringr)
library(coranova)

In [None]:
# Load the project's shared evaluation helpers. Functions that are called
# below but not defined in this notebook (e.g. inormal,
# perform_adj_PC_specify_covs, getVals) presumably come from this file or from
# the coranova package -- TODO confirm.
source("../eval_functions_refactored.R")

In [None]:
# Column labels for the per-ancestry correlation summary table, in reporting
# order: six PRS-CS scores, six LDpred "auto" scores, then PRS-CSx.
prscs_names <- paste(
    "prscs",
    c("META_afr", "META_amr", "META_eur", "AFR_afr", "HIS_amr", "EUR_eur"),
    sep = "_"
)
# Earlier label set that also listed the LDpred "inf" models (kept for reference):
#ldpred_names <-paste0("ldpred_", c("METAss_AFRld_inf", "METAss_AFRld_auto", "METAss_AMRld_inf", "METAss_AMRld_auto", "METAss_EURld_inf", "METAss_EURld_auto", "AFRss_AFRld_inf", "AFRss_AFRld_auto", "HISss_AMRld_inf", "HISss_AMRld_auto", "EURss_EURld_inf", "EURss_EURld_auto"))
ldpred_names <- paste(
    "ldpred",
    c("METAss_afrld_auto", "METAss_amrld_auto", "METAss_eurld_auto",
      "AFRss_afrld_auto", "HISss_amrld_auto", "EURss_eurld_auto"),
    sep = "_"
)
x_names <- "prscsx"

# Global table that compare_scores_subpopPCadj_return_list() joins onto the
# analysis data by its `s` column (after dropping ancestry_pred_other and the
# "prob"/"prev_global" columns). Presumably the source of the <POP>_PC1..20
# covariates -- TODO confirm.
subpopPCs <- fread("../2024-05-31_global_subpop_v2_gnomad-AoU-PCs-in-AOU.tsv")

In [None]:
# Assemble the per-ancestry analysis tables and correlation summaries for one
# trait.
#
# score_name: prefix of the score files read from ../computed_scores/
#             (<score_name>_LDpred.txt, _PRScs.txt and _PRScsx.txt).
# phen_name:  basename of the outcome file ../outcomes/<phen_name>.txt; also
#             written to the `outcome` column of the summary table.
# age_sex:    forwarded to compare_scores_subpopPCadj_return_list().
#
# Uses the globals prscs_names, ldpred_names and x_names for the summary
# column labels, and getVals(), which is defined outside this notebook.
#
# Returns an unnamed list:
#   [[1]] data frame with one row per ancestry holding the values getVals()
#         extracts for outcome "mean" and the 13 reported scores, plus `anc`
#         and `outcome` columns;
#   [[2]] list of full correlation matrices, one per ancestry;
#   [[3]] list of the per-ancestry data frames those matrices were built from.
make_outcome_list_subpopPCs <- function(score_name, phen_name, age_sex = FALSE) {
    read_scores <- function(suffix) {
        fread(paste0("../computed_scores/", score_name, suffix))
    }
    ldpred_scores <- read_scores("_LDpred.txt")
    prscs_scores <- read_scores("_PRScs.txt")
    prscsx_scores <- read_scores("_PRScsx.txt")
    covariates <- as.data.frame(fread("../AoU_98K_covariates.tsv"))
    phenotype <- fread(paste0("../outcomes/", phen_name, ".txt"))

    # Same adjustment for each method's score table (LDpred, PRS-CS, PRS-CSx).
    adjust <- function(score_tab) {
        compare_scores_subpopPCadj_return_list(score_tab, phenotype, covariates, age_sex)
    }
    ldpred_by_pop <- adjust(ldpred_scores)
    prscs_by_pop <- adjust(prscs_scores)
    prscsx_by_pop <- adjust(prscsx_scores)

    # Column layout after binding: PRS-CS block, LDpred block (inf/auto
    # interleaved), PRS-CSx block; each block starts with its own outcome copy.
    ldpred_cols <- paste0(c("ldp_inf", "ldp_auto"), rep(1:6, each = 2))
    combined_cols <- c("mean", paste0("prscs_score", 1:6),
                       "mean2", ldpred_cols,
                       "mean3", paste0("x_score", 1:5))
    by_pop <- Map(cbind, prscs_by_pop, ldpred_by_pop, prscsx_by_pop)
    by_pop <- lapply(by_pop, function(tab) setNames(tab, combined_cols))

    cor_by_pop <- lapply(by_pop, cor)
    reported <- c(paste0("prscs_score", 1:6), paste0("ldp_auto", 1:6), "x_score5")
    per_pop_vals <- lapply(cor_by_pop, function(m) {
        getVals(m, outcome = "mean", measures = reported)
    })
    summary_df <- as.data.frame(do.call(rbind, per_pop_vals))
    summary_df$anc <- rownames(summary_df)
    colnames(summary_df) <- c(prscs_names, ldpred_names, x_names, "anc")
    summary_df$outcome <- phen_name

    list(summary_df, cor_by_pop, by_pop)
}

In [None]:
# Process the outcome and every score column separately within each predicted
# ancestry group (afr, eur, amr), using that group's own PCs as covariates.
#
# score_data: table with an `IID` column and score columns named
#             SCORE<i>_SUM. The number of scores is taken to be
#             ncol(score_data) - 4, i.e. four non-score columns are assumed.
# phen_data:  outcome table providing `person_id` and `mean`.
# covar_file: covariate table, joined to phen_data on their shared columns.
# age_sex:    if TRUE, `age` and `is_male` are added to the 20 PCs.
#
# The joined phenotype/covariate/score data must supply `unrel` and
# `ancestry_pred_other` (the copy in subpopPCs is dropped before joining).
# Relies on the global `subpopPCs` and on inormal() and
# perform_adj_PC_specify_covs(), which are defined outside this notebook.
#
# Returns a list named "afr", "eur", "amr". Each element is a data frame whose
# first column is the perform_adj_PC_specify_covs() result for the outcome
# (presumably covariate-adjusted values -- TODO confirm), followed by one such
# column per score.
compare_scores_subpopPCadj_return_list <- function(score_data, phen_data, covar_file, age_sex) {
    score_num <- length(colnames(score_data)) - 4
    print(paste("Score Number:", score_num))

    subpop_pcs <- subpopPCs %>%
        select(!c(ancestry_pred_other,
                  contains("prob"),
                  contains("prev_global")))
    dat <- phen_data %>%
        left_join(covar_file) %>%
        left_join(score_data, by = c(person_id = "IID")) %>%
        filter(unrel == 1) %>%
        left_join(subpop_pcs, by = c("person_id" = "s"))

    # Collapse the dimensions so a single "rows x cols" message is printed
    # (paste() over dim() alone yields two separate strings).
    print(paste("Data Dimensions:", paste(dim(dat), collapse = " x ")))

    res <- list()
    for (pop in c("afr", "eur", "amr")) {
        print(paste("Population:", pop))

        # %in% never yields NA, so rows with a missing ancestry label are
        # excluded whatever the class of `dat`.
        dat_anc <- dat[dat[["ancestry_pred_other"]] %in% pop, ]
        dat_anc$mean <- inormal(dat_anc$mean)
        pcs <- paste0(toupper(pop), "_PC", 1:20)

        if (age_sex == TRUE) {
            covs <- c("age", "is_male", pcs)
        } else {
            covs <- pcs
        }
        # Right-hand side shared by the outcome and every score.
        cov_formula <- paste(covs, collapse = " + ")

        dat_anc <- dat_anc %>%
            select(mean, contains("SCORE"), all_of(covs)) %>%
            na.omit()
        dat_anc$mean1 <- perform_adj_PC_specify_covs(dat_anc, "mean", cov_formula)
        # seq_len() gives an empty sequence when there are no score columns,
        # whereas 1:0 would iterate over c(1, 0).
        scores <- lapply(seq_len(score_num), function(i) {
            perform_adj_PC_specify_covs(dat_anc, paste0("SCORE", i, "_SUM"), cov_formula)
        })

        res[[pop]] <- cbind(dat_anc$mean1, bind_cols(scores))
    }
    res
}

In [None]:
# Build the per-trait objects used throughout the notebook. For each trait the
# list returned by make_outcome_list_subpopPCs() is unpacked into:
#   <trait>_cormat_df   - per-ancestry summary table            ([[1]])
#   <trait>_cormat_list - per-ancestry correlation matrices     ([[2]])
#   <trait>_list        - per-ancestry data frames for the tests ([[3]])
# Every trait is run with age_sex = TRUE (age and is_male added to the PCs).
hemo <- make_outcome_list_subpopPCs("Hemoglobin_Mean_INT", "hemoglobin", age_sex = TRUE)
hemo_cormat_df <- hemo[[1]]
hemo_cormat_list <- hemo[[2]]
hemo_list <- hemo[[3]]

bmi <- make_outcome_list_subpopPCs("mean_BMI_INT", "bmi", age_sex =TRUE)
bmi_cormat_df <- bmi[[1]]
bmi_cormat_list <- bmi[[2]]
bmi_list <- bmi[[3]]

ldl <- make_outcome_list_subpopPCs("LDLC_Mean_INT", "ldl", age_sex =TRUE)
ldl_cormat_df <- ldl[[1]]
ldl_cormat_list <- ldl[[2]]
ldl_list <- ldl[[3]]

sbp <- make_outcome_list_subpopPCs("mean_Systolic_INT", "sbp", age_sex =TRUE)
sbp_cormat_df <- sbp[[1]]
sbp_cormat_list <- sbp[[2]]
sbp_list <- sbp[[3]]

height <- make_outcome_list_subpopPCs("mean_Height_INT", "height", age_sex =TRUE)
height_cormat_df <- height[[1]]
height_cormat_list <- height[[2]]
height_list <- height[[3]]

## Is PRS-CSx better than mean META scores?

### Estimating Average Difference

In [None]:
# Continuous traits: mean R2 per phecode x ancestry x method x ss, restricted
# to the afr/amr/eur groups and excluding the "basophils" phecode.
a <- fread("dat_continuous_unrelated_refactored_PCagesex_subpopPCs.txt")
b <- a %>% 
    filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>% 
    select(!c(score, score1)) %>% group_by(phecode,anc, method, ss) %>%  
    summarize(mean(R2))
# Keep META rows, PRS-CSx rows, and rows whose ss equals the upper-cased
# ancestry label; PRS-CSx rows get method = "prscsx".
# NOTE(review): toupper(anc) == ss cannot match a "HIS" label for anc == "amr"
# (other cells pair amr with "HIS") -- confirm whether amr rows are meant to be
# kept here. The next cell only uses the META and prscsx rows.
c <- b %>% 
    filter(ss == "META" | ss == "prscsx" | toupper(anc) == ss) %>% 
    mutate(method = ifelse(ss == "prscsx", "prscsx", method))

# Binary traits: same summary, excluding phecode "Phe_250_1".
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt") 
b1 <- a1 %>% 
    filter(anc %in% c("afr", "amr","eur"), phecode != "Phe_250_1") %>% 
    select(!c(cases, controls, score, score2, score1, R2_type, traits, names)) %>% 
    group_by(phecode,anc, method, ss) %>%  
    summarize(mean(R2))
# Same row selection as for the continuous table (see the NOTE above).
c1 <- b1 %>% 
    filter(ss == "META" | ss == "prscsx" | toupper(anc) == ss) %>% 
    mutate(method = ifelse(ss == "prscsx", "prscsx", method))


In [None]:
# Combine the binary and continuous summaries, keep only META and PRS-CSx
# rows, and spread the methods into columns (ldpred, prscs, prscsx).
print("Combined Binary and Continuous")
c_xmeta <- rbind(c, c1) %>% 
            filter(ss == "META" | ss == "prscsx") %>% 
            select(!ss) %>% 
            pivot_wider(names_from = method, values_from = `mean(R2)`) 
# Per-ancestry mean proportional difference of PRS-CSx versus the META LDpred
# score, the META PRS-CS score, and the average of the two.
c_xmeta %>% 
    ungroup() %>% 
    group_by(anc) %>% 
    mutate(prop_diff_ldpred = (prscsx - ldpred)/ldpred, 
           prop_diff_prscs = (prscsx - prscs)/prscs, 
           prop_diff_both = (prscsx - (prscs + ldpred)/2)/((prscs + ldpred)/2)) %>%   
    summarize(mean(prop_diff_ldpred), mean(prop_diff_prscs), mean(prop_diff_both))

### Using Coranova

In [None]:
# Contrast matrix: mean of three scores minus a fourth score, one row per
# ancestry group. Columns form three consecutive blocks of four measures
# (score 1, score 2, score 3, PRS-CSx); row k puts (1/3, 1/3, 1/3, -1) on
# block k and 0 elsewhere. The block order presumably follows the order of the
# ancestry list passed to perform_alt_test() -- TODO confirm.
# byrow uses TRUE rather than the reassignable shorthand T.
A_meta_prscsx <- matrix(
    c(1/3, 1/3, 1/3, -1, rep(0, 4), rep(0, 4),
      rep(0, 4), 1/3, 1/3, 1/3, -1, rep(0, 4),
      rep(0, 4), rep(0, 4), 1/3, 1/3, 1/3, -1),
    byrow = TRUE, ncol = 12, nrow = 3
)

In [None]:
# Display the contrast matrix to check its layout.
A_meta_prscsx

In [None]:
#PRS-CSx better than average of PRS-CSmeta scores in all traits 
# Each call passes the afr/amr/eur data frames, the outcome column "mean",
# the three PRS-CS META scores plus PRS-CSx, and the A_meta_prscsx contrast
# (mean of the three scores minus PRS-CSx within each group).
# perform_alt_test() is defined outside this notebook; the meaning of the
# trailing "parametric", TRUE arguments is not visible here.
# Trailing comments record the p-values noted by the author.
print("hemo")
perform_alt_test(hemo_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(paste0("prscs_score", 1:3), "x_score5"), 
                     A_meta_prscsx, 
                     "parametric", TRUE) #2.3e-14

print("bmi")
perform_alt_test(bmi_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("prscs_score", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) # 2e-139

print("sbp")
perform_alt_test(sbp_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("prscs_score", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #1.23e-115

print("height")
perform_alt_test(height_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("prscs_score", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #0 >> 0

print("ldl")
perform_alt_test(ldl_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("prscs_score", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #1.17e-21


In [None]:
#PRS-CSx better than average of LDpred meta scores in all traits 
# Same contrast (A_meta_prscsx) as for the PRS-CS comparison, but the three
# averaged scores are the LDpred "auto" META scores ldp_auto1:3.
# Trailing comments record the p-values noted by the author.
print("hemo")
perform_alt_test(hemo_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("ldp_auto", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #5.7e-16

print("bmi")
perform_alt_test(bmi_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("ldp_auto", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #1.2e-104

print("sbp")
perform_alt_test(sbp_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("ldp_auto", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) # 1e-18

print("height")
perform_alt_test(height_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("ldp_auto", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #0

print("ldl")
perform_alt_test(ldl_list[c("afr", "amr", "eur")], 
                 "mean", 
                 c(paste0("ldp_auto", 1:3), "x_score5"), 
                 A_meta_prscsx, 
                 "parametric", TRUE) #1.1e-13


#### Does PRS-CSx perform similarly across ancestry groups?

In [None]:
# Run perform_coranova_parametric() on the PRS-CSx score (x_score5) alone for
# each trait, using every ancestry group held in the trait's list.
perform_coranova_parametric(bmi_list, "mean", "x_score5")
perform_coranova_parametric(ldl_list, "mean", "x_score5") #equiv
perform_coranova_parametric(sbp_list, "mean", "x_score5")
perform_coranova_parametric(height_list, "mean", "x_score5")
perform_coranova_parametric(hemo_list, "mean", "x_score5")

## Compare LDpred v PRScs:

### Compare LDpred v PRScs: Estimating Average Difference

#### Estimating difference between LDpred2 and PRS-CS with correlation

In [None]:
# Stack the per-trait summary tables and keep only the META score columns.
a <- rbind(hemo_cormat_df, height_cormat_df, ldl_cormat_df, sbp_cormat_df, bmi_cormat_df) %>% 
        select(outcome, anc, contains("META"))
# Long format: derive the LD panel (afr/amr/eur) and the method (prscs/ldpred)
# from each score column name.
b <- a %>% pivot_longer(cols = contains("META"), names_to = "score", values_to = "corr") %>% 
        mutate(ld = ifelse(grepl("afr", score), "afr", ifelse(grepl("amr", score), "amr", "eur")),
               method = ifelse(grepl("prscs", score), "prscs", "ldpred")) %>%  
        select(!score)
# One row per outcome x ancestry x LD panel; LDpred minus PRS-CS, absolute and
# relative to PRS-CS.
c <- b %>% pivot_wider(names_from = method, values_from = corr) %>% 
    mutate(diff = ldpred - prscs, prop = (ldpred - prscs)/prscs)

# Overall mean difference.
c %>% summarize(mean(diff), mean(prop))

# Mean difference within each ancestry group.
c %>% group_by(anc) %>% summarize(mean(diff), mean(prop))

#### Estimating difference between LDpred2 and PRS-CS with R2


In [None]:
# Continuous traits, META scores only: LDpred minus PRS-CS R2 (diff) and the
# same difference relative to PRS-CS (prop).
a <- fread("dat_continuous_unrelated_refactored_PCagesex_subpopPCs.txt")
b <- a %>% filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>%   filter(ss == "META") %>% dplyr::select(!c(N, score, ss, score2, score1))
c <- b %>% pivot_wider(names_from = method, values_from = R2) %>% mutate(diff = ldpred - prscs, prop = (ldpred - prscs)/prscs)

# Binary traits, same computation.
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt") 
b1 <- a1 %>% filter(anc %in% c("afr", "amr","eur"), phecode != "Phe_250_1") %>%   filter(ss == "META") %>% dplyr::select(!c(cases, controls, score, ss, score2, score1, R2_type, traits, names))
c1 <- b1 %>% pivot_wider(names_from = method, values_from = R2) %>% mutate(diff = ldpred - prscs, prop = (ldpred - prscs)/prscs)

# Continuous and binary rows combined.
d <- rbind(c, c1) 

# Each section prints the overall means, then the means per ancestry.
print("all")
d %>% summarize(mean(diff), mean(prop))
d %>% group_by(anc) %>% summarize(mean(diff), mean(prop))

print("binary")
c1 %>% summarize(mean(diff), mean(prop))
c1 %>% group_by(anc) %>% summarize(mean(diff), mean(prop))

print("continuous")
c %>% summarize(mean(diff), mean(prop))
c %>% group_by(anc) %>% summarize(mean(diff), mean(prop))

#### Estimating difference between LDpred2 and PRS-CS with R2 only eur group

In [None]:
# EUR-trained scores (ss == "EUR") evaluated in the eur group only.
# NOTE(review): this cell reads "..._refactored.txt" files, whereas the other
# R2 sections read "..._refactored_PCagesex_subpopPCs.txt" -- confirm that the
# different input is intended.
a <- fread("dat_continuous_unrelated_refactored.txt")
head(a)
b <- a %>% filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>% 
            filter(ss == "EUR", anc == "eur") %>% 
            dplyr::select(!c(N, score, ss, score2, score1))

a1 <- fread("dat_binary_unrelated_refactored.txt") 
b1 <- a1 %>% filter(anc %in% c("afr", "amr","eur"), phecode != "Phe_250_1") %>% 
                filter(ss == "EUR", anc == "eur") %>% 
                dplyr::select(!c(cases, controls, score, ss, score2, score1, R2_type, traits, names))

# Stack both trait types, tagging each row with its type.
c <- rbind(b %>% mutate(type = "cont"), 
           b1%>% mutate(type = "binary")) 

# LDpred minus PRS-CS R2, absolute and relative to PRS-CS.
d <- c %>% pivot_wider(names_from = method, values_from = R2) %>% mutate(diff = ldpred - prscs, prop = (ldpred - prscs)/prscs)

d %>% summarize(mean(diff), mean(prop))

### Compare LDpred v PRScs:  Using Coranova

In [None]:
# Contrasts between PRS-CS and LDpred META scores. Each ancestry group
# contributes six columns in the order used by the tests in this notebook:
# prscs_score1:3 followed by ldp_auto1:3 (LD panel afr, amr, eur within each
# method). All matrices use byrow = TRUE (not the reassignable shorthand T).

# One row per ancestry group: sum of the three PRS-CS scores minus the sum of
# the three LDpred scores within that group (weights are +/-1, not averaged).
contrast_mat_ldpvprscs <- matrix(
    c(1, 1, 1, -1, -1, -1, rep(0, 6), rep(0, 6),
      rep(0, 6), 1, 1, 1, -1, -1, -1, rep(0, 6),
      rep(0, 6), rep(0, 6), 1, 1, 1, -1, -1, -1),
    byrow = TRUE, ncol = 18, nrow = 3
)
# Single-group version of the matrix above.
contrast_mat_ldpvprscs_1pop <- matrix(c(1, 1, 1, -1, -1, -1), byrow = TRUE, ncol = 6, nrow = 1)


# One row per LD panel: PRS-CS minus LDpred for that panel, summed over the
# three ancestry groups. This is the contrast the tests below use ("what we
# want" in the original note).
contrast_ldpvprscs2 <- matrix(
    c(rep(c(1, 0, 0, -1, 0, 0), 3),
      rep(c(0, 1, 0, 0, -1, 0), 3),
      rep(c(0, 0, 1, 0, 0, -1), 3)),
    byrow = TRUE, ncol = 18, nrow = 3
)

# Single-group version of contrast_ldpvprscs2.
contrast_ldpvprscs2_1pop <- matrix(
    c(1, 0, 0, -1, 0, 0,
      0, 1, 0, 0, -1, 0,
      0, 0, 1, 0, 0, -1),
    byrow = TRUE, ncol = 6, nrow = 3
)

# Single row: all PRS-CS scores minus all LDpred scores, over all three groups.
contrast_ldpvprscs3 <- matrix(rep(c(1, 1, 1, -1, -1, -1), 3), byrow = TRUE, ncol = 18, nrow = 1)


In [None]:
# Display the per-LD-panel PRS-CS minus LDpred contrast.
contrast_ldpvprscs2

In [None]:
#PRS-CS is better than LDpred for scores built with AMR and EUR panel, 
#methods are equivalent with AFR panels
# Joint test over all groups in hemo_list with the per-panel contrast, then a
# direct comparison of the afr-panel pair (prscs_score1 vs ldp_auto1).
print("hemo")
perform_alt_test(hemo_list, 
                     "mean", 
                     c("prscs_score1", "prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_ldpvprscs2, "parametric", TRUE)  #6.4e-07
perform_coranova_parametric(hemo_list, "mean",  c("prscs_score1", "ldp_auto1")) #pW 0.6


In [None]:
# Display the contrast again for reference next to the following tests.
contrast_ldpvprscs2

In [None]:
#methods are equivalent 
# Per-panel PRS-CS minus LDpred contrast for LDL, over all groups in ldl_list.
print("ldl")
perform_alt_test(ldl_list, 
                     "mean", 
                     c("prscs_score1", "prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_ldpvprscs2, "parametric", TRUE)  #p = 0.4


In [None]:
#LDpred better
# Per-panel PRS-CS minus LDpred contrast for height, over all groups in
# height_list.
print("height")
perform_alt_test(height_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_ldpvprscs2, "parametric", TRUE) #5.7e-173



In [None]:
# Per-panel PRS-CS minus LDpred contrast for SBP, over all groups in sbp_list.
print("sbp") #ldpred better
perform_alt_test(sbp_list, 
                 "mean", 
                 c("prscs_score1", "prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                 contrast_ldpvprscs2, "parametric", TRUE) 
#1.5e-81


In [None]:
#LDpred2 is better than PRS-CS for scores built with AFR (3e-36)
#PRS-CS is equiv with AMR panels p = 0.86
#PRS-CS is better with EUR panels p =0.0003
# Joint per-panel contrast for BMI, followed by one pairwise PRS-CS vs LDpred
# comparison per LD panel (index 1 = afr, 2 = amr, 3 = eur).
print("bmi")
perform_alt_test(bmi_list, 
                 "mean",
                 c("prscs_score1", "prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                 contrast_ldpvprscs2, "parametric", TRUE)  #2.5e-44
perform_coranova_parametric(bmi_list, "mean",  c("prscs_score1", "ldp_auto1")) # pW 3e-36

perform_coranova_parametric(bmi_list, "mean",  c("prscs_score2", "ldp_auto2")) #pW = 0.86

perform_coranova_parametric(bmi_list, "mean",  c("prscs_score3", "ldp_auto3")) #pW = 0.0003


## Comparing LDpanels:

### Comparing LDpanels: Estimating difference between LDpanels with R2

In [None]:
# META scores only: spread R2 by LD panel (the `ld` values become columns;
# afr, amr and eur are the ones used below).
a <- fread("dat_continuous_unrelated_refactored_PCagesex_subpopPCs.txt")
b <- a %>% filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>% 
            filter(ss == "META") %>% dplyr::select(!c(N, score, ss, score2, score1))
c <- b %>% pivot_wider(names_from = ld, values_from = R2)

a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt") 
b1 <- a1 %>% filter(anc %in% c("afr", "amr","eur"), phecode != "Phe_250_1") %>% 
        filter(ss == "META") %>% dplyr::select(!c(cases, controls, R2_type, traits, names, score, ss, score2, score1))
c1 <- b1 %>% pivot_wider(names_from = ld, values_from = R2)


# AMR-panel R2 minus AFR-panel / EUR-panel R2 (absolute and proportional),
# averaged within each ancestry x method.
rbind(c, c1) %>% mutate(amr_v_afr = amr - afr, amr_v_afr_prop = (amr - afr)/afr,
            amr_v_eur = amr - eur, amr_v_eur_prop = (amr - eur)/eur) %>%
     group_by(anc, method) %>% summarize(mean(amr_v_afr), mean(amr_v_eur),
                                         mean(amr_v_afr_prop), mean(amr_v_eur_prop)) %>%  
    arrange(method)

In [None]:
# Same panel differences averaged per ancestry only (methods pooled); only the
# proportional columns are shown.
rbind(c, c1) %>% mutate(amr_v_afr = amr - afr, amr_v_afr_prop = (amr - afr)/afr,
            amr_v_eur = amr - eur, amr_v_eur_prop = (amr - eur)/eur) %>%
     group_by(anc) %>% summarize(mean(amr_v_afr), mean(amr_v_eur),mean(amr_v_afr_prop), mean(amr_v_eur_prop)) %>%
select(anc, contains("prop"))

### Comparing LDpanels: Using Coranova

In [None]:
# Contrasts between LD reference panels. Columns come in triplets ordered by
# LD panel (afr, amr, eur), matching score indices 1:3 of each method; the
# triplet pattern is repeated once per method x ancestry group covered.
# All matrices use byrow = TRUE (not the reassignable shorthand T).

# AFR panel as reference: row 1 = afr - amr, row 2 = afr - eur
# (2 methods x 3 ancestry groups = 6 triplets).
contrast_mat_ldpanels <- matrix(
    c(rep(c(1, -1, 0), 6),
      rep(c(1, 0, -1), 6)),
    byrow = TRUE, ncol = 18, nrow = 2
)
contrast_mat_ldpanels
rankMatrix(contrast_mat_ldpanels)

# AMR panel as reference: row 1 = amr - afr, row 2 = amr - eur.
contrast_mat_ldpanels_amrref <- matrix(
    c(rep(c(-1, 1, 0), 6),
      rep(c(0, 1, -1), 6)),
    byrow = TRUE, ncol = 18, nrow = 2
)
contrast_mat_ldpanels_amrref
rankMatrix(contrast_mat_ldpanels_amrref)

# AMR reference, one ancestry group with both methods (2 triplets).
contrast_mat_ldpanels_amrref_1pop <- matrix(
    c(rep(c(-1, 1, 0), 2),
      rep(c(0, 1, -1), 2)),
    byrow = TRUE, ncol = 6, nrow = 2
)
contrast_mat_ldpanels_amrref_1pop
rankMatrix(contrast_mat_ldpanels_amrref_1pop)


# AMR reference, one method across three ancestry groups (3 triplets).
contrast_mat_ldpanels_amrref_1method <- matrix(
    c(rep(c(-1, 1, 0), 3),
      rep(c(0, 1, -1), 3)),
    byrow = TRUE, ncol = 9, nrow = 2
)

# The two rows of the one-method contrast as separate single-row tests.
contrast_mat_ldpanels_amrref_1method_vafr <- matrix(rep(c(-1, 1, 0), 3), byrow = TRUE, ncol = 9, nrow = 1)

contrast_mat_ldpanels_amrref_1method_veur <- matrix(rep(c(0, 1, -1), 3), byrow = TRUE, ncol = 9, nrow = 1)

In [None]:
#aggregate across ancestries, and methods -> AMR is sig best

#smallest p-val is 0.00048, for hemo
# AMR-reference panel contrast (amr - afr, amr - eur) applied to both methods'
# META scores over all groups in each trait list. Trailing comments record the
# p-values noted by the author.
perform_alt_test(hemo_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_mat_ldpanels_amrref, "parametric", TRUE) #p = 0.00048
perform_alt_test(height_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_mat_ldpanels_amrref, "parametric", TRUE) #p = 1.6e-128
perform_alt_test(sbp_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_mat_ldpanels_amrref, "parametric", TRUE) #3e-64
perform_alt_test(ldl_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_mat_ldpanels_amrref, "parametric", TRUE) #8e-09
perform_alt_test(bmi_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3", "ldp_auto1", "ldp_auto2", "ldp_auto3"),
                    contrast_mat_ldpanels_amrref, "parametric", TRUE) #5e-24

In [None]:
### aggregate across ancestries, just prscs

In [None]:
# AMR-reference panel contrast on the three PRS-CS META scores only.
print("hemo") #AMR better in both, overall sig
perform_alt_test(hemo_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3"),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)  #1e-07

In [None]:
# AMR-reference panel contrast on the three PRS-CS META scores only.
print("height") #AMR better in both, p = 1e-79
perform_alt_test(height_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3"),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)


In [None]:
# AMR-reference panel contrast on the three PRS-CS META scores, followed by
# the amr - eur row tested on its own.
print("sbp")
perform_alt_test(sbp_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3" ),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

#eur LD panel not sig better than amr LD panel p = 0.04
perform_alt_test(sbp_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3" ),
                    contrast_mat_ldpanels_amrref_1method_veur, "parametric", TRUE)

In [None]:
# AMR-reference panel contrast on the three PRS-CS META scores only.
print("ldl") #AMR better in both, overall sig (p = 2e-07)
perform_alt_test(ldl_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3"),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

In [None]:
# AMR-reference panel contrast on the three PRS-CS META scores only.
print("bmi")  #AMR better in both, overall sig (p = 6e-37)
perform_alt_test(bmi_list, 
                     "mean", 
                     c("prscs_score1","prscs_score2", "prscs_score3"),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

In [None]:
### aggregate across ancestries, just LDp

In [None]:
# AMR-reference panel contrast on the three LDpred "auto" META scores,
# followed by the amr - eur row tested on its own.
print("hemo") #not sig overall (p = 0.06) -> AMR better than AFR, EUR better than AMR but not sig (p = 0.06)
perform_alt_test(hemo_list, 
                     "mean", 
                     c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)
perform_alt_test(hemo_list, 
                     "mean", 
                     c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method_veur, "parametric", TRUE)


In [None]:
# AMR-reference panel contrast on the three LDpred "auto" META scores only.
print("height")  #AMR better in both, overall sig (p = 3e-97)
perform_alt_test(height_list, 
                     "mean", 
                     c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

In [None]:
# AMR-reference panel contrast on the three LDpred "auto" META scores only.
print("sbp")  #AMR better in both, overall sig (p = 6e-26)
perform_alt_test(sbp_list, 
                     "mean", 
                     c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

In [None]:
# AMR-reference panel contrast on the three LDpred "auto" META scores only.
print("ldl")  #AMR better in both, overall sig (p = 6e-06)
perform_alt_test(ldl_list, 
                     "mean", 
                     c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

In [None]:
# AMR-reference panel contrast on the three LDpred "auto" META scores only.
print("bmi")  #AMR better in both, overall sig (p = 5e-08)
perform_alt_test(bmi_list, 
                     "mean", 
                    c(paste0("ldp_auto",1:3)),
                    contrast_mat_ldpanels_amrref_1method, "parametric", TRUE)

## Comparing Multi to Single: PRS-CSx v ancestry-specific


### Estimating difference between PRS-CSx and ancestry-specific PGS with R2

In [None]:
# Load continuous and binary R2 results, drop columns not needed here, and tag
# each row with its trait type.
a <- fread("dat_continuous_unrelated_refactored_PCagesex_subpopPCs.txt")
b <- a %>% filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>% 
        select(!c(N, score1, score2, ld)) %>% mutate(type = "cont") 
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt") 
b1 <- a1 %>% filter(anc %in% c("afr", "amr","eur"), phecode != "Phe_250_1") %>% 
        select(!c(cases, controls, score2, score1, R2_type, traits, names, ld)) %>% 
        mutate(type = "binary")

# Keep PRS-CSx plus the ancestry-matched single-ancestry scores (afr <-> AFR,
# eur <-> EUR, amr <-> HIS); relabel them as prscsx / anc_specL (LDpred) /
# anc_specP (any other method) and spread into columns.
c <- rbind(b,b1) %>% filter(ss != "META", ss == "prscsx" |
                            anc == "afr" & grepl("AFR", ss) | 
                            anc == "eur" & grepl("EUR", ss) | 
                            anc == "amr" & grepl("HIS", ss)) %>%  
        mutate(score = ifelse(score == "prscsx", "prscsx", 
                       ifelse(method == "ldpred", "anc_specL", "anc_specP"))) %>% 
        select(!c(ss, method)) %>%  
        pivot_wider(names_from = score, values_from = R2)

# Mean proportional gain of PRS-CSx over each ancestry-specific score, per
# ancestry and trait type.
c %>% group_by(anc, type) %>% 
        mutate(prop_diffL = (prscsx - anc_specL)/anc_specL, prop_diffP = (prscsx - anc_specP)/anc_specP) %>%  
        summarize(mean(prop_diffL), mean(prop_diffP))

# Same, pooled over trait types.
print("PRScsx v ancestry specific")
c %>% group_by(anc) %>% mutate(prop_diffL = (prscsx - anc_specL)/anc_specL, prop_diffP = (prscsx - anc_specP)/anc_specP) %>%  
      summarize(mean(prop_diffL), mean(prop_diffP))

# Same for the amr group only, with phecode Phe_428_1 excluded.
print("PRScsx v ancestry specific wo Phe_428_1, only reporting for amr pop")
c %>% group_by(anc) %>% filter(phecode != "Phe_428_1", anc == "amr") %>% mutate(prop_diffL = (prscsx - anc_specL)/anc_specL, prop_diffP = (prscsx - anc_specP)/anc_specP) %>%  
      summarize(mean(prop_diffL), mean(prop_diffP))

In [None]:
# Inspect the Phe_428_1 rows for the amr group (HISss_AMRld_auto and prscsx
# scores) in the "_PCagesex_b" results file.
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_b.txt") 
a1 %>% filter(phecode == "Phe_428_1", anc == "amr", score == "HISss_AMRld_auto" | score == "prscsx")

In [None]:
# Same lookup in the "_PCagesex_subpopPCs" results file, for comparison.
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt") 
a1 %>% filter(phecode == "Phe_428_1", anc == "amr", score == "HISss_AMRld_auto" | score == "prscsx")

### Comparing Multi to Single: PRS-CSx v ancestry-specific: Using Coranova

In [None]:
# Contrasts of PRS-CSx against the ancestry-matched single-ancestry score.
# Each ancestry group contributes four columns in the order used by the tests
# in this notebook: the PRS-CSx score, then single-ancestry scores 4, 5, 6
# (AFR-, HIS- and EUR-trained). Row k is PRS-CSx minus the matched score in
# group k: afr vs score 4, amr vs score 5, eur vs score 6.
# byrow uses TRUE rather than the reassignable shorthand T.
contrast_x_single <- matrix(
    c(1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1),
    byrow = TRUE, ncol = 12, nrow = 3
)
# Same contrasts restricted to the afr and amr groups.
contrast_x_single_aframr <- matrix(
    c(1, -1, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 1, 0, -1, 0),
    byrow = TRUE, ncol = 8, nrow = 2
)

# Column holding the PRS-CSx score that is compared in the tests below.
score_to_comp <- "x_score5"


In [None]:
# Display the PRS-CSx vs single-ancestry contrast.
contrast_x_single

#### contrast x with PRS-CS ancestry-spec

In [None]:
# PRS-CSx vs ancestry-matched PRS-CS score: all three groups, then afr and amr
# only (with the matching two-group contrast).
print("hemo")
# x sig better (p = 7e-6)
perform_alt_test(hemo_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(hemo_list[c("afr", "amr")],  #p = 8.2e-06
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single_aframr, "parametric", TRUE)

In [None]:
# PRS-CSx vs ancestry-matched PRS-CS score: all three groups, then afr and amr
# only (with the matching two-group contrast).
print("height")
# x sig better (p =3.4e-255)
perform_alt_test(height_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(height_list[c("afr", "amr")],  #p = 2.9e-251
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single_aframr, "parametric", TRUE)

In [None]:
# PRS-CSx vs ancestry-matched PRS-CS score: all three groups, a pairwise
# comparison within eur (x_score5 vs prscs_score6), then afr and amr only.
print("sbp")
# overall (p = 1e-42), eur better but not sig so
perform_alt_test(sbp_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single, "parametric", TRUE)
perform_coranova_parametric(sbp_list[c("eur")],  #pW = 0.56
                     "mean", 
                     c("x_score5", paste0("prscs_score6")))
perform_alt_test(sbp_list[c("afr", "amr")],  #p = 1.1e-43
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single_aframr, "parametric", TRUE)

In [None]:
# PRS-CSx vs ancestry-matched PRS-CS score: all three groups, a pairwise
# comparison within eur (x_score5 vs prscs_score6), then afr and amr only.
print("bmi")
#overall p = 1.9e-166
#x better or equiv (EUR-only pW = 0.4)
perform_alt_test(bmi_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single, "parametric", TRUE)
perform_coranova_parametric(bmi_list[c("eur")], 
                     "mean", 
                     c("x_score5", paste0("prscs_score6")))
perform_alt_test(bmi_list[c("afr", "amr")], #1.17e-167
                 "mean",
                 c(score_to_comp, paste0("prscs_score", 4:6)), 
                 contrast_x_single_aframr, "parametric", TRUE)

In [None]:
# PRS-CSx vs ancestry-matched PRS-CS score: all three groups, then afr and amr
# only (with the matching two-group contrast).
print("ldl")
#x sig better (p = 1.5e-14)
perform_alt_test(ldl_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("prscs_score", 4:6)),
                    contrast_x_single, "parametric", TRUE)
perform_alt_test(ldl_list[c("afr", "amr")], #6e-09
                 "mean",
                 c(score_to_comp, paste0("prscs_score", 4:6)), 
                 contrast_x_single_aframr, "parametric", TRUE)

In [None]:
#Just among Europeans, comparing PRS-CSx to EUR-specific PRS-CS score
# One pairwise comparison per trait, using only the eur data frame; the
# comments record which score did better and the p-value noted by the author.
perform_coranova_parametric(bmi_list[c("eur")], 
                     "mean", 
                     c("x_score5", paste0("prscs_score6"))) #eur better, but equiv p = 0.4
perform_coranova_parametric(sbp_list[c("eur")], 
                     "mean", 
                     c("x_score5", paste0("prscs_score6"))) # eur better, but equiv p = 0.6

perform_coranova_parametric(ldl_list[c("eur")],  #prscs-x better p= 5.6e-08
                     "mean", 
                     c("x_score5", paste0("prscs_score6")))

perform_coranova_parametric(hemo_list[c("eur")],  #prscs-x better equiv 0.07
                     "mean", 
                     c("x_score5", paste0("prscs_score6")))
perform_coranova_parametric(height_list[c("eur")],  #prscs-x better 6.7e-07
                     "mean", 
                     c("x_score5", paste0("prscs_score6")))

#### contrast x with LDp ancestry-spec

In [None]:
#contrast x with LDp
# PRS-CSx vs the ancestry-matched LDpred "auto" score (ldp_auto4:6) in all
# three groups, one test per trait.

# x sig better for all, p < 1.3e-16
perform_alt_test(hemo_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("ldp_auto", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(height_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("ldp_auto", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(sbp_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("ldp_auto", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(bmi_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("ldp_auto", 4:6)),
                    contrast_x_single, "parametric", TRUE)

perform_alt_test(ldl_list[c("afr", "amr", "eur")], 
                     "mean", 
                     c(score_to_comp, paste0("ldp_auto", 4:6)),
                    contrast_x_single, "parametric", TRUE)

#### Compute pairwise prs-csx vs ancestry-specific 

In [None]:
# Pairwise comparison of PRS-CSx against the ancestry-matched single-ancestry
# score, run separately within the afr, eur and amr groups of one trait.
#
# trait_list: per-ancestry list of data frames with elements "afr", "eur" and
#             "amr".
# trait_name: label written to the `outcome` column of the result.
# method:     "ldp" compares against ldp_auto4/5/6; any other value compares
#             against prscs_score4/5/6. Also written to the `method` column.
#
# Uses the global `score_to_comp` as the PRS-CSx column name and
# perform_coranova_parametric(), which is defined outside this notebook.
#
# Returns a data frame with rows ordered afr, amr, eur and the columns
# pW, diff, pop, outcome, method.
get_pairwise_x_v_ancestry <- function(trait_list, trait_name, method) {
    comparator_prefix <- if (method == "ldp") "ldp_auto" else "prscs_score"

    # Compare PRS-CSx with comparator score `idx` inside a single group.
    compare_in_pop <- function(pop, idx) {
        perform_coranova_parametric(
            trait_list[c(pop)], "mean",
            c(score_to_comp, paste0(comparator_prefix, idx))
        )
    }

    # Tests run in the order afr, eur, amr; rows are stacked afr, amr, eur.
    afr <- compare_in_pop("afr", 4)
    eur <- compare_in_pop("eur", 6)
    amr <- compare_in_pop("amr", 5)
    afr$pop <- "afr"
    eur$pop <- "eur"
    amr$pop <- "amr"

    kept <- c("pW", "diff", "pop")
    trait <- do.call(
        rbind,
        lapply(list(afr, amr, eur), function(res) as.data.frame(res[kept]))
    )
    trait$outcome <- trait_name
    trait$method <- method
    trait
}

In [None]:
# Pairwise PRS-CSx vs ancestry-specific results for every trait, first against
# the LDpred scores, then against the PRS-CS scores.
# Note: hemo/height/sbp/ldl/bmi are reused as scratch names here and no longer
# hold the make_outcome_list_subpopPCs() output assigned earlier in the file.
hemo <- get_pairwise_x_v_ancestry(hemo_list, "hemo", "ldp")
height <- get_pairwise_x_v_ancestry(height_list, "height", "ldp")
sbp <- get_pairwise_x_v_ancestry(sbp_list, "sbp", "ldp")
ldl <- get_pairwise_x_v_ancestry(ldl_list, "ldl", "ldp")
bmi <- get_pairwise_x_v_ancestry(bmi_list, "bmi", "ldp")
pairwise_x_v_ancestry_ldp <- rbind(hemo, height, sbp, ldl, bmi)

hemo <- get_pairwise_x_v_ancestry(hemo_list, "hemo", "prscs")
height <- get_pairwise_x_v_ancestry(height_list, "height", "prscs")
sbp <- get_pairwise_x_v_ancestry(sbp_list, "sbp", "prscs")
ldl <- get_pairwise_x_v_ancestry(ldl_list, "ldl", "prscs")
bmi <- get_pairwise_x_v_ancestry(bmi_list, "bmi", "prscs")

# LDpred rows followed by PRS-CS rows.
pairwise_x_v_ancestry <- rbind(pairwise_x_v_ancestry_ldp, hemo, height, sbp, ldl, bmi)

In [None]:
# List the comparisons whose reported `diff` is negative.
pairwise_x_v_ancestry %>% filter(diff < 0) #only time pairwise diff is negative, not significantly different

## Comparing Multi to Single: mean of meta v ancestry-specific


### Estimating the difference between mmPGS and ancestry-specific PGS

#### Estimating the difference between mmPGS and ancestry-specific PGS with correlation

In [None]:
# Stack the per-trait summary tables and add, per method, the average of the
# three META-score correlations.
a <- rbind(hemo_cormat_df, height_cormat_df, ldl_cormat_df, sbp_cormat_df, bmi_cormat_df)
a <- a %>% mutate(prscs_META_avg = (prscs_META_afr + prscs_META_amr + prscs_META_eur)/3, 
             ldpred_META_avg = (ldpred_METAss_afrld_auto + ldpred_METAss_amrld_auto + ldpred_METAss_eurld_auto)/3)
# Keep the averages and the single-ancestry score columns (case-sensitive
# match on AFR / HIS / EUR).
b <- a %>% select(outcome, anc, prscs_META_avg, ldpred_META_avg , contains(c("AFR", "HIS", "EUR"), ignore.case = FALSE))

# Long format, keeping for each row only the score trained on the matching
# ancestry (afr <-> AFR, eur <-> EUR, amr <-> HIS).
c <- b %>% pivot_longer(cols = contains(c("AFR", "HIS", "EUR")), names_to = "anc_score", values_to = "anc_score2") %>%  
  filter(anc == "afr" & grepl("AFR", anc_score) | anc == "eur" & grepl("EUR", anc_score) | anc == "amr" & grepl("HIS", anc_score))


In [None]:
# Split by method, keeping only that method's META average column.
c_prscs <- c %>% filter( grepl("prscs", anc_score)) %>% select(!ldpred_META_avg) %>% mutate(method = "prscs")
c_ldp <- c %>% filter( grepl("ldpred", anc_score)) %>% select(!prscs_META_avg)%>% mutate(method = "ldpred")

In [None]:
# Give both tables the same column names so they can be stacked; the fourth
# column (renamed "a", the score label) is dropped before binding.
colnames(c_prscs) <- c("outcome", "anc", "META_avg", "a", "anc_spec", "method")
colnames(c_ldp) <- c("outcome", "anc", "META_avg", "a", "anc_spec", "method")
# META average minus ancestry-specific correlation, absolute and relative to
# the ancestry-specific value; averaged per method x ancestry.
d <- rbind(c_prscs %>% select(!a), c_ldp %>% select(!a))  %>% mutate(diff = META_avg - anc_spec, prop_diff = (META_avg - anc_spec)/anc_spec)
d %>% group_by( method, anc) %>% summarize(mean(diff), mean(prop_diff))

#### Estimating the difference between mmPGS and ancestry-specific PGS with R2

In [None]:
# Continuous traits: mean R2 per phecode x ancestry x method x ss (PRS-CSx
# excluded), spread by ss, then reduced to the ancestry-matched single-ancestry
# column (afr <-> AFR, eur <-> EUR, amr <-> HIS) alongside the META column.
a <- fread("dat_continuous_unrelated_refactored_PCagesex_subpopPCs.txt")
b <- a %>% filter(anc %in% c("afr", "amr","eur"), phecode != "basophils") %>% 
 filter(ss != "prscsx") %>% group_by(phecode,anc, method, ss) %>%  
summarize(mean(R2)) %>% pivot_wider(names_from = ss, values_from = `mean(R2)`)  %>% 
pivot_longer(cols = contains(c("AFR", "HIS", "EUR")), names_to = "anc_score", values_to = "anc_score2") %>%  
  filter(anc == "afr" & grepl("AFR", anc_score) | anc == "eur" & grepl("EUR", anc_score) | anc == "amr" & grepl("HIS", anc_score))

#just continuous comparing avg of meta to ancestry-specific
b %>% 
mutate(diff = META - anc_score2, prop_diff = (META - anc_score2)/anc_score2) %>%  
group_by(method, anc) %>% summarize(mean(diff), mean(prop_diff)) 

In [None]:
# Binary traits: same summary as for the continuous traits, excluding the
# "Phe_250_1" phecode and the "prscsx" rows.
a1 <- fread("dat_binary_unrelated_refactored_PCagesex_subpopPCs.txt")

b1 <- a1 %>%
  filter(
    anc %in% c("afr", "amr", "eur"),
    phecode != "Phe_250_1",
    ss != "prscsx"
  ) %>%
  select(-c(cases, controls, score, score2, score1, R2_type, traits, names)) %>%
  group_by(phecode, anc, method, ss) %>%
  summarize(mean_R2 = mean(R2)) %>%
  # one column per `ss` value (a `META` column is expected below)
  pivot_wider(names_from = ss, values_from = mean_R2) %>%
  # back to long form for the AFR / HIS / EUR columns only
  pivot_longer(
    cols = contains(c("AFR", "HIS", "EUR")),
    names_to = "anc_score",
    values_to = "anc_score2"
  ) %>%
  # keep the column matching each row's group (afr <-> AFR, eur <-> EUR, amr <-> HIS)
  filter(
    (anc == "afr" & grepl("AFR", anc_score)) |
      (anc == "eur" & grepl("EUR", anc_score)) |
      (anc == "amr" & grepl("HIS", anc_score))
  )

# Binary traits only: META vs. ancestry-specific, averaged per method and group.
b1 %>%
  mutate(
    diff = META - anc_score2,
    prop_diff = diff / anc_score2
  ) %>%
  group_by(method, anc) %>%
  summarize(mean(diff), mean(prop_diff))

In [None]:
# Binary + continuous traits together: META vs. ancestry-specific, averaged per
# method and ancestry group. (These are the numbers reported in the results.)
rbind(b, b1) %>%
  mutate(
    diff = META - anc_score2,
    prop_diff = diff / anc_score2
  ) %>%
  group_by(method, anc) %>%
  summarize(mean(diff), mean(prop_diff))

# Same summary restricted to the amr group, leaving out phecode "Phe_428_1".
rbind(b, b1) %>%
  filter(phecode != "Phe_428_1", anc == "amr") %>%
  mutate(
    diff = META - anc_score2,
    prop_diff = diff / anc_score2
  ) %>%
  group_by(method, anc) %>%
  summarize(mean(diff), mean(prop_diff))

### Comparing Multi to Single: mean of meta v ancestry-specific: Using Coranova

In [None]:
# Contrast matrices passed to perform_alt_test().
#
# Each row is one contrast: weight 1/3 on each of three scores and -1 on one
# other score, i.e. "mean of three scores minus a single score".
# NOTE(review): presumably columns are laid out as six scores per ancestry
# group (three META scores, then the AFR, HIS and EUR single-ancestry scores),
# with groups in the order afr, amr, eur - confirm against perform_alt_test().

# All three ancestry groups: 3 contrasts over 3 x 6 = 18 columns.
A <- matrix(
  c(
    1/3, 1/3, 1/3, -1, 0, 0,  rep(0, 6),  rep(0, 6),
    rep(0, 6),  1/3, 1/3, 1/3, 0, -1, 0,  rep(0, 6),
    rep(0, 6),  rep(0, 6),  1/3, 1/3, 1/3, 0, 0, -1
  ),
  nrow = 3, ncol = 18, byrow = TRUE
)

# First two groups only (used with afr and amr): 2 contrasts over 12 columns.
B <- matrix(
  c(
    1/3, 1/3, 1/3, -1, 0, 0,  rep(0, 6),
    rep(0, 6),  1/3, 1/3, 1/3, 0, -1, 0
  ),
  nrow = 2, ncol = 12, byrow = TRUE
)

# Single group, six scores: mean of scores 1-3 minus score 6.
C <- matrix(c(1/3, 1/3, 1/3, 0, 0, -1), nrow = 1, ncol = 6, byrow = TRUE)

# Single group, four scores: mean of scores 1-3 minus the fourth supplied score.
E <- matrix(c(1/3, 1/3, 1/3, -1), nrow = 1, ncol = 4, byrow = TRUE)

# Twelve columns per group: the six-score contrast repeated twice side by side
# (NOTE(review): presumably one copy per PGS method - confirm column order).
A_bothmethods <- matrix(
  c(
    1/3, 1/3, 1/3, -1, 0, 0,  1/3, 1/3, 1/3, -1, 0, 0,  rep(0, 12),  rep(0, 12),
    rep(0, 12),  1/3, 1/3, 1/3, 0, -1, 0,  1/3, 1/3, 1/3, 0, -1, 0,  rep(0, 12),
    rep(0, 12),  rep(0, 12),  1/3, 1/3, 1/3, 0, 0, -1,  1/3, 1/3, 1/3, 0, 0, -1
  ),
  nrow = 3, ncol = 36, byrow = TRUE
)

In [None]:
print("hemo")
# Recorded results (PRS-CS scores):
#   - mean is significantly better in AFR and AMR (p = 7.8e-05)
#   - ancestry-specific is significantly better in EUR (p = 0.02)

# All three groups, contrast matrix A. Recorded overall p = 2e-05.
perform_alt_test(
  hemo_list[c("afr", "amr", "eur")],
  "mean",
  paste0("prscs_score", 1:6),
  A,
  "parametric",
  TRUE
)

# AFR and AMR only, contrast matrix B. Recorded p = 7.8e-05.
perform_alt_test(
  hemo_list[c("afr", "amr")],
  "mean",
  paste0("prscs_score", 1:6),
  B,
  "parametric",
  TRUE
)

# EUR only: mean of scores 1-3 vs. score 6 (the EUR score), contrast matrix E.
# Recorded p = 0.017.
perform_alt_test(
  hemo_list["eur"],
  "mean",
  c(paste0("prscs_score", 1:3), "prscs_score6"),
  E,
  "parametric",
  TRUE
)

In [None]:
print("bmi")
# Recorded results (PRS-CS scores):
#   - mean is significantly better in AFR and AMR (p = 7.5e-179)
#   - ancestry-specific is significantly better in EUR (p = 1.5e-54)
# NOTE(review): 7.5e-179 is also the p-value recorded for the overall test
# below, while the AFR+AMR test records 1.1e-127 - confirm which value belongs
# in the summary line above.

# All three groups, contrast matrix A. Recorded overall p = 7.5e-179.
perform_alt_test(
  bmi_list[c("afr", "amr", "eur")],
  "mean",
  paste0("prscs_score", 1:6),
  A,
  "parametric",
  TRUE
)

# AFR and AMR only, contrast matrix B. Recorded p = 1.1e-127.
perform_alt_test(
  bmi_list[c("afr", "amr")],
  "mean",
  paste0("prscs_score", 1:6),
  B,
  "parametric",
  TRUE
)

# EUR only: scores 1-3 and score 6, contrast matrix E. Recorded p = 1.5e-54.
perform_alt_test(
  bmi_list["eur"],
  "mean",
  c(paste0("prscs_score", 1:3), "prscs_score6"),
  E,
  "parametric",
  TRUE
)

In [None]:
print("ldl")
# Recorded results (PRS-CS scores):
#   - mean is significantly better in AFR and AMR (p = 0.0003)
#   - equivalent in EUR (p = 0.95)

# All three groups, contrast matrix A. Recorded p = 0.001.
perform_alt_test(
  ldl_list[c("afr", "amr", "eur")],
  "mean",
  paste0("prscs_score", 1:6),
  A,
  "parametric",
  TRUE
)

# AFR and AMR only, contrast matrix B. Recorded p = 0.0003.
perform_alt_test(
  ldl_list[c("afr", "amr")],
  "mean",
  paste0("prscs_score", 1:6),
  B,
  "parametric",
  TRUE
)

# EUR only: scores 1-3 and score 6, contrast matrix E. Recorded p = 0.95.
perform_alt_test(
  ldl_list["eur"],
  "mean",
  c(paste0("prscs_score", 1:3), "prscs_score6"),
  E,
  "parametric",
  TRUE
)

In [None]:
print("sbp")
# Recorded results (PRS-CS scores):
#   - mean is significantly better in AFR and AMR (p = 2.02e-21)
#   - ancestry-specific is significantly better in EUR (p = 8.2e-30)

# All three groups, contrast matrix A. Recorded overall p = 2.8e-48.
perform_alt_test(
  sbp_list[c("afr", "amr", "eur")],
  "mean",
  paste0("prscs_score", 1:6),
  A,
  "parametric",
  TRUE
)

# AFR and AMR only, contrast matrix B. Recorded p = 2.02e-21.
perform_alt_test(
  sbp_list[c("afr", "amr")],
  "mean",
  paste0("prscs_score", 1:6),
  B,
  "parametric",
  TRUE
)

# EUR only: scores 1-3 and score 6, contrast matrix E. Recorded p = 8.2e-30.
perform_alt_test(
  sbp_list["eur"],
  "mean",
  c(paste0("prscs_score", 1:3), "prscs_score6"),
  E,
  "parametric",
  TRUE
)

In [None]:
print("height")
# Recorded results (PRS-CS scores):
#   - mean is significantly better in AFR and AMR (p = 7e-28)
#   - ancestry-specific is significantly better in EUR (p = 0)

# All three groups, contrast matrix A. Recorded p = 0.
perform_alt_test(
  height_list[c("afr", "amr", "eur")],
  "mean",
  paste0("prscs_score", 1:6),
  A,
  "parametric",
  TRUE
)

# AFR and AMR only, contrast matrix B. Recorded p = 7e-28.
perform_alt_test(
  height_list[c("afr", "amr")],
  "mean",
  paste0("prscs_score", 1:6),
  B,
  "parametric",
  TRUE
)

# EUR only: scores 1-3 and score 6, contrast matrix E. Recorded p = 0.
perform_alt_test(
  height_list["eur"],
  "mean",
  c(paste0("prscs_score", 1:3), "prscs_score6"),
  E,
  "parametric",
  TRUE
)

In [None]:
# LDpred ("ldp_auto") scores, all three groups, contrast matrix A.
# Recorded conclusion: meta LDp is significantly better than single for all traits.

# hemo: recorded p = 1.6e-06
perform_alt_test(
  hemo_list[c("afr", "amr", "eur")],
  "mean",
  paste0("ldp_auto", 1:6),
  A,
  "parametric",
  TRUE
)

# height: recorded p = 4.7e-57
perform_alt_test(
  height_list[c("afr", "amr", "eur")],
  "mean",
  paste0("ldp_auto", 1:6),
  A,
  "parametric",
  TRUE
)

# bmi: recorded p = 6.2e-96
perform_alt_test(
  bmi_list[c("afr", "amr", "eur")],
  "mean",
  paste0("ldp_auto", 1:6),
  A,
  "parametric",
  TRUE
)

# sbp: recorded p = 7.5e-24
perform_alt_test(
  sbp_list[c("afr", "amr", "eur")],
  "mean",
  paste0("ldp_auto", 1:6),
  A,
  "parametric",
  TRUE
)

# ldl: recorded p = 6.9e-10
perform_alt_test(
  ldl_list[c("afr", "amr", "eur")],
  "mean",
  paste0("ldp_auto", 1:6),
  A,
  "parametric",
  TRUE
)

