*NOTE: sumstats still in hg37*

### **Setup:**

In [1]:
.libPaths(c(.libPaths(), "/exports/eddie3_apps_local/apps/SL7/R/4.3.0/lib64/R/library"))
library("dplyr", warn.conflicts = F)
library("tidyverse")
library("stringr", warn.conflicts = F)
library("data.table", warn.conflicts = F) 
library("vroom", warn.conflicts = F)
library("ggplot2", warn.conflicts = F)

── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all

### **Change working directory:**

In [2]:
wd = "/exports/cmvm/eddie/smgphs/groups/Quantgen/Users/vasilis/PHD/"
setwd(wd)
getwd()

### **Load data:**

In [11]:
afr <-
  list.files(path="ukb.delirium.gwas/non_eur/output/afr", pattern = "ukb_afr_noqc_c.*_delirium.regenie", full.names=T) %>%
  map_df(~fread(.))

In [3]:
sas <-
  list.files(path="ukb.delirium.gwas/non_eur/output/sas", pattern = "ukb_sas_noqc_c.*_delirium.regenie", full.names=T) %>%
  map_df(~fread(.))

### QC:


In [13]:
afr_qc <- 
    afr %>% 
    # make pvalue 
    mutate(P = 10^(-1*LOG10P)) %>%
    # make maf
    mutate(MAF = ifelse(A1FREQ > 0.5, 1 - A1FREQ, A1FREQ)) %>%
    mutate(MAF_CASES = ifelse(A1FREQ_CASES > 0.5, 1 - A1FREQ_CASES, A1FREQ_CASES)) %>%
    mutate(MAF_CONTROLS = ifelse(A1FREQ_CONTROLS > 0.5, 1 - A1FREQ_CONTROLS, A1FREQ_CONTROLS)) %>%
    # make mac 
    mutate(MAC = 2*N*MAF) %>% 
    mutate(MAC_CASES = 2*N_CASES*MAF_CASES) %>%
    mutate(MAC_CONTROLS = 2*N_CONTROLS*MAF_CONTROLS) %>% 
    # filter MAC>=5 on cases and controls
    filter(MAC_CASES >= 5 & MAC_CONTROLS >= 5) %>%
    # filter INFO >= 0.5 
    filter(INFO >= 0.5) %>%
    # select
    select(CHROM, GENPOS, ID, ALLELE0, ALLELE1, A1FREQ,A1FREQ_CASES, A1FREQ_CONTROLS, MAF, MAC, MAF_CASES, MAC_CASES, MAF_CONTROLS, MAC_CONTROLS, BETA, SE, INFO, P)

In [4]:
sas_qc <- 
    sas %>% 
    # make pvalue 
    mutate(P = 10^(-1*LOG10P)) %>%
    # make maf
    mutate(MAF = ifelse(A1FREQ > 0.5, 1 - A1FREQ, A1FREQ)) %>%
    mutate(MAF_CASES = ifelse(A1FREQ_CASES > 0.5, 1 - A1FREQ_CASES, A1FREQ_CASES)) %>%
    mutate(MAF_CONTROLS = ifelse(A1FREQ_CONTROLS > 0.5, 1 - A1FREQ_CONTROLS, A1FREQ_CONTROLS)) %>%
    # make mac 
    mutate(MAC = 2*N*MAF) %>% 
    mutate(MAC_CASES = 2*N_CASES*MAF_CASES) %>%
    mutate(MAC_CONTROLS = 2*N_CONTROLS*MAF_CONTROLS) %>% 
    # filter MAC>=5 on cases and controls
    filter(MAC_CASES >= 5 & MAC_CONTROLS >= 5) %>%
    # filter INFO >= 0.5 
    filter(INFO >= 0.5) %>%
    # select
    select(CHROM, GENPOS, ID, ALLELE0, ALLELE1, A1FREQ,A1FREQ_CASES, A1FREQ_CONTROLS, MAF, MAC, MAF_CASES, MAC_CASES, MAF_CONTROLS, MAC_CONTROLS, BETA, SE, INFO, P)

In [26]:
cat(paste0("Variants in UKB AFR ancestry GWAS: ", prettyNum(nrow(afr_qc),big.mark = ",")))
cat("\n")
cat(paste0("Variants in UKB SAS ancestry GWAS: ", prettyNum(nrow(sas_qc),big.mark = ",")))


Variants in UKB AFR ancestry GWAS: 13,622,268
Variants in UKB SAS ancestry GWAS: 8,750,516

In [27]:
rm(sas, afr)

“object 'afr' not found”


### Write output:


In [5]:
#write.table(afr_qc, file="ukb.delirium.gwas/non_eur/output/afr/ukb_afr_fgQC_all.regenie", quote=F, col.names=T, row.names=F)
write.table(sas_qc, file="ukb.delirium.gwas/non_eur/output/sas/ukb_sas_fgQC_all.regenie", quote=F, col.names=T, row.names=F)