# Parse REGENIE outputs

This notebook combines all `.regenie` output files produced by `03_Run_REGENIE.ipynb` into a single results table.

In [None]:
# Load libraries
library(data.table)
library(dplyr)
library(ggplot2)

In [None]:
## Directory holding all zipped .regenie files
dir_path <- "./REGENIE_results_EUR/"

# List all .gz files in the directory (full paths, so they can be opened
# regardless of the current working directory)
gz_files <- list.files(path = dir_path, pattern = "\\.gz$", full.names = TRUE)

# Fail early with a clear message: with no input files rbindlist() would
# return an empty table and the error would only surface further down
if (length(gz_files) == 0) {
  stop("No .gz files found in ", dir_path, call. = FALSE)
}

# Read all .gz files into a list of data.tables
# NOTE: fread() decompresses .gz input itself, but needs the R.utils package
data_list <- lapply(gz_files, fread)

# Combine all data.tables into one; columns are matched by name and any
# column missing from a file is filled with NA
combined_data <- rbindlist(data_list, use.names = TRUE, fill = TRUE)

# Drop the per-file list, it is no longer needed
rm(data_list)

# Clean up dataframe
# Inspect the columns that are about to be dropped. useNA = "ifany" is
# required because table() omits NA by default, which would make an all-NA
# column show up as an empty table
table(combined_data$TEST, useNA = "ifany") # all "ADD"
table(combined_data$EXTRA, useNA = "ifany") # NA
table(combined_data$INFO, useNA = "ifany") # all 1

# Per the checks above these columns are constant, so remove them
combined_data <- combined_data %>%
  dplyr::select(-INFO, -TEST, -EXTRA)

# Add ORs
combined_data$OR <- exp(combined_data$BETA)

# Convert LOG10P to p value: LOG10P = -log10(p-value) -> 10^(-LOG10P) = p-value
# NOTE: p_val underflows to exactly 0 once LOG10P exceeds ~323, so LOG10P
# remains the authoritative column for very strong signals
combined_data$p_val <- 10^(-combined_data$LOG10P)

# How many variants are < 5e-8?
# The comparison is done on the log scale (p < 5e-8  <=>  LOG10P > -log10(5e-8))
# so it does not depend on the lossy back-transformed p_val column
gws_log10p <- -log10(5e-8)
is_gws <- combined_data$LOG10P > gws_log10p

# Total number of significant variants (29838 when this was originally run)
sum(is_gws, na.rm = TRUE)

# Significant / non-significant counts per chromosome
table(is_gws, combined_data$CHROM)

# Write the combined table; the .gz extension makes fwrite() gzip the output
fwrite(combined_data, "AOU_REGENIE_combined_results.csv.gz")