In [None]:
# Draw 5000 random sample rows from the .fam file into rand.5000.
# No random source/seed is fixed, so the selected subset differs between runs.
shuf --head-count=5000 target_qc_data.fam > rand.5000

In [None]:
# Make the LDAK binary executable for all users (a+x) so it can be run as ./ldak6.1.linux.
chmod a+x ldak6.1.linux

In [None]:
# Calculate predictor-predictor (SNP-SNP) correlations with LDAK (bash cell).
# Genotypes come from the target_qc_data fileset, restricted to the samples
# listed in rand.5000; break points are read from berisa.txt and the output
# stem is "cors".
./ldak6.1.linux --calc-cors cors \
  --bfile target_qc_data \
  --break-points berisa.txt \
  --keep rand.5000

In [None]:
library(data.table)

# Read the GWAS summary statistics.
# Expected columns: CHR SNP BP A1 A2 INFO OR SE P beta
ss <- fread("asd_beta.txt")

# Z statistic = effect estimate divided by its standard error.
ss[, Z := beta / SE]

# Keep only rows with a finite Z (drops NA/NaN/Inf, e.g. SE of zero or missing
# inputs), then attach the constant sample size used for every predictor.
# NOTE(review): confirm LDAK accepts the header names used here (SNP, N) for
# the predictor and sample-size columns.
ss <- ss[is.finite(Z)][, N := 46351L]

fwrite(ss, "asd_beta_withz.txt", sep = "\t", quote = FALSE, na = "NA")

In [None]:
# Estimate per-predictor effect sizes with LDAK --mega-prs from the summary
# statistics in asd_beta_withz.txt, using the correlations stored under the
# "cors" stem.
# NOTE(review): "bayesr" here is the output stem passed to --mega-prs (the
# filtering step below reads bayesr.effects); no --model option is given.
# Confirm against the LDAK MegaPRS documentation that the intended prior and
# cross-validation settings are actually in effect.
./ldak6.1.linux --mega-prs bayesr --summary asd_beta_withz.txt \
--cors cors --power -0.25 --check-sums NO --max-threads 8

# Filter bayesr.effects against the target genotype data: keep only rows whose
# predictor is present in target_qc_data.bim and whose allele pair matches the
# .bim allele pair in either orientation (comparison is case-insensitive).
# Assumes the effects file has the predictor name in column 1 and the two
# alleles in columns 2 and 3 -- TODO confirm against the bayesr.effects header.
awk '
# First file (.bim): index the allele pair (columns 5 and 6) by predictor
# name (column 2), in both orientations.
FNR==NR {
  ref[$2]=toupper($5)"|"toupper($6)
  ref_flip[$2]=toupper($6)"|"toupper($5)
  next
}
# Second file (effects): pass the header line through unchanged.
FNR==1 { print; next }
# Remaining lines: print only when the allele pair matches the .bim entry.
{
  test=toupper($2)"|"toupper($3)
  if (ref[$1]==test || ref_flip[$1]==test) print
}
' target_qc_data.bim bayesr.effects > bayesr.effects.filtered



In [None]:
# Build a PLINK-style covariate file (FID, IID, then Sex, Age, PC1..PC10 when
# the corresponding columns are present in the input).
library(data.table)

x <- fread("covar_prs.csv", na.strings = c("", "NA"))
stopifnot("eid" %in% names(x))

# Look up column names by regex; the patterns accept either '-' or '.' as the
# separator between field, instance and array index.
find <- function(rx) grep(rx, names(x), value = TRUE)

sex_col <- find("^31[.-]0[.-]0$")                 # field 31: sex
age_col <- find("^21022[.-]0[.-]0$")              # field 21022: age
pc_cols <- find("^22009[.-]0[.-](?:[1-9]|10)$")   # field 22009: PCs 1..10

# The trailing array index gives the PC number; sort the columns by it so the
# output columns run PC1, PC2, ... regardless of their order in the input.
pc_num <- as.integer(sub(".*[.-]0[.-]", "", pc_cols))
pc_rank <- order(pc_num)
pc_cols <- pc_cols[pc_rank]
pc_names <- paste0("PC", pc_num[pc_rank])

# The participant id is used for both FID and IID.
cov <- data.table(FID = x$eid, IID = x$eid)

if (length(sex_col) == 1) {
  sex <- as.integer(x[[sex_col]])
  # Normalise a 1/2 coding to 1/0: a value of 2 becomes 0, everything else
  # (including 0, 1 and NA) is kept as is.
  sex[which(sex == 2L)] <- 0L
  cov[, Sex := sex]
}

if (length(age_col) == 1) {
  cov[, Age := as.numeric(x[[age_col]])]
}

if (length(pc_cols) > 0) {
  # Coerce every PC column to numeric while extracting, then rename.
  pcs <- x[, lapply(.SD, as.numeric), .SDcols = pc_cols]
  setnames(pcs, pc_names)
  cov <- cbind(cov, pcs)
}

fwrite(cov, "ldak_covariates.cov", sep = "\t", quote = FALSE, na = "NA")

In [None]:
# Run LDAK --linear on the phenotype in ukb_raw.pheno with the covariates in
# ldak_covariates.cov, using genotypes from target_qc_data; the output stem is
# linear_covar.
./ldak6.1.linux --linear linear_covar \
  --bfile target_qc_data \
  --pheno ukb_raw.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Extract the sample IDs (first two columns: FID, IID) from the covariate file
# for use with --keep. ldak_covariates.cov is written tab-delimited with a
# header row, so split on any whitespace (a single-space delimiter would match
# nothing and copy whole lines) and skip the "FID IID" header line.
awk '!(NR == 1 && $1 == "FID") { print $1, $2 }' ldak_covariates.cov > ids_in_cov.txt

In [None]:
# Compute polygenic scores (output stem prs_chrono) from the filtered effect
# sizes for the samples listed in ids_in_cov.txt. The phenotype and covariate
# files are supplied together with a coefficients file; linear_covar.coeff is
# presumably written by the --linear run with stem linear_covar -- confirm.
./ldak6.1.linux --calc-scores prs_chrono \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno ukb_raw.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile linear_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Insomnia phenotype: derive a binary case/control variable from column
# "1200-0.0" and write it as a tab-delimited FID/IID/phenotype file.
library(data.table)

inso <- fread("insomnia_participant.csv")
setnames(inso, old = "1200-0.0", new = "insomnia_raw")

# Inspect the raw coding before recoding.
table(inso$insomnia_raw, useNA = "ifany")

# Negative special codes are treated as missing.
miss_codes <- c(-1, -3, -7)
inso[insomnia_raw %in% miss_codes, insomnia_raw := NA_integer_]

# 3 ("usually") -> case (1); 1 ("never") or 2 ("sometimes") -> control (0);
# anything else, including missing, -> NA.
inso[, insomnia_bin := fcase(
  insomnia_raw == 3L, 1L,
  insomnia_raw %in% c(1L, 2L), 0L,
  default = NA_integer_
)]

# Keep only participants with a defined phenotype; the participant id is used
# for both FID and IID.
inso_pheno <- inso[
  !is.na(insomnia_bin),
  .(FID = eid, IID = eid, insomnia = insomnia_bin)
]

fwrite(inso_pheno, "insomnia.pheno", sep = "\t", quote = FALSE, na = "NA")

In [None]:
# Run LDAK --linear on the insomnia phenotype with the covariates in
# ldak_covariates.cov; the output stem is inso_covar.
./ldak6.1.linux --linear inso_covar \
  --bfile target_qc_data \
  --pheno insomnia.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_inso) from the filtered effect
# sizes for the samples in ids_in_cov.txt, supplying the insomnia phenotype,
# the covariates and inso_covar.coeff (presumably written by the --linear run
# with stem inso_covar -- confirm).
./ldak6.1.linux --calc-scores prs_inso \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno insomnia.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile inso_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Snoring phenotype: derive a binary variable from column "1210-0.0" and write
# it as a tab-delimited FID/IID/phenotype file.
library(data.table)

snor <- fread("snoring_participant.csv")
print(head(snor))

setnames(snor, old = "1210-0.0", new = "snoring_raw")
table(snor$snoring_raw, useNA = "always")

# Negative special codes are treated as missing.
miss_snor_codes <- c(-1, -3)
snor[snoring_raw %in% miss_snor_codes, snoring_raw := NA_integer_]

# Recode: 1 (snores) -> 1, 2 (does not snore) -> 0, anything else -> NA.
snor[, snoring_bin := fcase(
  snoring_raw == 1L, 1L,
  snoring_raw == 2L, 0L,
  default = NA_integer_
)]

# Build the phenotype table (participant id used for both FID and IID), then
# drop participants without a defined phenotype.
snor_pheno <- snor[, .(FID = eid, IID = eid, snoring = snoring_bin)]
snor_pheno <- snor_pheno[!is.na(snoring)]

table(snor_pheno$snoring, useNA = "always")

fwrite(snor_pheno, "snoring.pheno", sep = "\t", quote = FALSE, na = "NA")

In [None]:
# Run LDAK --linear on the snoring phenotype with the covariates in
# ldak_covariates.cov; the output stem is snor_covar.
./ldak6.1.linux --linear snor_covar \
  --bfile target_qc_data \
  --pheno snoring.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_snor) from the filtered effect
# sizes for the samples in ids_in_cov.txt, supplying the snoring phenotype,
# the covariates and snor_covar.coeff (presumably written by the --linear run
# with stem snor_covar -- confirm).
./ldak6.1.linux --calc-scores prs_snor \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno snoring.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile snor_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Sleep duration phenotype: bin the reported hours in column "1160-0.0" into
# short / optimal / long, code the bins as integers and write a tab-delimited
# FID/IID/phenotype file.
library(data.table)

dura <- fread("sleep_duration_pheno_participant.csv")
print(head(dura))

setnames(dura, old = "1160-0.0", new = "dura_raw")
table(dura$dura_raw, useNA = "always")

# Negative special codes are treated as missing, then missing rows are dropped.
# Subsetting with data.table keeps `dura` a proper data.table, so the `:=`
# updates below work by reference without an extra package.
miss_dura_codes <- c(-1, -3)
dura[dura_raw %in% miss_dura_codes, dura_raw := NA_integer_]
dura <- dura[!is.na(dura_raw)]

table(dura$dura_raw, useNA = "always")

# Bin hours of sleep: < 7 short, 7 to 9 (inclusive) optimal, > 9 long.
dura[, duration_cat := fcase(
  dura_raw < 7, "Short",
  dura_raw >= 7 & dura_raw <= 9, "Optimal",
  dura_raw > 9, "Long",
  default = NA_character_
)]

table(dura$duration_cat, useNA = "always")

# Integer coding written to the phenotype file: optimal = 0, short = 1,
# long = 2.
# NOTE(review): these codes are category labels, not an ordered scale; confirm
# that analysing them as a single numeric phenotype is intended.
dura[, duration_code := fcase(
  duration_cat == "Optimal", 0L,
  duration_cat == "Short", 1L,
  duration_cat == "Long", 2L,
  default = NA_integer_
)]

table(dura$duration_code, useNA = "always")

# The participant id is used for both FID and IID.
dura_pheno <- dura[
  !is.na(duration_code),
  .(FID = eid, IID = eid, sleep_duration = duration_code)
]

fwrite(dura_pheno, "duration.pheno", sep = "\t", quote = FALSE, na = "NA")


In [None]:
# Run LDAK --linear on the sleep-duration phenotype with the covariates in
# ldak_covariates.cov; the output stem is duration_covar.
./ldak6.1.linux --linear duration_covar \
  --bfile target_qc_data \
  --pheno duration.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_duration) from the filtered effect
# sizes for the samples in ids_in_cov.txt, supplying the sleep-duration
# phenotype, the covariates and duration_covar.coeff (presumably written by
# the --linear run with stem duration_covar -- confirm).
./ldak6.1.linux --calc-scores prs_duration \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno duration.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile duration_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Run LDAK --linear on the phenotype in binomial_chrono.pheno with the
# covariates in ldak_covariates.cov; the output stem is chronotype_covar.
# NOTE(review): binomial_chrono.pheno is not created anywhere in the visible
# cells -- confirm where it comes from.
./ldak6.1.linux --linear chronotype_covar \
  --bfile target_qc_data \
  --pheno binomial_chrono.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_chronotype) from the filtered
# effect sizes for the samples in ids_in_cov.txt, supplying the same phenotype,
# the covariates and chronotype_covar.coeff (presumably written by the
# --linear run with stem chronotype_covar -- confirm).
./ldak6.1.linux --calc-scores prs_chronotype \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno binomial_chrono.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile chronotype_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Run LDAK --linear on the phenotype in duration_bino.pheno with the
# covariates in ldak_covariates.cov; the output stem is dura_bino_covar.
# NOTE(review): duration_bino.pheno is not created anywhere in the visible
# cells -- confirm where it comes from.
./ldak6.1.linux --linear dura_bino_covar \
  --bfile target_qc_data \
  --pheno duration_bino.pheno \
  --covar ldak_covariates.cov \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_dura_bino) from the filtered
# effect sizes for the samples in ids_in_cov.txt, supplying the same phenotype,
# the covariates and dura_bino_covar.coeff (presumably written by the --linear
# run with stem dura_bino_covar -- confirm).
./ldak6.1.linux --calc-scores prs_dura_bino \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --pheno duration_bino.pheno \
  --covar ldak_covariates.cov \
  --coeffsfile dura_bino_covar.coeff \
  --keep ids_in_cov.txt \
  --max-threads 8

In [None]:
# Compute polygenic scores (output stem prs_autism) from the filtered effect
# sizes for the samples listed in ids_in_cov.txt; no phenotype, covariate or
# coefficients file is supplied for this run.
# Every continued line must end with a backslash, otherwise the shell ends the
# command early and tries to run the remaining options as a separate command.
./ldak6.1.linux --calc-scores prs_autism \
  --bfile target_qc_data \
  --scorefile bayesr.effects.filtered \
  --keep ids_in_cov.txt \
  --max-threads 8
