In [1]:
library(tidyverse)
library(data.table)
library(zoo)
# library(tableone)
library(survival)
library(lmerTest)
library(metafor)
library(optimx)
library(broom)

-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.3.1 --

[32mv[39m [34mggplot2[39m 3.3.5     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.1.6     [32mv[39m [34mdplyr  [39m 1.0.8
[32mv[39m [34mtidyr  [39m 1.2.0     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 2.1.2     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: 'data.table'


The following objects are masked from 'package:dplyr':

    between, first, last


The following object is masked from 'package:purrr':

    transpose



Attaching package: 'zoo'


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric


Loading required package: lme4

Loading required packa

In [2]:
# Column-name and cohort-name sets used by the cells below.
# VARS lists the columns kept from the merged cohort table.
VARS <- c(
  "STUDY_NAME", "ID", "TSTART", "DOPA", "AGONIST", "Race", "LED", "LDD", "DAD",
  "WGTBL", "HTBL", "BMI",
  "Hyposmia", "Cognitive_Impairment", "Wearing_Off", "Dyskinesia", "Depression",
  "RLS", "Constipation", "pRBD", "Daytime_Sleepiness", "Insomnia", "HY",
  "UPDRS1", "UPDRS2", "UPDRS3", "UPDRS4", "oldUPDRS", "MDS_UPDRS", "MMSE",
  "MoCA", "SEADL", "UPSIT", "SEADL70",
  "AGEatBL", "FEMALE", "YEARSEDUC", "BLDfDIAG", "AAO", "AD", "FUY"
)
# VARSb: presumably the binary (0/1) variables -- TODO confirm
VARSb <- c(
  "Female", "Hyposmia", "Cognitive_Impairment", "Wearing_Off",
  "Dyskinesia", "Depression", "RLS", "Constipation", "pRBD",
  "Daytime_Sleepiness", "Insomnia", "SEADL70"
)
# VARSu: the standardized UPDRS columns created further down
VARSu <- c(
  "UPDRS_scaled", "UPDRS1_scaled", "UPDRS2_scaled", "UPDRS3_scaled",
  "UPDRS4_scaled"
)
# VARSc: VARSu plus the remaining scale scores
VARSc <- c(VARSu, "HY", "MMSE", "MoCA", "SEADL")
# Cohort names; STUDYs starts with the DENOVOs cohorts
DENOVOs <- c("PPMI", "PreCEPT_PostCEPT", "PARKWEST", "DATATOP")
STUDYs <- c(
  DENOVOs, "PICNICS", "NET_PD_LS1", "DIGPD", "PDBP", "HBS", "PARKFIT",
  "PROPARK", "UDALL_PENN"
)

# Read every cohort's standardized table (ID forced to character so the tables
# stack cleanly), collapse the PDBP_* sub-study labels into 'PDBP', and derive
# per-visit age and disease duration from the baseline value plus TSTART in
# years (TSTART / 365.25).
d <- STUDYs %>%
  lapply(function(study) {
    cohort_file <- sprintf('/data/LNG/iwakih2/MaleFemale/PDcohorts/%s/standardized.csv', study)
    mutate(fread(cohort_file), ID = as.character(ID))
  }) %>%
  bind_rows() %>%
  mutate(
    STUDY_NAME = ifelse(grepl('PDBP_', STUDY_NAME), 'PDBP', STUDY_NAME),
    Age = AGEatBL + TSTART / 365.25,
    DiseaseDuration = BLDfDIAG + TSTART / 365.25
  )

# recode and rename
d = d %>% 
    # BMI = weight / height^2, with the *100*100 factor converting height
    # (assumes HTBL is in cm and WGTBL in kg -- TODO confirm units).
    mutate(BMI = WGTBL/HTBL/HTBL*100*100) %>%
    # BMI > 55 is highly likely to be a scaling mistake: blank BMI and the
    # weight/height it was derived from. The flag is computed once, before BMI
    # is overwritten, so that rows whose BMI is merely missing (e.g. only one
    # of weight/height recorded) keep the measurement they do have.
    mutate(BMI_implausible = !is.na(BMI) & BMI > 55) %>%
    mutate(BMI = ifelse(BMI_implausible, NA, BMI),
           WGTBL = ifelse(BMI_implausible, NA, WGTBL),
           HTBL = ifelse(BMI_implausible, NA, HTBL)) %>%
    select(-BMI_implausible) %>%
    # New hyposmia threshold using UPSIT norm (sex- and age-specific cut-offs;
    # the first matching row wins, so age bands are cumulative upper bounds).
    # NOTE(review): rows with a UPSIT score but missing FEMALE or Age fall
    # through to the last rule and are coded 0 -- confirm this is intended.
    mutate(Hyposmia = case_when(
        (FEMALE==1) & (Age<25) & (UPSIT<=35) ~ 1,
        (FEMALE==1) & (Age<50) & (UPSIT<=34) ~ 1,
        (FEMALE==1) & (Age<60) & (UPSIT<=32) ~ 1,
        (FEMALE==1) & (Age<65) & (UPSIT<=31) ~ 1,
        (FEMALE==1) & (Age<70) & (UPSIT<=26) ~ 1,
        (FEMALE==1) & (Age<75) & (UPSIT<=22) ~ 1,
        (FEMALE==1) & (Age<80) & (UPSIT<=17) ~ 1,
        (FEMALE==1) & (Age>=80) & (UPSIT<=15) ~ 1,
        (FEMALE==0) & (Age<40) & (UPSIT<=33) ~ 1,
        (FEMALE==0) & (Age<50) & (UPSIT<=32) ~ 1,
        (FEMALE==0) & (Age<55) & (UPSIT<=29) ~ 1,
        (FEMALE==0) & (Age<65) & (UPSIT<=26) ~ 1,
        (FEMALE==0) & (Age<70) & (UPSIT<=22) ~ 1,
        (FEMALE==0) & (Age<75) & (UPSIT<=19) ~ 1,
        (FEMALE==0) & (Age<80) & (UPSIT<=18) ~ 1,
        (FEMALE==0) & (Age<85) & (UPSIT<=12) ~ 1,
        (FEMALE==0) & (Age>=85) & (UPSIT<=10) ~ 1,
        !is.na(UPSIT) ~ 0)) %>%
#     mutate(Anosmia = (UPSIT<18)*1) %>% 
    # Give the source columns the names used in VARS / VARSb
    rename(Wearing_Off = MOTORFLUX,
          Cognitive_Impairment = MCI,
          Dementia=DEMENTIA,
          MoCA = MOCA,
          Dyskinesia = DYSKINESIAS,
          Depression = DEPR,
          RLS = RL, 
          Constipation = CONST,
          pRBD = RBD,
          Daytime_Sleepiness = SLEEP,
          Insomnia = INS)

# Baseline rows (TSTART == 0) of participants recruited and diagnosed as PD
db <- d %>% filter(TSTART == 0, RECRUIT == 'PD', DX == 'PD')
# Per-cohort count of participants without / with follow-up (FUY > 0)
with(db, table(STUDY_NAME, FUY > 0))
# Drop participants that have no follow-up
db <- filter(db, FUY > 0)

# Keep the FU>0, and Standardization of UPDRS
d = semi_join(d, db, by = c('STUDY_NAME', 'ID')) # Only keep participants in db
# Per-cohort baseline mean and SD of each UPDRS score. The list is named
# explicitly so the summary columns are reliably <score>_mean / <score>_sd,
# which the mutate() below depends on.
t = db %>% group_by(STUDY_NAME) %>% 
  summarise_at(vars('UPDRS1', 'UPDRS2', 'UPDRS3', 'oldUPDRS', 'MDS_UPDRS'),
               list(mean = ~mean(., na.rm = TRUE), sd = ~sd(., na.rm = TRUE))) %>% data.frame
d = left_join(d, t, by = 'STUDY_NAME') %>% 
  # z-score every visit against its own cohort's baseline distribution; a
  # cohort without a usable baseline SD gets NA
  mutate(UPDRS1_scaled = ifelse(is.na(UPDRS1_sd), NA, (UPDRS1 - UPDRS1_mean)/UPDRS1_sd),
         UPDRS2_scaled = ifelse(is.na(UPDRS2_sd), NA, (UPDRS2 - UPDRS2_mean)/UPDRS2_sd),
         UPDRS3_scaled = ifelse(is.na(UPDRS3_sd), NA, (UPDRS3 - UPDRS3_mean)/UPDRS3_sd),
         # UPDRS4 is standardized over all rows of d rather than per cohort.
         # scale() returns a one-column matrix carrying center/scale attributes;
         # as.numeric() stores it as a plain numeric column instead.
         UPDRS4_scaled = as.numeric(scale(UPDRS4)),
         # Total score: prefer MDS-UPDRS where the cohort has it, otherwise
         # fall back to the old UPDRS
         UPDRS_scaled = case_when(
           !is.na(MDS_UPDRS_sd) ~ (MDS_UPDRS - MDS_UPDRS_mean)/MDS_UPDRS_sd,
           !is.na(oldUPDRS_sd) ~ (oldUPDRS - oldUPDRS_mean)/oldUPDRS_sd)) %>% 
  select(all_of(unique(c(VARS, VARSu, 'Age', 'DiseaseDuration'))))

# Reset the data at baseline
db = d %>% filter(TSTART==0)

                  
STUDY_NAME         FALSE TRUE
  DATATOP              4  796
  DIGPD               76  350
  HBS                 98  482
  NET_PD_LS1          36 1705
  PARKFIT            120  466
  PARKWEST             6  181
  PDBP               447  485
  PICNICS             11  122
  PPMI                52  408
  PROPARK              6  327
  PreCEPT_PostCEPT     0  390
  UDALL_PENN          19  233

In [3]:
# Preview the first rows of d after filtering and standardization
head(d)

STUDY_NAME,ID,TSTART,DOPA,AGONIST,Race,LED,LDD,DAD,WGTBL,⋯,AAO,AD,FUY,UPDRS_scaled,UPDRS1_scaled,UPDRS2_scaled,UPDRS3_scaled,UPDRS4_scaled,Age,DiseaseDuration
<chr>,<chr>,<int>,<int>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
PPMI,3001,0,0,0,White,,,,74.2,⋯,63.83299,,8,-0.7353954,0.6026279,-0.9161895,-0.94449348,-0.7548255,64.74743,0.9144422
PPMI,3001,61,0,0,White,0.0,0.0,,74.2,⋯,63.83299,,8,-0.42631343,0.3582222,-0.9161895,-0.36727158,-0.7548255,64.91444,1.0814511
PPMI,3001,153,0,0,White,0.0,0.0,,74.2,⋯,63.83299,,8,0.03730953,0.3582222,-0.6751802,0.20995032,-0.7548255,65.16632,1.3333333
PPMI,3001,245,0,0,White,0.0,0.0,,74.2,⋯,63.83299,,8,-0.11723146,0.6026279,-0.6751802,-0.13638282,-0.7548255,65.41821,1.5852156
PPMI,3001,366,0,0,White,0.0,0.0,,74.2,⋯,63.83299,,8,0.26912101,1.580251,-0.6751802,-0.02093844,-0.7548255,65.74949,1.9164956
PPMI,3001,550,0,0,White,100.0,0.0,,74.2,⋯,63.83299,,8,0.73274396,0.8470337,-0.434171,0.9026166,-0.7548255,66.25325,2.4202601


In [4]:
# Read one genotype dataset's PC file, split IID_IID into ID plus a remainder
# column named '_' (IID_IID itself is kept), and tag the rows with the cohort
# name used in the clinical table.
read_pca10 <- function(pca_file, study) {
  pcs <- fread(pca_file)
  pcs <- separate(pcs, IID_IID, into = c('ID', '_'), remove = FALSE)
  mutate(pcs, STUDY_NAME = study)
}

# CORIELL
pca1 <- read_pca10('/data/LNG/iwakih2/dataset/CORIELL/pca10.txt', 'NET_PD_LS1')

# SCOPA
pca2 <- read_pca10('/data/LNG/iwakih2/dataset/SCOPA/pca10.txt', 'PROPARK')

# PRECEPT
pca3 <- read_pca10('/data/LNG/iwakih2/dataset/PRECEPT/pca10.txt', 'PreCEPT_PostCEPT')

# Stack the three datasets and shorten the doubled id column names
pca <- rename(bind_rows(pca1, pca2, pca3), FID = FID_FID, IID = IID_IID)


In [5]:
# Restrict to the three cohorts that have genetic PCs available
STUDYs <- c('NET_PD_LS1', 'PROPARK', 'PreCEPT_PostCEPT')
# Clinical columns carried into the dyskinesia analyses
COVs <- c(
  'STUDY_NAME', 'ID', 'TSTART', 'DOPA', 'AGONIST', 'LED', 'LDD', 'DAD', 'BMI',
  'AAO', 'DiseaseDuration', 'HY', 'FEMALE', 'Dyskinesia'
)
# The join with pca keeps only participants present in the PC files; the PC
# columns themselves are dropped again by the select().
df <- d %>%
  inner_join(pca, by = c('STUDY_NAME', 'ID')) %>%
  select(all_of(COVs)) %>%
  filter(STUDY_NAME %in% STUDYs) %>%
  arrange(STUDY_NAME, ID, TSTART) %>%
  rename(
    PD_AAO = AAO,
    DA = AGONIST,
    Disease_duration = DiseaseDuration
  ) %>%
  # Replace the 0/1 FEMALE flag with a labelled Sex column
  mutate(
    Sex = if_else(FEMALE == 1, 'Female', 'Male'),
    FEMALE = NULL
  )
# Show the first three rows of each cohort
df %>%
  group_by(STUDY_NAME) %>%
  slice_head(n = 3)

STUDY_NAME,ID,TSTART,DOPA,DA,LED,LDD,DAD,BMI,PD_AAO,Disease_duration,HY,Dyskinesia,Sex
<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
NET_PD_LS1,10098,0,0,1,300.0,0.0,300.0,24.53287,66.6,2.4,3,0.0,Male
NET_PD_LS1,10098,85,0,1,300.0,0.0,300.0,24.53287,66.6,2.6327173,3,0.0,Male
NET_PD_LS1,10098,360,0,1,320.0,0.0,320.0,24.53287,66.6,3.3856263,3,0.0,Male
PROPARK,1,0,1,1,682.0,,,,71.0,10.8,3,1.0,Female
PROPARK,1,530,1,1,607.0,,,,71.0,12.2510609,3,1.0,Female
PROPARK,1,1014,1,0,498.75,,,,71.0,13.5761807,5,1.0,Female
PreCEPT_PostCEPT,1003,0,0,0,,,,,69.99863,0.1971253,1,,Male
PreCEPT_PostCEPT,1003,26,0,0,,,,,69.99863,0.2683094,1,,Male
PreCEPT_PostCEPT,1003,55,0,0,,,,,69.99863,0.3477071,1,,Male


In [6]:
# Baseline rows only, then dyskinesia status per cohort at baseline
dfb <- filter(df, TSTART == 0)
with(dfb, table(STUDY_NAME, Dyskinesia))

                  Dyskinesia
STUDY_NAME           0   1
  NET_PD_LS1       332   5
  PROPARK          206  77
  PreCEPT_PostCEPT   0   0

In [None]:
# BL logistic analysis is only for propark
# Baseline PROPARK rows with PCs attached, complete cases only; Dyskinesia is
# shifted from 0/1 to 1/2 before export.
dft <- dfb %>%
  filter(STUDY_NAME == 'PROPARK') %>%
  inner_join(pca, by = c('STUDY_NAME', 'ID')) %>%
  select(IID, Dyskinesia, PD_AAO, Sex, HY, Disease_duration, PC1, PC2, PC3, STUDY_NAME) %>%
  filter(complete.cases(.)) %>%
  mutate(Dyskinesia = Dyskinesia + 1)
# One tab-separated file for everyone and one per sex
fwrite(dft, 'data/SCOPA_ALL.txt', sep = '\t')
fwrite(filter(dft, Sex == 'Female'), 'data/SCOPA_FEMALE.txt', sep = '\t')
fwrite(filter(dft, Sex == 'Male'), 'data/SCOPA_MALE.txt', sep = '\t')

In [8]:
# Never vs Ever analysis
# One row per participant. Rows with Dyskinesia == 1 are stacked first, so the
# second distinct() keeps a participant's first dyskinesia row when one exists
# ("ever") and otherwise their first dyskinesia-free row ("never").
df_dysky = df %>% filter(Dyskinesia==1) %>% distinct(STUDY_NAME, ID, .keep_all=TRUE)
df_dyskn = df %>% filter(Dyskinesia==0) %>% distinct(STUDY_NAME, ID, .keep_all=TRUE)
df_dysk = bind_rows(df_dysky, df_dyskn) %>% distinct(STUDY_NAME, ID, .keep_all=TRUE) %>%
    arrange(STUDY_NAME, ID, TSTART)
df_dysk %>%  with(table(STUDY_NAME, Dyskinesia))
# Attach PCs, keep complete cases, and shift Dyskinesia from 0/1 to 1/2
dftt = inner_join(df_dysk, pca, by = c('STUDY_NAME', 'ID')) %>%
    select(IID, Dyskinesia, PD_AAO, Sex, HY, Disease_duration, PC1, PC2, PC3, STUDY_NAME) %>%
    filter(complete.cases(.)) %>%
    mutate(Dyskinesia=Dyskinesia+1)

# Per cohort: one file for all participants and one per sex
dft = dftt %>% filter(STUDY_NAME=='PROPARK')
# File name follows the <COHORT>_<GROUP>_en.txt pattern used by the other writes
dft %>% fwrite('data/SCOPA_ALL_en.txt', sep='\t')
dft %>% filter(Sex=='Female') %>% fwrite('data/SCOPA_FEMALE_en.txt', sep='\t')
dft %>% filter(Sex=='Male') %>% fwrite('data/SCOPA_MALE_en.txt', sep='\t')

dft = dftt %>% filter(STUDY_NAME=='NET_PD_LS1')
dft %>% fwrite('data/CORIELL_ALL_en.txt', sep='\t')
dft %>% filter(Sex=='Female') %>% fwrite('data/CORIELL_FEMALE_en.txt', sep='\t')
dft %>% filter(Sex=='Male') %>% fwrite('data/CORIELL_MALE_en.txt', sep='\t')

dft = dftt %>% filter(STUDY_NAME=='PreCEPT_PostCEPT')
dft %>% fwrite('data/PRECEPT_ALL_en.txt', sep='\t')
dft %>% filter(Sex=='Female') %>% fwrite('data/PRECEPT_FEMALE_en.txt', sep='\t')
dft %>% filter(Sex=='Male') %>% fwrite('data/PRECEPT_MALE_en.txt', sep='\t')

                  Dyskinesia
STUDY_NAME           0   1
  NET_PD_LS1       221 117
  PROPARK          109 177
  PreCEPT_PostCEPT 181 137

In [15]:
# linear regression for those with dyskinesia
# Per cohort: is a participant's first dyskinesia row the baseline visit
# (TRUE) or a later visit (FALSE)?
with(
  distinct(filter(df, Dyskinesia == 1), STUDY_NAME, ID, .keep_all = TRUE),
  table(STUDY_NAME, TSTART == 0)
)

                  
STUDY_NAME         FALSE TRUE
  NET_PD_LS1         112    5
  PROPARK            100   77
  PreCEPT_PostCEPT   137    0

In [16]:
library(haven)

In [None]:
# read_sav('data/SCOPA-data 6 years complete-wide_dyskinesia.sav') %>% summary

In [None]:
# remove those with dyskinesia at the baseline
# First dyskinesia row per participant, dropping participants whose first
# dyskinesia row is the baseline visit, with PCs attached.
dfl <- df %>%
  filter(Dyskinesia == 1) %>%
  distinct(STUDY_NAME, ID, .keep_all = TRUE) %>%
  filter(TSTART != 0) %>%
  inner_join(pca, by = c('STUDY_NAME', 'ID')) %>%
  select(IID, Dyskinesia, PD_AAO, Sex, HY, Disease_duration, PC1, PC2, PC3, STUDY_NAME)

# Each cohort: complete cases only, written for all participants and per sex
# SCOPA/PROPARK
dflt <- dfl %>% filter(STUDY_NAME == 'PROPARK') %>% filter(complete.cases(.))
fwrite(dflt, 'data/SCOPA_ALL.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Female'), 'data/SCOPA_FEMALE.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Male'), 'data/SCOPA_MALE.lin.txt', sep = '\t')
# CORIELL/NET_PD_LS1
dflt <- dfl %>% filter(STUDY_NAME == 'NET_PD_LS1') %>% filter(complete.cases(.))
fwrite(dflt, 'data/CORIELL_ALL.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Female'), 'data/CORIELL_FEMALE.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Male'), 'data/CORIELL_MALE.lin.txt', sep = '\t')
# PreCEPT/PostCEPT
dflt <- dfl %>% filter(STUDY_NAME == 'PreCEPT_PostCEPT') %>% filter(complete.cases(.))
fwrite(dflt, 'data/PRECEPT_ALL.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Female'), 'data/PRECEPT_FEMALE.lin.txt', sep = '\t')
fwrite(filter(dflt, Sex == 'Male'), 'data/PRECEPT_MALE.lin.txt', sep = '\t')

In [None]:
# Histogram of follow-up visit times (TSTART != 0) for each cohort.
# All three plots read `dfs`: `dflt` carries no TSTART column and, once the
# export cell has run, only holds PreCEPT_PostCEPT rows, so it cannot supply
# the NET_PD_LS1 histogram.
# NOTE: `dfs` is built in the next cell of this notebook; run that cell first.
dfs %>% filter(TSTART!=0) %>% filter(STUDY_NAME=='NET_PD_LS1') %>% with(hist(TSTART))
dfs %>% filter(TSTART!=0) %>% filter(STUDY_NAME=='PROPARK') %>% with(hist(TSTART))
dfs %>% filter(TSTART!=0) %>% filter(STUDY_NAME=='PreCEPT_PostCEPT') %>% with(hist(TSTART))

In [None]:
# All 3 cohorts are ok for linear reg
# Participants who already have dyskinesia at the baseline visit
removeID <- distinct(filter(df, Dyskinesia == 1, TSTART == 0), STUDY_NAME, ID)
print(dim(removeID))
# All visits of the remaining participants, with PCs attached
dfs <- df %>%
  anti_join(removeID, by = c('STUDY_NAME', 'ID')) %>%
  inner_join(pca, by = c('STUDY_NAME', 'ID')) %>%
  select(IID, Dyskinesia, PD_AAO, Sex, HY, Disease_duration, PC1, PC2, PC3, TSTART, STUDY_NAME)
# Visit-level dyskinesia counts per cohort
with(dfs, table(STUDY_NAME, Dyskinesia))

In [None]:
# One row per participant for the time-to-dyskinesia analysis.
# dfs1: the first row in dfs with dyskinesia for each participant
dfs1 <- distinct(filter(dfs, Dyskinesia == 1), STUDY_NAME, IID, .keep_all = TRUE)
# dfs0: the dyskinesia-free row with the largest TSTART for each participant
dfs0 <- dfs %>%
  filter(Dyskinesia == 0) %>%
  arrange(STUDY_NAME, IID, desc(TSTART)) %>%
  distinct(STUDY_NAME, IID, .keep_all = TRUE)
# dfs1 is stacked first, so a participant with dyskinesia keeps that row
dfsgo <- bind_rows(dfs1, dfs0) %>%
  distinct(STUDY_NAME, IID, .keep_all = TRUE) %>%
  arrange(STUDY_NAME, IID)
with(dfsgo, table(STUDY_NAME, Dyskinesia))

In [None]:
# Preview the one-row-per-participant table
dfsgo %>% head

In [None]:
# Survival-analysis inputs: per cohort, complete cases written for all
# participants and per sex.
# SCOPA/PROPARK
dfst <- dfsgo %>% filter(STUDY_NAME == 'PROPARK') %>% filter(complete.cases(.))
fwrite(dfst, 'data/SCOPA_ALL.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Female'), 'data/SCOPA_FEMALE.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Male'), 'data/SCOPA_MALE.surv.txt', sep = '\t')
# CORIELL/NET_PD_LS1 (HY is dropped before the complete-case filter)
dfst <- dfsgo %>%
  filter(STUDY_NAME == 'NET_PD_LS1') %>%
  select(-HY) %>%
  filter(complete.cases(.))
fwrite(dfst, 'data/CORIELL_ALL.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Female'), 'data/CORIELL_FEMALE.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Male'), 'data/CORIELL_MALE.surv.txt', sep = '\t')
# PreCEPT/PostCEPT
dfst <- dfsgo %>% filter(STUDY_NAME == 'PreCEPT_PostCEPT') %>% filter(complete.cases(.))
fwrite(dfst, 'data/PRECEPT_ALL.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Female'), 'data/PRECEPT_FEMALE.surv.txt', sep = '\t')
fwrite(filter(dfst, Sex == 'Male'), 'data/PRECEPT_MALE.surv.txt', sep = '\t')

In [None]:
# Display SNPset (it is created by fread() in the following cell, so that cell has to be run first)
SNPset 

In [None]:
# Set-up for the per-SNP Cox models. The argument string is hard-coded here for
# interactive use; the commented line shows the command-line variant.
# Fields, separated by ';': MODEL;OUTCOME;COVPC;SLICE;PHENO;OUTPUT
# args <- commandArgs(trailingOnly = TRUE)
args <- 'NOADJ;Dyskinesia;Sex+PC1+PC2+PC3;/data/LNG/iwakih2/dataset/PRECEPT/maf01rsq3_20Kcut/cut11.0.txt.gz;data/PRECEPT_ALL.surv.txt;/data//CARD/projects/dysk_prog/surv/PRECEPT'
t <- strsplit(args, ";")[[1]]
MODEL <- t[1]
OUTCOME <- t[2]
COVPC <- t[3]
SLICE <- t[4]
PHENO <- t[5]
OUTPUT <- t[6]
library(tidyr)
library(dplyr)
library(data.table)
library(survival)
# Covariate names are the '+'-separated terms of COVPC
COVs <- strsplit(COVPC, "\\+")[[1]]
data <- fread(PHENO)
SNPset <- fread(cmd = paste('gzcat -f', SLICE)) # %>% .[,1:10]
# Every column after the first two is treated as a SNP
SNPs <- names(SNPset)[-(1:2)] # IID, DOSE
# Sex becomes numeric (Male = 0, otherwise 1) so it can be scaled with the PCs
cohort <- data %>%
  mutate(Sex = if_else(Sex == 'Male', 0, 1)) %>%
  arrange(IID, TSTART) %>%
  data.frame()
cohort[COVs] <- as.data.frame(scale(cohort[COVs]))
cohort_snp <- inner_join(cohort, SNPset, by = "IID")
# Survival outcome: time = Disease_duration, event = Dyskinesia
cohort_snp$SurvObj1 <- with(cohort_snp, Surv(Disease_duration, Dyskinesia))
# ANALYSIS
# Fit one Cox model for the x-th SNP and summarise it.
#
# Reads the globals SNPs (SNP column names), COVPC ('+'-separated covariate
# terms) and cohort_snp (data frame holding SurvObj1, the covariates, IID and
# one dosage column per SNP).
#
# @param x Index into SNPs.
# @return Character vector of length 8:
#   SNP name, "<events>_<n>", z statistic, coefficient, SE, P,
#   number of distinct IIDs with a non-missing dosage, mean dosage / 2.
#   When the fit errors or the SNP is not among the coefficients, the second
#   element is "NoConverge" / "NoVforSNP" and the remaining six are NA, so
#   every row has the same width when the results are rbind()-ed.
test.listfunc = function(x){
  snp = SNPs[x]
  # SNP names are not syntactic R names, hence the back-ticks in the formula
  MODEL = as.formula(paste0("SurvObj1 ~ `", snp, "` + ", COVPC))
  testCox = try(coxph(MODEL, data = cohort_snp), silent = TRUE)
  if(inherits(testCox, "try-error")){
    sumstat = c(snp, "NoConverge", rep(NA, 6))
  }else{
    temp = summary(testCox)$coefficients
    # In this case, SNP is dropped from the model
    if(!any(grepl(substr(snp, 1, 3), rownames(temp), fixed = TRUE))){
      sumstat = c(snp, "NoVforSNP", rep(NA, 6))
    }else{
      # The SNP is the first term of the formula, hence the first row
      RES = temp[1,]
      EVENT_OBS = paste(testCox$nevent, testCox$n, sep="_")
      # One dosage per participant, with missing dosages excluded (the test is
      # on the dosage values themselves, not on the SNP name)
      dose = cohort_snp[[snp]][!duplicated(cohort_snp$IID)]
      dose = dose[!is.na(dose)]
      sumstat = c(snp, EVENT_OBS, as.numeric(RES[4]), RES[1], RES[3], RES[5],
                  length(dose), mean(dose)/2)
    }
  }
  return(sumstat)
}

# Run the model for every SNP, keep the rows without NA (failed fits carry NA
# fields and are dropped here), split the SNP label, and write the table.
if(length(SNPs) == 0){
  stop("No SNP columns found in ", SLICE, call. = FALSE)
}
temp = lapply(seq_along(SNPs), test.listfunc)
temp2 = do.call(rbind, temp) %>% data.frame %>% filter(complete.cases(.))
names(temp2)=c("POS_A2_A1", "EVENT_OBS", "Tvalue", "BETA", "SE", "P", "N", "ALT_Frq") 
temp3 = temp2 %>% separate(POS_A2_A1, c("SNP", "A2", "A1"),sep="_")
dir.create(OUTPUT, recursive = TRUE, showWarnings = FALSE)
# Output name: <PHENO base name with its first '_' turned into '.' and 'surv.txt'
# replaced by> <MODEL>.<slice name>.<OUTCOME>.cox.txt
FILENAME = strsplit(SLICE, "cut/")[[1]][2] %>% sub("txt.gz", paste(OUTCOME,"cox.txt", sep='.'), .)
FILENAME2 = sub('_', '.', basename(PHENO)) %>% sub('surv.txt', paste(MODEL, FILENAME, sep='.'), .)
write.table(temp3, paste(OUTPUT, FILENAME2, sep="/"), row.names = FALSE, quote = FALSE, sep = "\t")

In [None]:

# QQ plot, genomic-inflation factor (lambda) and Manhattan plot for one
# result file. library() is used so a missing package stops the run at once.
library("qqman");library("data.table");library("tidyr");library("dplyr")
# t = commandArgs(trailingOnly = TRUE)
t = c('/data/CARD/projects/dysk_prog/RES/PRECEPT.ALL.ADJ.Dyskinesia.cox.txt',
     'lambdas.txt')
FILE=t[1]
LAMBDA_OUTPUT=t[2]
NAME=basename(FILE)
MH = fread(FILE) 
# Some result files name the variant column ID instead of SNP
if('ID' %in% names(MH)){
    MH = MH %>% rename(SNP=ID)
}
# Split the SNP label into CHR and BP and keep variants with
# 0.05 < ALT_Frq < 0.95
MH = MH %>% separate(SNP, c("CHR", "BP"), remove = FALSE) %>%
  mutate_at(vars("CHR", "BP", "P", "ALT_Frq"), as.numeric) %>% 
  filter(ALT_Frq>0.05 & ALT_Frq<0.95)
# lambda: median observed 1-df chi-square over its expected median
chisq <- qchisq(MH$P,1, lower.tail = F)
lambda = median(chisq)/qchisq(0.5,1)
print(lambda)
# P values after dividing the chi-square statistics by lambda
newchisq=chisq/lambda
MH$newp=pchisq(newchisq,1, lower.tail = F)
write(paste(NAME, lambda, sep=","), LAMBDA_OUTPUT, append = T)
# # QQ plot
png(paste(NAME, "QQ.png", sep="_"))
# Titled with the result file name
qq(MH$P, main = NAME)
text(4, 1, sprintf("lambda = %.3f", lambda))
text(4, 2, paste("N of variants =", nrow(MH)))
dev.off()
# MH plot if lambda is not too large
if(lambda < 2){
    # Only variants with P < 0.05 are drawn
    MH = MH %>% filter(P<0.05)
    maxlogP=-log10(min(MH$newp))
    png(paste(NAME,"MH_Adj.png", sep="_"), width=2000, height=1000, pointsize=18)
    manhattan(MH, col = c("blue4", "orange3"), p = "newp", main=NAME, cex.axis = 0.6, annotatePval = 0.00001, ylim=c(0, max(10, maxlogP)))
    dev.off()
}
