In [1]:
#Mendelian Randomization

#Description: Two-sample MR between the 18-SNP insulin resistance score and kidney disease
#Run analyses in the diabetics, non-diabetics, and in the overall sample. 

#load packages
#library("MendelianRandomization") #package could not be installed
library(utils)
library(psych)
library(rms)
library(naniar)
library(tidyverse)
library(table1)
library(readr)
library(stringr)
library(R.utils)
library(data.table)
library(dplyr)

“package ‘psych’ was built under R version 3.6.3”
Loading required package: Hmisc

Loading required package: lattice

“package ‘lattice’ was built under R version 3.6.3”
Loading required package: survival

Loading required package: Formula

Loading required package: ggplot2


Attaching package: ‘ggplot2’


The following objects are masked from ‘package:psych’:

    %+%, alpha



Attaching package: ‘Hmisc’


The following object is masked from ‘package:psych’:

    describe


The following objects are masked from ‘package:base’:

    format.pval, units


Loading required package: SparseM

“package ‘SparseM’ was built under R version 3.6.3”

Attaching package: ‘SparseM’


The following object is masked from ‘package:base’:

    backsolve


── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mtibble [39m 3.1.4     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[3

In [2]:
#Individual-level T2D score file: read it, then discard IID and the two GRS
#summary columns (GRS_RAW, GRS_WT) so only FID and the remaining columns are kept.
PGS_2IR <- read.table(
    "~/jupyter/IRKD_SNP/UKBB_T2D_MVMR_GRS_REV_20OCT21.txt",
    header = TRUE,
    na.strings = c("", ".", "NA")
) %>%
    dplyr::select(-IID, -GRS_RAW, -GRS_WT)
#Show the retained column names
names(PGS_2IR)

In [3]:
#Rows x columns of the score file after dropping the unused columns
c(nrow(PGS_2IR), ncol(PGS_2IR))

In [8]:
#2. Attach the linker file to the dosage file
#The linker is joined on FID; its FID_Salem column then becomes the participant
#id (f.eid) that the phenotype file is merged on later.
linker <- read.table(
    '~/jupyter/UKBiobank_genoQC_allancestry_linker.txt',
    header = TRUE,
    na.strings = c("", ".", "NA")
)
#Inner join: only FIDs present in both files are kept
PGS <- merge(linker, PGS_2IR, by = "FID")
#Rename FID_Salem -> f.eid (exact match on the column name)
names(PGS) <- sub("^FID_Salem$", "f.eid", names(PGS))
#Remove the identifier columns that are no longer needed
PGS <- dplyr::select(PGS, -IID, -IID_Salem, -FID)
names(PGS)

In [None]:
#3. Generate the beta coefficients for each of the kidney disease outcomes
#Code adapted from this program (3_Multivariate_Analysis-Restrict4-Primary-IR-PRS)

#Step 1: read the complete (non-imputed) phenotype file
UKBB_AG2_m <- fread(
    "~/jupyter/UKBB_AG2_12Jan21.txt",
    header = TRUE,
    na.strings = c("", ".", "NA")
)

#Step 2: keep only the columns listed below (order is preserved as written)
UKBB_AG2_m <- UKBB_AG2_m %>% select(
    #participant id and diabetes status
    f.eid, T2D_status,
    #kidney phenotype columns
    ALBUMINERIA.0.0, ESKD.0.0, CKD.0.0, DN.0.0, ALL.0.0, NONESKD.0.0, DNCKD.0.0,
    CTRL_DNCKD.0.0, ACR.0.0, EGFR.0.0,
    #principal components
    PC1, PC2, PC3, PC4, PC5, PC6, PC7, PC8, PC9, PC10,
    #covariates
    SEX.0.0, IDEAL_DIET2.0.0, LIFESCORE, AGE.0.0, SES_TDI.0.0, BMI.0.0, EDUYEARS,
    SBP.0.0, HYP_POS1, STATIN, WHR.0.0,
    #GRS_WT_* score columns
    GRS_WT_LIRd2, GRS_WT_LIRt, GRS_WT_LIRq, GRS_WT_LIRf3,
    GRS_WT_IRd2, GRS_WT_IRt, GRS_WT_IRq, GRS_WT_IRf3,
    GRS_WT_IR53d2, GRS_WT_IR53t, GRS_WT_IR53q, GRS_WT_IR53f3,
    GRS_WT_T2DIRd2, GRS_WT_T2DIRt, GRS_WT_T2DIRq, GRS_WT_T2DIRf3,
    GRS_WT_L5E8IRd2, GRS_WT_L5E8IRt, GRS_WT_L5E8IRq, GRS_WT_L5E8IRf3,
    GRS_WT_L1E5IRd2, GRS_WT_L1E5IRt, GRS_WT_L1E5IRq, GRS_WT_L1E5IRf3
)

#Step 3: convert to a plain data.frame, report its size, free the intermediate
UKBB_AG2 <- as.data.frame(UKBB_AG2_m)
dim(UKBB_AG2)
rm(UKBB_AG2_m)

#Dichotomize outcomes for logistic regression
#Every outcome below is a two-level factor whose FIRST level is the reference
#(control) group. Any value that is neither the control nor the case label is NA.

#1. CKD vs. CKD controls
UKBB_AG2$CKD_only.0.0 <- factor(UKBB_AG2$CKD.0.0,
                                levels = c("CKD controls", "CKD"))

#2. Extreme CKD vs. CKD controls
UKBB_AG2$CKD_ex.0.0 <- factor(UKBB_AG2$CKD.0.0,
                              levels = c("CKD controls", "CKD extreme"))

#3. Micro vs. normo
UKBB_AG2$micro.0.0 <- factor(UKBB_AG2$ALBUMINERIA.0.0,
                             levels = c("normo", "micro"))

#4. Macro vs. normo
UKBB_AG2$macro.0.0 <- factor(UKBB_AG2$ALBUMINERIA.0.0,
                             levels = c("normo", "macro"))

#5. ESKD vs. macro
#ESKD status is checked first; a missing ESKD status yields NA
UKBB_AG2$ESKD_macro.0.0 <- with(UKBB_AG2, factor(
    ifelse(ESKD.0.0 == "yes", "ESKD",
           ifelse(ALBUMINERIA.0.0 == "macro", "macro", NA)),
    levels = c("macro", "ESKD")))

#6. DNCKD vs. DNCKD controls
UKBB_AG2$DNCKD2.0.0 <- with(UKBB_AG2, factor(
    ifelse(DNCKD.0.0 == "yes", "DNCKD",
           ifelse(CTRL_DNCKD.0.0 == "yes", "DNCKD Control", NA)),
    levels = c("DNCKD Control", "DNCKD")))

#7. ESKD vs. any recorded albuminuria category (normo, micro or macro)
UKBB_AG2$ESKD_Albu.0.0 <- with(UKBB_AG2, factor(
    ifelse(ESKD.0.0 == "yes", "ESKD",
           ifelse(ALBUMINERIA.0.0 %in% c("normo", "macro", "micro"), "albu", NA)),
    levels = c("albu", "ESKD")))

#Add the per-participant dosage columns to the phenotype data.
#merge() keeps only participants whose f.eid appears in both tables.
UKBB_AG2 <- merge(x = UKBB_AG2, y = PGS, by = 'f.eid')
print('Dosage merged to regular file')
#Size of the merged analysis table
dim(UKBB_AG2)

#Model bookkeeping
#Positions inside the per-model results list
model_id <- 2   #model id
i1 <- 4         #summary: total models
i2 <- 5         #summary: t2d models
i3 <- 6         #summary: nd models
i7 <- 7         #lrt: total models
i8 <- 8         #lrt: t2d models
i9 <- 9         #lrt: nd models
#Label written to the adjustment column of the result tables
i4 <- 'Model 1'

#Non-linearity was evaluated in a prior section.
#This section evaluates the continuous form; results accumulate in three
#initially empty tables (entire sample, T2D, non-diabetic).
coef_all5 <- coef_all5_t2d <- coef_all5_nd <- data.frame()

#Per-SNP logistic regressions: SNP-outcome betas for the two-sample MR
#
#For every SNP column of PGS and every binary kidney outcome listed below, fit
#    outcome ~ SNP + AGE.0.0 + SEX.0.0 + PC1 ... PC10
#three times: in the entire sample, in T2D_status == 1 and in T2D_status == 0.
#One row per SNP x outcome is appended to coef_all5 (entire sample),
#coef_all5_t2d and coef_all5_nd, with columns
#    model_adj, sub, rep_model2, key, beta;se, var
#Only the SNP beta and SE are exported, so no likelihood-ratio tests are run
#here (anova() on every fit would refit the model once per term).

#SNP columns = every PGS column except the participant id, so the loop follows
#the number of SNPs in the dosage file instead of a fixed column range.
mr_snp_terms <- setdiff(names(PGS), "f.eid")

#Outcome expressions; each one makes the control group the reference level.
#Commented entries are outcomes that are currently switched off.
mr_outcome_terms <- c('relevel(as.factor(CKD_only.0.0),"CKD controls")',
#                     'relevel(as.factor(CKD_ex.0.0),"CKD controls")',
                      'relevel(as.factor(micro.0.0),"normo")',
                      'relevel(as.factor(macro.0.0),"normo")',
#                     'relevel(as.factor(ESKD.0.0),"no")',
                      'relevel(as.factor(DN.0.0),"no")',
                      'relevel(as.factor(ALL.0.0),"no")',
#                     'relevel(as.factor(ESKD_macro.0.0),"macro")',
                      'relevel(as.factor(DNCKD2.0.0),"DNCKD Control")'
#                     'relevel(as.factor(ESKD_Albu.0.0),"albu")'
                      )

#Covariate adjustment per model name (Model 1 - age, sex, PCs)
mr_covariate_sets <- list(
    Model1 = "AGE.0.0 + SEX.0.0 + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10"
)

#Analysis samples; the subsets are built once because they do not depend on the
#SNP or the outcome.
mr_samples <- list(
    all = UKBB_AG2,
    t2d = dplyr::filter(UKBB_AG2, T2D_status == 1),
    nd  = dplyr::filter(UKBB_AG2, T2D_status == 0)
)

#Fit one logistic model and return its one-row result.
#  fmla      : model formula
#  data      : data frame to fit on
#  term      : name of the SNP column, used to find its coefficient row
#  outcome   : outcome expression (left-hand side of fmla, as text)
#  adj_label : label stored in the model_adj column
#The SNP row is looked up BY NAME: summary.glm() omits aliased coefficients, so
#a fixed row position could silently return a covariate's estimate instead.
#If the SNP has no coefficient, beta and SE are NA and a warning is raised.
.mr_beta_se_row <- function(fmla, data, term, outcome, adj_label) {
    fit <- glm(fmla, data = data, family = binomial(link = "logit"))
    coefs <- coefficients(summary(fit))
    if (term %in% rownames(coefs)) {
        beta <- round(coefs[term, 1], 4)
        se <- round(coefs[term, 2], 4)
    } else {
        warning("No coefficient estimated for ", term, " in model of ", outcome)
        beta <- NA_real_
        se <- NA_real_
    }
    #Characters 19 onwards skip the 18-character prefix 'relevel(as.factor('
    #of the outcome expression. The same offsets are applied to the SNP name to
    #keep the exported columns unchanged; substr() gives "" when the name is
    #shorter than 19 characters.
    term_sub <- substr(term, 19, 32)
    outcome_sub <- substr(outcome, 19, 35)
    data.frame(
        model_adj = adj_label,
        sub = term_sub,
        rep_model2 = outcome_sub,
        key = paste0(term_sub, "-", outcome_sub),
        `beta;se` = paste0(beta, ";", se),
        var = term,
        check.names = FALSE,
        stringsAsFactors = FALSE
    )
}

#Collect the rows in lists and bind once at the end
mr_rows <- list(all = list(), t2d = list(), nd = list())

for (mr_snp in mr_snp_terms) {
    for (mr_model in names(mr_covariate_sets)) {
        for (mr_outcome in mr_outcome_terms) {
            mr_fmla <- as.formula(paste0(mr_outcome, " ~ ", mr_snp, " + ",
                                         mr_covariate_sets[[mr_model]]))
            for (mr_sample in names(mr_samples)) {
                mr_rows[[mr_sample]][[length(mr_rows[[mr_sample]]) + 1]] <-
                    .mr_beta_se_row(mr_fmla, mr_samples[[mr_sample]],
                                    mr_snp, mr_outcome, i4)
            }
        }
    }
    #Progress marker: one line per SNP
    message("Fitted all outcomes for ", mr_snp)
}

#Append to the accumulator tables
coef_all5     <- rbind(coef_all5,     do.call(rbind, mr_rows$all))
coef_all5_t2d <- rbind(coef_all5_t2d, do.call(rbind, mr_rows$t2d))
coef_all5_nd  <- rbind(coef_all5_nd,  do.call(rbind, mr_rows$nd))

#Release the sample copies and the row buffers
rm(mr_samples, mr_rows)

In [32]:
#Write the three result tables to disk (CSV content, .txt extension).
#Entire sample
write.csv(x = coef_all5,
          file = '/cellar/users/agarduno/jupyter/Analysis/Mendelian/All_T2D_13Oct21.txt')
#Participants with type 2 diabetes
write.csv(x = coef_all5_t2d,
          file = '/cellar/users/agarduno/jupyter/Analysis/Mendelian/T2D_T2D_13Oct21.txt')
#Participants without diabetes
write.csv(x = coef_all5_nd,
          file = '/cellar/users/agarduno/jupyter/Analysis/Mendelian/ND_T2D_13Oct21.txt')