In [1]:
#Import the Data

#Read the complete set (no imputation)
#Effect Modification by Lifestyle Score
library(ggpubr)
library(dplyr)
library(mice)
library(miceadds)
library(lme4)
library(ggplot2)
library(rms)
library(mgcv)
library(tidymv)
library("ggthemes")
library(broom)
library(erer)
library(stringr)
library(tidyverse)
library(table1)

# Load the analysis extract with read.delim() and keep only the listed
# columns, in the listed order. Empty strings, "." and "NA" are read as NA.
UKBB_AG2 <- select(
  read.delim(
    "~/jupyter/UKBB_AG2_07NOV20.txt",
    header = TRUE,
    na.strings = c("", ".", "NA")
  ),
  f.eid, T2D_status, GRS_WT_LIR, GRS_WT_IR, GRS_RAW_T2DIR,
  GRS_RAW_T2DIR2d2, GRS_WT_IR2d2, GRS_RAW_LIR2d2, GRS_WT_L5E8IR,
  GRS_RAW_LIR, GRS_RAW_IR,
  GRS_WT_L5E8IRd, GRS_WT_L5E8IR2d2, GRS_WT_L1E5IR, GRS_WT_L1E5IRd,
  GRS_WT_L1E5IR2d2, ALBUMINERIA.0.0,
  GRS_RAW_T2DIRd, GRS_RAW_IR2d, GRS_WT_IR2d, GRS_RAW_LIR2d, GRS_WT_T2DIRd,
  ESKD.0.0, CKD.0.0, DN.0.0, ALL.0.0, NONESKD.0.0, DNCKD.0.0,
  CTRL_DNCKD.0.0, ACR.0.0, EGFR.0.0,
  PC1, PC2, PC3, PC4, PC5, PC6, PC7, PC8, PC9, PC10,
  SEX.0.0,
  IDEAL_DIET2.0.0, LIFESCORE, AGE.0.0,
  SES_TDI.0.0, BMI.0.0, EDUYEARS, SBP.0.0
)

# Dichotomize outcomes for logistic regression ----
# Every outcome below is a two-level factor whose FIRST level is the reference
# (control) group, so no separate relevel() call is required.
#
# For outcomes derived from a single column, factor(x, levels = ...) is used
# directly: any value not listed in `levels` (including NA) becomes NA, which
# is exactly what the former nested ifelse() produced.

# 1. CKD vs. CKD controls
UKBB_AG2$CKD_only.0.0 <- factor(
  UKBB_AG2$CKD.0.0,
  levels = c("CKD controls", "CKD")
)

# 2. CKD extreme vs. CKD controls
UKBB_AG2$CKD_ex.0.0 <- factor(
  UKBB_AG2$CKD.0.0,
  levels = c("CKD controls", "CKD extreme")
)

# 3. Micro vs. normo
UKBB_AG2$micro.0.0 <- factor(
  UKBB_AG2$ALBUMINERIA.0.0,
  levels = c("normo", "micro")
)

# 4. Macro vs. normo
# (This outcome was previously built twice and re-levelled twice; one
# definition is sufficient and yields the same column.)
UKBB_AG2$macro.0.0 <- factor(
  UKBB_AG2$ALBUMINERIA.0.0,
  levels = c("normo", "macro")
)

# 5. ESKD vs. macro
# ESKD takes priority over the albuminuria category. Because ifelse()
# propagates NA from its test, rows with a missing ESKD.0.0 stay NA even when
# ALBUMINERIA.0.0 is "macro".
UKBB_AG2$ESKD_macro.0.0 <- factor(
  ifelse(
    UKBB_AG2$ESKD.0.0 == "yes", "ESKD",
    ifelse(UKBB_AG2$ALBUMINERIA.0.0 == "macro", "macro", NA)
  ),
  levels = c("macro", "ESKD")
)

# 6. DNCKD vs. DNCKD control
# Rows with a missing DNCKD.0.0 stay NA (same NA propagation as above).
UKBB_AG2$DNCKD2.0.0 <- factor(
  ifelse(
    UKBB_AG2$DNCKD.0.0 == "yes", "DNCKD",
    ifelse(UKBB_AG2$CTRL_DNCKD.0.0 == "yes", "DNCKD Control", NA)
  ),
  levels = c("DNCKD Control", "DNCKD")
)

# 7. ESKD vs. any albuminuria category (normo, macro, micro)
# %in% never returns NA, so a non-ESKD row with missing albuminuria is NA
# only through the final `NA` branch; rows with missing ESKD.0.0 stay NA.
UKBB_AG2$ESKD_Albu.0.0 <- factor(
  ifelse(
    UKBB_AG2$ESKD.0.0 == "yes", "ESKD",
    ifelse(
      UKBB_AG2$ALBUMINERIA.0.0 %in% c("normo", "macro", "micro"), "albu", NA
    )
  ),
  levels = c("albu", "ESKD")
)

# Summarize counts of disease outcomes: one frequency table per outcome.
# Each call is kept as its own top-level expression so the notebook displays
# every table; the trailing number is the position of the table in the output.
table(UKBB_AG2$CKD_only.0.0) # 1: CKD vs. CKD controls
table(UKBB_AG2$CKD_ex.0.0) # 2: CKD extreme vs. CKD controls
table(UKBB_AG2$micro.0.0) # 3: micro vs. normo
table(UKBB_AG2$macro.0.0) # 4: macro vs. normo
table(UKBB_AG2$ESKD.0.0) # 5: ESKD.0.0 as loaded
table(UKBB_AG2$DN.0.0) # 6: DN.0.0 as loaded
table(UKBB_AG2$ALL.0.0) # 7: ALL.0.0 as loaded
table(UKBB_AG2$ESKD.0.0) # 8: ESKD.0.0 again (same table as 5)
table(UKBB_AG2$ESKD_macro.0.0) # 9: ESKD vs. macro
table(UKBB_AG2$ESKD_Albu.0.0) # 10: ESKD vs. any albuminuria category
table(UKBB_AG2$DNCKD2.0.0) # 11: DNCKD vs. DNCKD control

Loading required package: ggplot2


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘mice’


The following objects are masked from ‘package:base’:

    cbind, rbind


* miceadds 3.10-28 (2020-07-29 21:56:24)

Loading required package: Matrix

Registered S3 methods overwritten by 'lme4':
  method                          from
  cooks.distance.influence.merMod car 
  influence.merMod                car 
  dfbeta.influence.merMod         car 
  dfbetas.influence.merMod        car 

Loading required package: Hmisc

Loading required package: lattice

Loading required package: survival

Loading required package: Formula


Attaching package: ‘Hmisc’


The following objects are masked from ‘package:dplyr’:

    src, summarize


The following objects are masked from ‘package:base’:

    format.pval, units


Loading required p


CKD controls          CKD 
      349669         6108 


CKD controls  CKD extreme 
      349669          984 


 normo  micro 
348496  14070 


 normo  macro 
348496   1120 


    no    yes 
356332    447 


    no    yes 
345597   1469 


    no    yes 
332345  15439 


    no    yes 
356332    447 


macro  ESKD 
  963   447 


  albu   ESKD 
346560    447 


DNCKD Control         DNCKD 
       326513           645 

In [7]:
# Cross-tabulation of each outcome (rows) against T2D status (columns).
# The per-outcome tables are stacked, in the order listed below, into the
# single matrix x2.
# NOTE: row names repeat across outcomes (e.g. "no"/"yes", "CKD controls"),
# so a row is identified by its position within this ordering.

# First table: ALL.0.0 by T2D status (also the seed for the stacking below).
x <- table(UKBB_AG2$ALL.0.0, UKBB_AG2$T2D_status)

# Append the remaining outcome-by-T2D tables one at a time.
x2 <- Reduce(
  f = function(stacked, outcome) {
    rbind(stacked, table(UKBB_AG2[[outcome]], UKBB_AG2$T2D_status))
  },
  x = c(
    "CKD_ex.0.0",
    "CKD_only.0.0",
    "DN.0.0",
    "DNCKD2.0.0",
    "ESKD_Albu.0.0",
    "ESKD_macro.0.0",
    "ESKD.0.0",
    "macro.0.0",
    "micro.0.0"
  ),
  init = x
)
x2



Unnamed: 0,0,1
no,315327,17018
yes,12718,2721
CKD controls,330309,19360
CKD extreme,772,212
CKD controls,330309,19360
CKD,5348,760
no,326290,19307
yes,1158,311
DNCKD Control,310174,16339
DNCKD,525,120


In [14]:
#Summary of Insulin Resistance Scores by Diabetes
#### Table 1 ####

####Biomarkers####

#### Continuous Variables ####
continuousVars <- c("AGE.0.0", "BMI.0.0", "GRS_RAW_T2DIR", "GRS_WT_IR",
                    "GRS_WT_L1E5IR", "GRS_RAW_LIR")
#### Categorical Variables ####
# These names match the dichotomized score columns kept when UKBB_AG2 was
# loaded (GRS_RAW_T2DIR2d2 and GRS_WT_IR2d2; the previously listed
# GRS_WT_T2DIR2d2 and GRS_RAW_IR2d2 are not among the selected columns).
catVars <- c("SEX.0.0", "GRS_RAW_T2DIR2d2", "GRS_WT_IR2d2",
             "GRS_WT_L1E5IR2d2", "GRS_RAW_LIR2d2")

# Display labels for table1(): `variables` maps a column name to its row
# label; every column name must appear exactly once. The categorical ("Cat.")
# labels are keyed by the *2d2 columns so they no longer collide with the
# continuous-score entries of the same base name.
# NOTE(review): `groups` has two entries while the table1() calls pass a
# single-element groupspan -- confirm the header renders as intended.
labels <- list(
  variables = list(
    "SEX.0.0" = "Gender",
    "AGE.0.0" = "Age",
    "BMI.0.0" = "Body Mass Index",
    "GRS_RAW_T2DIR" = "T2D PGS, Score",
    "GRS_WT_IR" = "IR 2013 PGS, Score",
    "GRS_WT_L1E5IR" = "IR 2019 PGS, Score",
    "GRS_RAW_LIR" = "Cluster IR, PGS",
    "GRS_RAW_T2DIR2d2" = "T2D PGS, Cat.",
    "GRS_WT_IR2d2" = "IR 2013 PGS, Cat.",
    "GRS_WT_L1E5IR2d2" = "IR 2019 PGS, Cat.",
    "GRS_RAW_LIR2d2" = "Cluster IR, PGS Cat."
  ),
  groups = list("", "Overall")
)

#strata, total sample to start
# which() drops rows whose T2D_status is NA; plain logical indexing with an
# NA condition would instead insert an all-NA row for each such participant.
#non-diabetic (LIR)
ndb_strata <- c(list(Total = UKBB_AG2[which(UKBB_AG2$T2D_status == 0), ]))
#diabetic (LIR)
db_strata <- c(list(Total = UKBB_AG2[which(UKBB_AG2$T2D_status == 1), ]))
#overall
all_strata <- c(list(Total = UKBB_AG2))

# Renderer for continuous variables, passed to table1() as render.continuous.
# Computes the default statistics for `x`, rounds them with digits = 2, and
# returns a character vector: an empty first element followed by one element
# named "Mean (SD)" formatted as "<mean> (&plusmn; <sd>)".
my.render.cont <- function(x) {
  rounded <- stats.apply.rounding(stats.default(x), digits = 2)
  mean_sd <- sprintf("%s (&plusmn; %s)", rounded[["MEAN"]], rounded[["SD"]])
  c("", "Mean (SD)" = mean_sd)
}
# Renderer for categorical variables, passed to table1() as
# render.categorical.
# For every element returned by stats.default(x) it formats the frequency and
# percentage as "<FREQ> (<PCT rounded to 0 decimals>)" and returns a character
# vector with an empty first element followed by one entry per element.
# vapply() is used instead of sapply() so the result is always a character
# vector, even when stats.default(x) returns no elements.
my.render.cat <- function(x) {
  formatted <- vapply(
    stats.default(x),
    function(level_stats) {
      sprintf("%d (%0.0f)", level_stats[["FREQ"]], level_stats[["PCT"]])
    },
    character(1)
  )
  c("", formatted)
}

# Summary tables, one per stratum list, all sharing the same labels and
# custom renderers. Each table object is evaluated on its own line so the
# notebook displays it.

# Non-diabetic participants
Table_PGS <- table1(
  ndb_strata,
  labels = labels,
  groupspan = c(1),
  render.continuous = my.render.cont,
  render.categorical = my.render.cat
)
Table_PGS

# Diabetic participants
Table_PGS2 <- table1(
  db_strata,
  labels = labels,
  groupspan = c(1),
  render.continuous = my.render.cont,
  render.categorical = my.render.cat
)
Table_PGS2

# All participants
Table_PGS3 <- table1(
  all_strata,
  labels = labels,
  groupspan = c(1),
  render.continuous = my.render.cont,
  render.categorical = my.render.cat
)
Table_PGS3
