## import packages

In [2]:
library(odbc)
library(dplyr)
library(lubridate)
library(tableone)
library(DescTools)
library(biostat3)
library(mgcv)
library(mitools)
library(stringr)
library(R.utils)

## import data

In [3]:
# Open the ODBC connection (DSN 'shadow', database 'SCREAM') and pull the
# whole analysis table into memory as `dta`.
con <- dbConnect(odbc(), DSN = "shadow", database = "SCREAM")
dta <- dbGetQuery(
    con,
    "select * from DEV.analysis_dataset_discontinuation",
    stringsAsFactors = FALSE
)

In [4]:
# Derive the categorical covariates used by the descriptive tables.
# Every case_when() below relies on first-match semantics, so each branch only
# needs the bound that the previous branches have not already consumed.
# Rows whose source value is NA stay NA in the derived category.
dta <- dta %>%
    mutate(
        index_year = factor(index_year, levels = 2007:2011),
        duration_less_than_3 = ifelse(duration < 3, 1, 0)
    ) %>%
    mutate(
        # potassium <= 5 is deliberately left uncategorised (NA)
        potassium_cat = case_when(
            potassium_value <= 5 ~ NA_character_,
            potassium_value <= 5.5 ~ 'mild',
            potassium_value <= 6.0 ~ 'moderate',
            potassium_value > 6.0 ~ 'severe'
        ),
        potassium_cat = factor(potassium_cat, levels = c('mild', 'moderate', 'severe'))
    ) %>%
    mutate(
        cov_egfr_cat = case_when(
            cov_egfr >= 90 ~ 'G1',
            cov_egfr >= 60 ~ 'G2',
            cov_egfr >= 45 ~ 'G3a',
            cov_egfr >= 30 ~ 'G3b',
            cov_egfr < 30 ~ 'G4-5'
        ),
        cov_egfr_cat = factor(cov_egfr_cat, levels = c('G1', 'G2', 'G3a', 'G3b', 'G4-5'))
    ) %>%
    mutate(
        cov_acr_cat = case_when(
            cov_acr < 3 ~ 'normal to mildly increased',
            cov_acr <= 30 ~ 'moderately increased',
            cov_acr > 30 ~ 'severely increased'
        ),
        cov_acr_cat = factor(
            cov_acr_cat,
            levels = c('normal to mildly increased', 'moderately increased', 'severely increased')
        )
    ) %>%
    # X_6 is constant within an individual: 'reinitiation' when the treatment
    # indicator sums to at least 1 over that individual's rows.
    # The result stays grouped by lopnr, as before.
    group_by(lopnr) %>%
    mutate(X_6 = ifelse(sum(treatment) >= 1, 'reinitiation', 'discontinuation'))

In [5]:
# Summarise missing values per column of `data`.
#
# Arguments:
#   data  data frame with an `lopnr` identifier column; defaults to the
#         global `dta`.
#
# Returns a data frame with one row per column of `data` that contains at
# least one NA (row names are the column names), with the columns:
#   covariate      column name
#   p_count        number of distinct `lopnr` values among the rows where the
#                  column is NA
#   na_count       number of rows where the column is NA
#   na_percentage  na_count as a percentage of nrow(data), rounded to 2 d.p.
NA_table <- function(data = dta) {
    # Both counts are taken from `data`. Previously p_count was computed from
    # the global `dta`, so it ignored the argument.
    na_count <- vapply(data, function(y) sum(is.na(y)), integer(1))
    p_count <- vapply(
        data,
        function(y) length(unique(data$lopnr[is.na(y)])),
        integer(1)
    )

    # A column has p_count > 0 exactly when it has na_count > 0, so a single
    # mask keeps the two vectors aligned.
    keep <- na_count > 0
    out <- data.frame(
        covariate = names(na_count)[keep],
        p_count = unname(p_count[keep]),
        na_count = unname(na_count[keep]),
        stringsAsFactors = FALSE
    )
    out$na_percentage <- round(out$na_count / nrow(data), 4) * 100
    rownames(out) <- out$covariate
    out
}

## data summary

In [6]:
# Missing-value summary for the full dataset (uses the default `data = dta`).
NA_table()

covariate,p_count,na_count,na_percentage
migr_dt,5656,151295,99.77
RASSi_date_after,1313,35367,23.32
RASSi_type_after,3782,39963,26.35
relative_strength_after,3782,39963,26.35
relative_strength_simple_after,3782,39963,26.35
cov_acr,3222,74444,49.09
cov_acr_cat,3222,74444,49.09


## baseline characteristics

In [7]:
# Variables shown in the baseline table, in display order.
xvars <- c(
    "age", "female", "index_year", "duration", "duration_less_than_3",
    "cov_diabetes", "cov_hypertension",
    "cov_MI", "cov_CHF", "cov_cerebrovascular_disease", "cov_PVD",
    "cov_cancer", "cov_COPD",
    "cov_beta_blocker", "cov_CCB", "cov_diuretic", "cov_MRA", "cov_SPS",
    "cov_statin",
    "cov_antiplatelet_agent",
    "potassium_value", "potassium_cat",
    "cov_egfr", "cov_egfr_cat", "cov_acr", "cov_acr_cat",
    "primary_care_num", "cov_hospitalization", "outpatient_num", "inpatient_num"
)

In [8]:
# Subset of the table variables that are summarised as categorical.
xfactorvars <- c(
    "female", "index_year", "duration_less_than_3",
    "cov_diabetes", "cov_hypertension",
    "cov_MI", "cov_CHF", "cov_cerebrovascular_disease", "cov_PVD",
    "cov_cancer", "cov_COPD",
    "cov_beta_blocker", "cov_CCB", "cov_diuretic", "cov_MRA", "cov_SPS",
    "cov_statin",
    "cov_antiplatelet_agent",
    "potassium_cat",
    "cov_egfr_cat", "cov_acr_cat", "cov_hospitalization"
)

In [9]:
# Continuous variables passed as `nonnormal` when the table is printed.
xnonnormvars <- c(
    "duration",
    "potassium_value",
    "cov_egfr",
    "cov_acr",
    "primary_care_num",
    "outpatient_num",
    "inpatient_num"
)

In [10]:
# Baseline dataset: keep only the rows with rank == 1
# (presumably one row per individual -- confirm against the source table).
dta.baseline <- dta %>% filter(rank == 1)

In [11]:
# Missing-value summary restricted to the baseline rows.
NA_table(data = dta.baseline)

covariate,p_count,na_count,na_percentage
migr_dt,5656,5656,99.77
RASSi_date_after,1313,1313,23.16
RASSi_type_after,3782,3782,66.71
relative_strength_after,3782,3782,66.71
relative_strength_simple_after,3782,3782,66.71
cov_acr,3222,3222,56.84
cov_acr_cat,3222,3222,56.84


In [12]:
# Build the baseline table: an overall column, columns stratified by X_6 with
# standardised mean differences, and a leading column N holding the number of
# non-missing values for each variable.

# print(..., printToggle = FALSE) returns the formatted table without printing.
tb1.all <- CreateTableOne(xvars, data = dta.baseline, factorVars = xfactorvars, includeNA = TRUE)
tb1.all <- print(tb1.all, nonnormal = xnonnormvars, printToggle = FALSE)
tb1.part <- CreateTableOne(xvars, strata = 'X_6', data = dta.baseline, factorVars = xfactorvars, includeNA = TRUE)
tb1.part <- print(tb1.part, nonnormal = xnonnormvars, test = FALSE, smd = TRUE, printToggle = FALSE)

# Row labels for the level rows that follow each multi-level variable's header
# row. The entries must be in the same order as the variables appear in xvars.
# Defining the labels once means the number of padding rows inserted into N
# and the number of rows relabelled below can never disagree. Previously only
# four of the five index_year rows were labelled, leaving the 2011 row blank.
# NOTE(review): 'missing' assumes cov_acr_cat contains NAs, so that
# includeNA = TRUE adds one extra level row -- confirm if the data change.
level_labels <- list(
    index_year = c('2007', '2008', '2009', '2010', '2011'),
    potassium_cat = c('mild', 'moderate', 'severe'),
    cov_egfr_cat = c('G1', 'G2', 'G3a', 'G3b', 'G4-5'),
    cov_acr_cat = c('normal to mildly increased', 'moderately increased',
                    'severely increased', 'missing')
)
num_NA <- unname(lengths(level_labels))

# First element is the total row count; the rest are per-variable non-NA counts.
N <- c(nrow(dta.baseline), colSums(!is.na(dta.baseline[xvars])))

# Pad N with NA entries after each multi-level variable so that it lines up
# row-for-row with the printed tables. Positions are looked up in the unpadded
# vector, hence the running offset.
header_pos <- match(names(level_labels), names(N))
stopifnot(!anyNA(header_pos), !is.unsorted(header_pos))
cum_num_NA <- 0
for (j in seq_along(header_pos)) {
    N <- insert(N, values = rep(NA, num_NA[j]), ats = header_pos[j] + 1 + cum_num_NA)
    cum_num_NA <- cum_num_NA + num_NA[j]
}

tb1 <- cbind(N, tb1.all, tb1.part)

# Tidy the row names: drop the first 'cov_' and turn underscores into spaces.
rownames(tb1) <- str_replace_all(str_replace(rownames(tb1), 'cov_', ''), '_', ' ')
rownames(tb1)[1] <- 'N'

# Label the level rows beneath each multi-level variable's header row.
for (v in names(level_labels)) {
    header <- str_replace_all(str_replace(v, 'cov_', ''), '_', ' ')
    first_row <- which(rownames(tb1) == header) + 1
    stopifnot(length(first_row) == 1)
    rownames(tb1)[seq(first_row, length.out = length(level_labels[[v]]))] <- level_labels[[v]]
}

In [13]:
# Display the assembled baseline table.
tb1

Unnamed: 0,N,Overall,discontinuation,reinitiation,SMD
N,5669.0,5669,1425,4244,
age,5669.0,72.01 (13.30),75.45 (12.82),70.86 (13.26),0.352
female,5669.0,2507 (44.2),690 (48.4),1817 (42.8),0.113
index year,5669.0,,,,0.193
2007,,543 ( 9.6),137 ( 9.6),406 ( 9.6),
2008,,1140 (20.1),264 (18.5),876 (20.6),
2009,,1351 (23.8),294 (20.6),1057 (24.9),
2010,,1331 (23.5),317 (22.2),1014 (23.9),
,,1304 (23.0),413 (29.0),891 (21.0),
duration,5669.0,"9.00 [2.00, 21.00]","8.00 [2.00, 22.00]","9.00 [2.00, 21.00]",0.045


In [14]:
# Export the baseline table to 'tb1.csv' in the working directory
# (write.csv includes the row names by default).
write.csv(tb1, 'tb1.csv')

## clone and censoring generation

In [15]:
# For every individual (lopnr), copy the value each covariate takes on the
# row with the earliest index_date into a companion column suffixed `_t0`.
# The result stays grouped by lopnr.
dta <- dta %>%
    ungroup() %>%
    arrange(lopnr, index_date) %>%
    group_by(lopnr) %>%
    mutate(across(
        all_of(c(
            "cov_diabetes", "cov_hypertension",
            "cov_MI", "cov_CHF", "cov_cerebrovascular_disease", "cov_PVD",
            "cov_cancer", "cov_COPD", "cov_cataract",
            "cov_beta_blocker", "cov_CCB", "cov_diuretic", "cov_MRA", "cov_SPS",
            "cov_statin", "cov_antiplatelet_agent",
            "cov_egfr", "cov_acr", "cov_egfr_cat", "cov_acr_cat",
            "cov_hospitalization", "cov_fracture"
        )),
        first,
        .names = "{.col}_t0"
    ))

In [16]:
# Duplicate every row into two clones, one per strategy label in X.
# The 'reinitiation' copy has its ID shifted by the number of distinct IDs in
# `dta` before the two copies are stacked and sorted.
dta_discontinuation <- mutate(dta, X = 'discontinuation')
dta_reinitiation <- mutate(
    dta,
    X = 'reinitiation',
    ID = ID + length(unique(dta$ID))
)
dta_clone <- arrange(
    rbind(dta_discontinuation, dta_reinitiation),
    lopnr, index_date, X
)

In [17]:
# Flag censoring within each clone (grouped by X and lopnr, ordered by index_date):
#   - 'discontinuation' clone: flagged on rows where treatment == 1
#   - 'reinitiation' clone: flagged where rank == 6 and treatment == 0, then
#     replaced by the running cumsum so later rows carry the flag forward
# `rank` is converted to numeric last, after the comparison above has used it.
dta_clone <- dta_clone %>%
    arrange(X, lopnr, index_date) %>%
    group_by(X, lopnr) %>%
    mutate(
        censor = ifelse(
            (X == 'discontinuation' & treatment == 1) |
                (X == 'reinitiation' & rank == 6 & treatment == 0),
            1,
            0
        ),
        censor = ifelse(X == 'reinitiation', cumsum(censor), censor),
        rank = as.numeric(rank)
    )

In [18]:
## check
## write.csv(dta_clone, file = 'see.csv')

In [19]:
# Persist the cloned dataset for later steps.
# NOTE: save() writes R's binary serialisation format, so despite the '.R'
# extension this file is not an R script; read it back with
# load('dta_tidied.R'), not source().
save(dta_clone, file = 'dta_tidied.R')

In [20]:
# Close the database connection held in `con`.
dbDisconnect(con)

In [21]:
# List the column names of the cloned dataset.
names(dta_clone)