## import packages

In [3]:
library(odbc)
library(dplyr)
library(lubridate)
library(DescTools)
library(biostat3)
library(mgcv)
library(mitools)
library(stringr)
library(R.utils)

## import data

In [4]:
## Open an ODBC connection to the 'SCREAM' database through the 'shadow' DSN
## and read the whole analysis table into memory.
con <- dbConnect(odbc(), DSN = 'shadow', database = 'SCREAM')
## FALSE is spelled out: the shorthand `F` is an ordinary variable that can be
## reassigned, whereas FALSE is a reserved word.
dta <- dbGetQuery(
    con,
    'select * from DEV.analysis_dataset_PP_discontinuation',
    stringsAsFactors = FALSE
)

In [5]:
## Derive the categorical analysis variables.
## case_when() returns the first matching branch, so every cut-off below only
## needs one bound; rows that match no branch (missing values, or potassium
## values of 5 or below) stay NA.
dta <- dta %>%
    mutate(index_year = factor(index_year, levels = 2007:2011)) %>%
    ## 1 when duration is below 3, otherwise 0
    mutate(duration_less_than_3 = ifelse(duration < 3, 1, 0)) %>%
    mutate(
        ## potassium: (5, 5.5] mild, (5.5, 6] moderate, above 6 severe
        potassium_cat = factor(
            case_when(
                potassium_value > 6.0 ~ 'severe',
                potassium_value > 5.5 ~ 'moderate',
                potassium_value > 5 ~ 'mild'
            ),
            levels = c('mild', 'moderate', 'severe')
        )
    ) %>%
    mutate(
        ## eGFR: >= 90 G1, [60, 90) G2, [45, 60) G3a, [30, 45) G3b, < 30 G4-5
        cov_egfr_cat = factor(
            case_when(
                cov_egfr >= 90 ~ 'G1',
                cov_egfr >= 60 ~ 'G2',
                cov_egfr >= 45 ~ 'G3a',
                cov_egfr >= 30 ~ 'G3b',
                cov_egfr < 30 ~ 'G4-5'
            ),
            levels = c('G1', 'G2', 'G3a', 'G3b', 'G4-5')
        )
    ) %>%
    mutate(
        ## ACR: < 3, [3, 30], > 30
        cov_acr_cat = factor(
            case_when(
                cov_acr < 3 ~ 'normal to mildly increased',
                cov_acr <= 30 ~ 'moderately increased',
                cov_acr > 30 ~ 'severely increased'
            ),
            levels = c(
                'normal to mildly increased',
                'moderately increased',
                'severely increased'
            )
        )
    ) %>%
    group_by(lopnr)

## clone and censoring generation

In [6]:
## Baseline covariates: within each lopnr, rows are sorted by index_date and
## the value found on the first row is copied to every row of that lopnr in a
## new column carrying the `_t0` suffix. The result stays grouped by lopnr.
dta <- dta %>%
    ungroup() %>%
    arrange(lopnr, index_date) %>%
    group_by(lopnr) %>%
    mutate(across(
        c(
            cov_diabetes,
            cov_hypertension,
            cov_MI,
            cov_CHF,
            cov_cerebrovascular_disease,
            cov_PVD,
            cov_cancer,
            cov_COPD,
            cov_cataract,
            cov_beta_blocker,
            cov_CCB,
            cov_diuretic,
            cov_MRA,
            cov_SPS,
            cov_statin,
            cov_antiplatelet_agent,
            cov_egfr,
            cov_acr,
            cov_egfr_cat,
            cov_acr_cat,
            cov_hospitalization,
            cov_fracture
        ),
        first,
        .names = '{.col}_t0'
    ))

In [7]:
## Clone every record into two arms, labelled by X.
dta_discontinuation <- dta %>% mutate(X = 'discontinuation')
## The IDs of the second arm are shifted by the width of the observed ID range
## (max - min + 1). For IDs forming a contiguous run this equals the number of
## unique IDs; for IDs with gaps it still guarantees that a shifted ID can
## never coincide with an original one.
dta_reinitiation <- dta %>%
    mutate(X = 'reinitiation') %>%
    mutate(ID = ID + (max(dta$ID, na.rm = TRUE) - min(dta$ID, na.rm = TRUE) + 1L))
## Stack the two arms with dplyr's bind_rows() (both inputs are grouped
## tibbles), then accumulate `treatment` in index_date order within each
## arm and lopnr: whether_treated is the running total up to and including
## the current row.
dta_clone <- bind_rows(dta_discontinuation, dta_reinitiation) %>%
    arrange(X, lopnr, index_date) %>%
    group_by(X, lopnr) %>%
    mutate(whether_treated = cumsum(treatment))

In [8]:
## Flag the rows on which a clone deviates from the strategy of its arm, then
## accumulate the flags so `censor` is 0 before the first deviation and
## positive from that row onward (within each arm and lopnr, in index_date
## order).
dta_clone <- dta_clone %>%
    arrange(X, lopnr, index_date) %>%
    group_by(X, lopnr) %>%
    ## Coerce rank before it is compared: should the driver hand it back as
    ## character, `>=` would otherwise compare lexicographically. Numeric
    ## input is unaffected.
    mutate(rank = as.numeric(rank)) %>%
    mutate(censor = ifelse(
        ## discontinuation arm: any row with treatment == 1
        (X == 'discontinuation' & treatment == 1) |
        ## reinitiation arm: still no treatment recorded at rank 6
        (X == 'reinitiation' & rank == 6 & whether_treated == 0) |
        ## reinitiation arm: treatment == 0 on a row from rank 2 onward after
        ## treatment has already been recorded
        (X == 'reinitiation' & rank >= 2 & whether_treated >= 1 & treatment == 0),
        1, 0
    )) %>%
    mutate(censor = cumsum(censor))

In [9]:
## check
## write.csv(dta_clone %>% arrange(X, lopnr, index_date), file = 'see.csv')

In [10]:
## Persist the cloned dataset. save() writes a serialized R data image, so
## despite the `.R` extension this file is restored with load(), not source().
save(list = 'dta_clone', file = 'dta_tidied_PP.R')

In [11]:
## Close the database connection opened for the initial query.
dbDisconnect(con)

In [12]:
## List the column names of the final cloned dataset for a visual check.
names(dta_clone)