# ORCHID Clinical Trial: statistical analysis reproduction

# Version 0.4

**Disclaimer** 

This is a development version of the notebook reproducing the statistical analysis of the ORCHID trial. The code producing the analyses and the plots will still change and be refined, but the libraries required to run the notebook will not; the notebook can therefore already be used to check that the package installation process and the overall notebook execution work as expected.

Some of the results do not match those reported in the [ORCHID trial article](https://jamanetwork.com/journals/jama/fullarticle/10.1001/jama.2020.22240?utm_campaign=articlePDF&utm_medium=articlePDFlink&utm_source=articlePDF&utm_content=jama.2020.22240) (namely the count of 'cytopenia', and the Fine and Gray model used to analyse the 'discharge from hospital' secondary outcome). The analyses will be refined in order to identify where the discrepancies come from and, eventually, to correct them.

# Data Access using PIC-SURE API

User access authentication works through a security token, which is passed to the API using the token.txt file (file to be created by the user). In order to know how to get your security token, please see [the README of the PIC-SURE API GitHub repo](https://github.com/hms-dbmi/Access-to-Data-using-PIC-SURE-API/tree/master/NHLBI_BioData_Catalyst).

# ORCHID Clinical Trial

Multi-center, double blinded, randomized clinical trial conducted to assess the efficacy of hydroxychloroquine in the treatment of COVID-19. Results published in JAMA on November 9th 2020, [paper available here](https://jamanetwork.com/journals/jama/fullarticle/10.1001/jama.2020.22240?utm_campaign=articlePDF&utm_medium=articlePDFlink&utm_source=articlePDF&utm_content=jama.2020.22240)

NHLBI made the data available to all authorized investigators. Hence, this notebook enables anybody with authorized credentials to reproduce the ORCHID clinical trial results by showing how to:
1. Access the data using the PIC-SURE API
2. Reproduce the results of this study using the open-source R programming language

# Packages Installation 

In [None]:
# Notebook-wide settings: default size of the rendered plots.
options(repr.plot.width = 18, repr.plot.height = 9)
# A negative `warn` value makes R ignore warnings for the rest of the session.
options(warn = -1)

In [None]:
# CRAN packages used throughout the notebook.
required_libraries <- c(
  "tidyverse", "devtools", "kableExtra", "survival", "survminer",
  "MASS", "quantreg", "DescTools", "IRdisplay", "arsenal"
)

# Install whatever is missing, then attach every package.
for (package in required_libraries) {
  already_installed <- package %in% rownames(installed.packages())
  if (!already_installed) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}

# The PIC-SURE client and its HPDS adapter come from GitHub;
# `force = TRUE` reinstalls them on every run.
for (github_repo in c("hms-dbmi/pic-sure-r-client", "hms-dbmi/pic-sure-r-adapter-hpds")) {
  devtools::install_github(github_repo, force = TRUE)
}

# Installing the library and connecting to the database 

In [None]:
# Open a PIC-SURE connection and obtain the HPDS resource used for all queries.
PICSURE_network_URL <- "https://picsure.biodatacatalyst.nhlbi.nih.gov/picsure"
# The security token is read from a local file created by the user.
token_file <- "token.txt"
token <- scan(token_file, what = "character")
# NOTE(review): this turns off TLS peer and host verification for every httr
# request in the session, so the token is sent without checking the server's
# certificate. Confirm whether this is still required in the target
# environment and remove it if not.
httr::set_config(httr::config(ssl_verifypeer=0L, ssl_verifyhost=0L))
myconnection <- picsure::connect(url = PICSURE_network_URL, token = token)
# The value returned by list.resources() is passed directly as the resource
# UUID -- presumably a single identifier; verify if several resources exist.
resource_id <- picsure::list.resources(myconnection)
resource <- hpds::get.resource(myconnection, resourceUUID = resource_id)

# Querying the data

In [None]:
# Look up every dictionary entry matching 'ORCHID' and collect its keys.
dictionary_results <- hpds::find.in.dictionary(resource, 'ORCHID')
list_variables <- hpds::extract.keys(dictionary_results)

# Build a query selecting records with any of those variables, run it, and
# keep the result as a tibble.
query <- hpds::new.query(resource)
hpds::query.anyof.add(query, list_variables)
raw_df <- as_tibble(hpds::query.run(query, result.type = "dataframe"))

# Variable names 

In [None]:
# Names of the columns used in the analyses, grouped by role.

# Primary outcome.
primary_outcome_name <- "d_covid15"

# Secondary outcomes analysed as binary (Yes/No) variables.
secondary_outcomes_binary <- c("d_mort15", "d_mort29", "d_ecmo_death")

# Secondary outcomes on the COVID clinical scale.
secondary_outcomes_ccs <- c("d_covid3", "d_covid8", "d_covid29")

# Secondary outcomes later converted to ordered 0-28 factors.
secondary_outcomes_daysfree <- c(
  "d_time_to_recovery",
  "d_hospfreedays", "d_oxyfreedays", "d_ventfreedays",
  "d_vasofreedays", "d_icufreedays"
)

# Safety outcomes ('safe_cytopenia' is derived during data management).
safety_outcomes <- c(
  "safe_cytopenia", "safe_seizure", "safe_astalt",
  "safe_hypogly", "safe_ca", "safe_vtach"
)

# Right-hand side terms of the adjusted models.
covariates <- c(
  "rand_trt", "bl_age", "bl_sex",
  "covid_ooscale_1", "d_sofa_gcs", "bl_symptomdt"
)

# Data Management

In [None]:
# Extract the short variable name from each raw column name: either the word
# following the ORCHID study prefix, or the literal "Patient.ID". Names that
# match neither pattern yield NA.
simplified_names <- str_extract(
  names(raw_df),
  '((?<=X\\.COVID19\\.ORCHID\\.\\.\\.phs002299\\.\\.\\.)\\w+(?=\\.))|(Patient\\.ID)'
)

# Keep only the columns whose name was recognised and rename them.
recognised_column <- !is.na(simplified_names)
orchid_data <- raw_df[recognised_column]
names(orchid_data) <- simplified_names[recognised_column]

# Make 'Placebo' the reference level of the treatment factor.
orchid_data$rand_trt <- relevel(orchid_data$rand_trt, 'Placebo')

# Composite cytopenia flag: 'Yes' when at least one component column is 'Yes'
# for the patient (missing components are ignored), 'No' otherwise.
# NOTE(review): 'safe_thombo' is used as spelled in the original code --
# confirm it matches the column name in the data dictionary.
cytopenia_components <- c('safe_neutrop',
                          'safe_lympho',
                          'safe_anemia',
                          'safe_thombo')
cytopenia_is_yes <- orchid_data[, cytopenia_components] == 'Yes'
orchid_data$safe_cytopenia <- ifelse(
  rowSums(cytopenia_is_yes, na.rm = TRUE) > 0,
  'Yes',
  'No'
)
    
# Checkbox columns describing the respiratory symptoms.
respiratory_symptoms_variables <- c(
  'ie_arisymptoms___cou',
  'ie_arisymptoms___fev',
  'ie_arisymptoms___sob',
  'ie_arisymptoms___st'
)

# Recode the checkboxes to logicals through a lookup table:
# "Checked" -> TRUE, "Unchecked" -> FALSE, anything else (or missing) -> NA.
checkbox_lookup <- c("Checked" = TRUE, "Unchecked" = FALSE)
orchid_data[respiratory_symptoms_variables] <- lapply(
  orchid_data[respiratory_symptoms_variables],
  function(variable) unname(checkbox_lookup[as.character(variable)])
)

# Normalise a Yes/No style variable.
#
# `factor`: vector whose values, once converted to character, are expected to
#   be 'Yes'/'1', 'No'/'0' or ''.
# Returns a factor with values 'Yes' or 'No'; empty strings, missing values and
# any other value become NA.
dm_factors <- function(factor) {
  values <- as.character(factor)
  recoded <- rep(NA_character_, length(values))
  recoded[values %in% c('Yes', '1')] <- 'Yes'
  recoded[values %in% c('No', '0')] <- 'No'
  as.factor(recoded)
}
# Normalise every safety outcome and binary secondary outcome to a Yes/No factor.
binary_outcome_columns <- c(safety_outcomes, secondary_outcomes_binary)
orchid_data[binary_outcome_columns] <- as_tibble(
  lapply(orchid_data[binary_outcome_columns], dm_factors)
)


# Convert a day-count variable into an ordered factor.
#
# `variable`: vector of day counts.
# Returns an ordered factor with levels "0", "1", ..., "28"; values outside
# that set (and missing values) become NA.
create_ordered_daysfree <- function(variable) {
  ordered(as.character(variable), levels = as.character(seq(0, 28)))
}
# Turn every days-free secondary outcome into an ordered 0-28 factor.
daysfree_as_ordered <- lapply(
  orchid_data[secondary_outcomes_daysfree],
  create_ordered_daysfree
)
orchid_data[secondary_outcomes_daysfree] <- as_tibble(daysfree_as_ordered)


# Convert a COVID clinical scale variable into an ordered factor.
#
# `variable`: vector holding the textual scale categories.
# Returns an ordered factor whose seven levels run from '1, Dead' (lowest) to
# '7, Not hospitalized without limitation in activity (no symptoms)'
# (highest). Values that match no level become NA.
create_ordered_covid_clinical_scale <- function(variable) {
  scale_levels <- c(
    '1, Dead',
    '2, Hospitalized on invasive mechanical ventilation or ECMO',
    '3, Hospitalized on non-invasive ventilation or high flow nasal cannula',
    # The trailing comma is part of the level text used by the original code.
    '4, Hospitalized on supplemental oxygen,',
    '5, Hospitalized not on supplemental oxygen',
    '6, Not hospitalized with limitation in activity (continued symptoms)',
    '7, Not hospitalized without limitation in activity (no symptoms)'
  )
  ordered(variable, levels = scale_levels)
}
# Every column whose name starts with 'd_covid' or 'covid_ooscale' holds the
# clinical scale: convert them all to ordered factors.
clinical_scale_columns <- grep('(^d_covid)|(^covid_ooscale)', names(orchid_data))
orchid_data[clinical_scale_columns] <- as_tibble(
  lapply(orchid_data[clinical_scale_columns], create_ordered_covid_clinical_scale)
)

In [None]:
# Recode a Yes/No column to numeric 1/0: empty strings are first set to NA,
# then 'No' becomes 0 and 'Yes' becomes 1.
yes_no_to_numeric <- function(values) {
  values[values == ''] <- NA
  recoded <- fct_recode(values, '0' = 'No', '1' = 'Yes')
  as.numeric(as.character(recoded))
}

orchid_data$vs_died <- yes_no_to_numeric(orchid_data$vs_died)
orchid_data$iho_dischyn <- yes_no_to_numeric(orchid_data$iho_dischyn)


    
    # Ethnicity
# A patient is flagged as multi-race when more than one of the three race
# checkboxes is 'Checked' (missing checkboxes are ignored).
race_checkbox_columns <- c('bl_race___1', 'bl_race___othcat', 'bl_race___5')
race_is_checked <- orchid_data[race_checkbox_columns] == 'Checked'
orchid_data$multi_race <- rowSums(race_is_checked, na.rm = TRUE) > 1

# Combined race/ethnicity category, built from the innermost fallback outwards.
# Priority: Hispanic or Latino > Multirace > Black or African American >
# White > other categories; NA when nothing applies.
race_other <- ifelse(
  orchid_data$bl_race___othcat == 'Checked',
  'American Indian or Alaska Native/Asian/Native Hawaiian or Other Pacific Islander',
  NA
)
race_white <- ifelse(orchid_data$bl_race___5 == 'Checked', 'White', race_other)
race_black <- ifelse(orchid_data$bl_race___1 == 'Checked',
                     'Black or African American',
                     race_white)
race_multi <- ifelse(orchid_data$multi_race == TRUE, 'Multirace', race_black)
orchid_data$race_ethnicity <- ifelse(
  orchid_data$bl_ethnic == 'Hispanic or Latino',
  'Hispanic or Latino',
  race_multi
)

# Body-mass index rounded to one decimal; the height is divided by 100 before
# squaring (presumably centimetres to metres -- confirm the units).
height_scaled <- orchid_data$bl_height / 100
orchid_data$bmi <- round(orchid_data$bl_weight / height_scaled^2, 1)

# Columns describing chronic conditions.
chronic_conditions <- c(
  'charl_contis', 'charl_ulcer', 'charl_hypertension',
  'charl_cad', 'charl_fatal', 'charl_liver',
  'charl_diabetes', 'charl_kidney', 'charl_copd'
)

# Diabetes and kidney disease: TRUE when the recorded category is one of the
# listed values, FALSE otherwise (including when it is missing).
diabetes_categories <- c(
  'DM with end organ damage present (excludes diet controlled alone)',
  'Uncomplicated DM present (no end organ damage present)'
)
orchid_data$diabetes_bool <- orchid_data$charl_diabetes %in% diabetes_categories

kidney_categories <- c(
  'Moderate to severe kidney disease present (Cr > 3, ESRD, chart diagnosis of CKD stage 5 (eGFR < 15 mL/min/1.73m²) not on dialysis)',
  'Moderate to severe kidney disease present and patient is dialysis dependent'
)
orchid_data$kidney_bool <- orchid_data$charl_kidney %in% kidney_categories

# Yes/No conditions: TRUE for 'Yes', FALSE for other values, NA when missing.
orchid_data$cad_bool <- orchid_data$charl_cad == 'Yes'
orchid_data$hypertension_bool <- orchid_data$charl_hypertension == 'Yes'
orchid_data$copd_bool <- orchid_data$charl_copd == 'Yes'


# Table 1 Description

In [None]:
# Display labels for the variables shown in Table 1 (column name -> label).
table1_labels <- c(
    'bl_sex' = 'Sex',
    'bl_age' = 'Age',
    'race_ethnicity' = 'Ethnicity',
    'bl_prehosp' = 'Location at hospitalization',
    'bmi' = 'BMI' ,
    'diabetes_bool' = 'Diabetes',
    'kidney_bool' = 'Chronic Kidney Disease',
    'cad_bool' = 'Coronary Artery Disease',
    'hypertension_bool' = 'Hypertension',
    'copd_bool' = 'Chronic Obstructive Pulmonary Disease',
    'bl_randloc' = 'Location at randomization',
    'covid_ooscale_1' = 'COVID Clinical Scale at Baseline',
    'vit_vasop_1' = 'Vasopressor',
    'd_sofa_gcs' = 'SOFA at baseline',
    'ie_arisymptoms___cou'= 'Cough',
    'ie_arisymptoms___fev'= 'Fever (temperature >37.5 °C)',
    # '___sob' is shortness of breath and '___st' is sore throat; the previous
    # labels ('Sobbering' / 'Shortness of breath') were attached to the wrong
    # symptoms.
    'ie_arisymptoms___sob'= 'Shortness of breath',
    'ie_arisymptoms___st'= 'Sore throat',
    'd_onset_duration'= 'Symptoms onset duration',
    # Suffix 'h' = highest and 'l' = lowest, as for 'labs_asth_1' and
    # 'sofa_platl_1' below; the two white-cell labels were previously swapped.
    'labs_wbch_1' = 'Highest white cells blood count at Day 1(/mm3)',
    'labs_wbcl_1' = 'Lowest white cells blood count at Day 1(/mm3)',
    'sofa_platl_1' = 'Lowest platelet count at Day 1(k/mm3)',
    'labs_asth_1' = 'Highest AST - Aspartate aminotransferase (units per liter) (Day 1)'
)


In [None]:
# Copy of the data used for Table 1, with 'Hydroxychloroquine' as the first
# level of the treatment factor.
orchid_table1 <- dplyr::mutate(
  orchid_data,
  rand_trt = relevel(rand_trt, 'Hydroxychloroquine')
)

In [None]:
# Attach each display label to its column as a 'label' attribute.
labelled_columns <- names(table1_labels)
for (column_index in seq_along(labelled_columns)) {
  column_name <- labelled_columns[[column_index]]
  attr(orchid_table1[[column_name]], 'label') <- table1_labels[[column_name]]
}

In [None]:
# Demographic characteristics summarised by treatment arm.
demographics_table <- tableby(
  rand_trt ~ bl_sex + bl_age + race_ethnicity,
  data = orchid_table1
)

In [None]:
# Baseline status (location at randomization, clinical scale, vasopressor)
# summarised by treatment arm.
baseline_table <- tableby(
  rand_trt ~ bl_randloc + covid_ooscale_1 + vit_vasop_1,
  data = orchid_table1
)

In [None]:
# Comorbidity flags summarised by treatment arm.
comorbidity_table <- tableby(
  rand_trt ~ diabetes_bool + kidney_bool + cad_bool + hypertension_bool + copd_bool,
  data = orchid_table1
)

In [None]:
# Respiratory symptom checkboxes summarised by treatment arm.
symptoms_table <- tableby(
  rand_trt ~ ie_arisymptoms___cou + ie_arisymptoms___fev +
    ie_arisymptoms___sob + ie_arisymptoms___st,
  data = orchid_table1
)

In [None]:
# SOFA score and symptom onset duration summarised by treatment arm.
admission_table <- tableby(
  rand_trt ~ d_sofa_gcs + d_onset_duration,
  data = orchid_table1
)

In [None]:
# Merge the five summary tables pairwise, in display order, then keep the
# label column and the two treatment columns.
table1_sections <- list(
  demographics_table,
  baseline_table,
  symptoms_table,
  admission_table,
  comorbidity_table
)
merged_table1 <- Reduce(merge, table1_sections)

table1_df <- as_tibble(merged_table1) %>%
  dplyr::select(all_of(c('label', 'Hydroxychloroquine', 'Placebo'))) %>%
  rename('Characteristic' = label) %>%
  # Drop the 'FALSE' rows of the logical variables and the 'Range' rows.
  filter(!Characteristic %in% c('FALSE', 'Range'))


In [None]:
# Format every cell of the two treatment columns as text.
# Each cell is flattened with unlist(); a non-empty cell has its values rounded
# to one decimal, joined with ' (' and closed with ')', so two values a and b
# are rendered as "a (b)". Empty cells are returned as they are.
table1_df[, c('Hydroxychloroquine', 'Placebo')] <- apply(table1_df[, c('Hydroxychloroquine', 'Placebo')],
       c(1, 2), 
       function(elem) {
           # Flatten the cell content to an atomic vector.
           elem_bis <- unlist(elem)
           ifelse(elem != '', 
                  unlist(elem_bis) %>% round(1) %>% paste0( collapse=' (') %>% paste0(')'),
                  elem_bis
                  )
     }
) %>% as_tibble()

In [None]:
# Rows with an empty treatment cell are used as header rows when rendering the
# table; rows with a value are the ones that get indented.
hydroxychloroquine_cells <- table1_df[['Hydroxychloroquine']]
indices_cat <- which(hydroxychloroquine_cells == '')
indices_results <- which(hydroxychloroquine_cells != '')

In [None]:
# Render Table 1 as HTML: styled kable, bold header rows, indented result rows,
# three packed sections, and a horizontal rule after each section title.
kbl(table1_df) %>%
kable_minimal() %>% kable_styling(bootstrap_options = c("striped"), 
                                  full_width = FALSE,
                                  position = "left", 
                                 font_size = 12) %>%
  column_spec(1, width = "20em") %>%
  column_spec(2, width = "10em") %>% 
  column_spec(3, width = "10em") %>% 
add_header_above(c(" ", "No. (%)" = 2), align = 'left') %>%
row_spec(0, bold = TRUE, extra_css = "font-size: medium") %>%
row_spec(indices_cat, underline = FALSE, bold = TRUE, align = 'left') %>%
# Hex colours need the leading '#': without it the value is written to the CSS
# unchanged and is not a valid colour, so no background is applied.
column_spec(1, background = '#FDEBD0') %>%
kableExtra:::add_indent_html(positions = indices_results) %>%
# NOTE(review): the row ranges below are hard-coded (and 'Demographics' and
# 'COVID-19 Symptoms' both include row 20) -- confirm against the actual rows.
pack_rows('Demographics', 1, 20, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
pack_rows('COVID-19 Symptoms', 20, 39, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
pack_rows('Comorbidities', 40, 48, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
as.character() %>%
#str_replace_all(paste0('(', str_c(table1_labels, collapse = ')|('), ')'),
# Append a horizontal rule after each section title in the HTML source.
str_replace_all(c("Comorbidities"),
            replacement = function(string) paste0(string, '<hr>')) %>%
str_replace_all(c("Demographics"),
            replacement = function(string) paste0(string, '<hr>')) %>%
str_replace_all(c("COVID-19 Symptoms"),
            replacement = function(string) paste0(string, '<hr>')) %>%
display_html()

# Statistical Analysis

## COVID-19 Ordered Clinical Scale Description

In [None]:
# Long format: one row per patient and per clinical-scale assessment, with the
# order of the scale levels reversed.
coos_wide <- dplyr::select(orchid_data, subject_id, rand_trt, starts_with("d_covid"))
coos_df <- pivot_longer(coos_wide,
                        cols = starts_with("d_covid"),
                        names_to = "date",
                        values_to = "COOS")
coos_df <- mutate(coos_df, COOS = fct_rev(COOS))

barplot_width <- 0.6

# Stacked 100% bar chart of the clinical scale for the two assessments
# 'd_covid15' and 'd_covid29', one panel per assessment.
# The group sizes shown in the axis labels are hard-coded.
plot_count_COOS <- coos_df %>%
  filter(date %in% c("d_covid15", "d_covid29")) %>%
  mutate(
    rand_trt = fct_recode(
      relevel(rand_trt, 'Hydroxychloroquine'),
      "Hydroxychloroquine\n(n=242)" = "Hydroxychloroquine",
      "Placebo\n(n=237)" = "Placebo"
    ),
    date = fct_recode(
      date,
      "14 d After randomization\n(primary outcome)" = "d_covid15",
      "28 d After randomization\n(secondary outcome)" = "d_covid29"
    )
  ) %>%
  ggplot(aes(x = rand_trt, fill = COOS)) +
  geom_bar(width = barplot_width, position = "fill", colour = "black") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  facet_grid( ~ date) +
  scale_fill_brewer(palette = "Greens") +
  theme_bw() +
  theme(
    legend.background = element_blank(),
    legend.box.background = element_rect(colour = "black"),
    panel.spacing = grid::unit(c(0), "lines"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    panel.border = element_blank(),
    panel.background = element_blank(),
    strip.background = element_rect(colour = "white", fill = "white")
  ) +
  labs(
    title = "Figure 1: Clinical Status on the Coronavirus Disease (COVID) Outcomes Scale 14 Days and 28 Days After Randomization",
    fill = "Clinical status (COVID Outcomes Scale category)",
    x = "Treatment group",
    y = "Patients with clinical status, %"
  )
plot_count_COOS

## Primary and secondary outcomes comparison

In [None]:
#orchid_data$bl_sex <- ifelse(orchid_data$bl_sex == 'Other', 
#                            orchid_data$bl_sex )
# Fit a proportional-odds model of `outcome` on `covariates`.
#
# `outcome`:    name of the ordered-factor outcome column (character scalar).
# `covariates`: character vector of column names used as predictors.
# `df`:         data frame holding those columns.
# Returns the model fitted by MASS::polr() with the Hessian kept
# (Hess = TRUE), so that summary() does not have to refit it.
fitted_polr <- function(outcome, covariates, df) {
  # Build a real formula object instead of passing a character string.
  model_formula <- stats::as.formula(
    paste(outcome, paste(covariates, collapse = " + "), sep = " ~ ")
  )
  # `data` is spelled out in full; the previous `dat =` only worked through
  # partial argument matching.
  MASS::polr(model_formula, data = df, Hess = TRUE)
}

# Summarise the treatment effect of a fitted proportional-odds model.
#
# `fitted_model`: model whose coefficient table has a
#   'rand_trtHydroxychloroquine' row with 'Value', 'Std. Error' and 't value'.
# Returns a named numeric vector rounded to two decimals: the odds ratio
# ('OR'), its normal-approximation 95% confidence bounds ('2.5%', '97.5%') and
# the two-sided p-value ('pvalue').
dm_polr_results <- function(fitted_model) {
  treatment_term <- 'rand_trtHydroxychloroquine'
  treatment_row <- coef(summary(fitted_model))[treatment_term, ]

  odds_ratio <- exp(coef(fitted_model))[treatment_term]
  z_bounds <- qnorm(c(0.025, 0.975))
  conf_int <- exp(treatment_row['Value'] + z_bounds * treatment_row['Std. Error'])
  p_value <- 2 * pnorm(abs(treatment_row["t value"]), lower.tail = FALSE)

  setNames(
    round(c(odds_ratio, conf_int, p_value), 2),
    c('OR', '2.5%', '97.5%', 'pvalue')
  )
}

# Bootstrap 95% confidence interval for the between-arm difference in the
# median of `outcome`.
#
# `outcome`:    name of the outcome column; it is converted with as.numeric().
# `covariates`: accepted for signature compatibility but not used.
# `df`:         data frame with the outcome and the `rand_trt` column.
# Returns the 2.5% and 97.5% quantiles of the bootstrapped coefficient of the
# treatment column (median regression, tau = 0.5, 10000 replicates).
# The bootstrap draws random samples and no seed is set here.
ci_quantreg <- function(outcome, covariates, df) {
  df[[outcome]] <- as.numeric(df[[outcome]])
  observed <- !is.na(df[[outcome]])

  # Design matrix: intercept column plus the treatment column.
  design <- cbind(1, df[['rand_trt']][observed])
  bootstrap_fit <- boot.rq(design, df[[outcome]][observed], tau = 0.5, R = 10000)

  percentile_bounds <- t(apply(bootstrap_fit$B, 2, quantile, c(0.025, 0.975)))
  # Second row = treatment coefficient.
  percentile_bounds[2, ]
}


# Convert an outcome column to the numbers it represents.
# For a factor whose levels are all numeric strings (the days-free outcomes,
# levels "0".."28") the level values are returned; a plain as.numeric() would
# return the internal codes 1..29 and shift every median by one.
# For any other input (e.g. the clinical scale, whose level codes 1..7 equal
# the scale score) as.numeric() is used, as before.
outcome_to_numeric <- function(values) {
  if (is.factor(values)) {
    level_values <- suppressWarnings(as.numeric(levels(values)))
    if (!anyNA(level_values)) {
      return(level_values[values])
    }
  }
  as.numeric(values)
}

# For the primary outcome and every ordinal secondary outcome, collect:
#   - the median (IQR) per treatment arm,
#   - the unadjusted difference in medians (with a bootstrap CI when non-zero),
#   - the adjusted odds ratio from the proportional-odds model.
list_results_outcomes <- list()
for (outcome_name in c(primary_outcome_name, secondary_outcomes_ccs, secondary_outcomes_daysfree)) {
  fitted_model <- fitted_polr(outcome_name, covariates, orchid_data)
  result_model_polyr <- dm_polr_results(fitted_model)
  adjusted_OR <- paste0(result_model_polyr[["OR"]], ' (', result_model_polyr[["2.5%"]], ' - ',
                        result_model_polyr[["97.5%"]], ')'
  )
  # Quartiles per arm, with 'Hydroxychloroquine' first and 'Placebo' second.
  quartiles <- by(orchid_data, relevel(orchid_data$rand_trt, 'Hydroxychloroquine'),
     function(df) {
       numeric_var <- outcome_to_numeric(df[[outcome_name]])
       trunc(quantile(numeric_var, probs = c(0.25, 0.5, 0.75), na.rm = TRUE))
     })
  # Named `median_iqr` rather than `median` to avoid shadowing stats::median.
  median_iqr <- quartiles %>%
    lapply(function(x) paste0(x[['50%']], ' (', x[['25%']], ' to ', x[['75%']], ')'))
  # Hydroxychloroquine median minus Placebo median.
  unadjusted_diff <- quartiles[[1]][['50%']] - quartiles[[2]][['50%']]
  if (unadjusted_diff == 0) {
    ci_unadjusted_diff <- as.character(unadjusted_diff)
  } else {
    ci_diff_num <- ci_quantreg(outcome = outcome_name, covariates = covariates, df = orchid_data)
    ci_unadjusted_diff <- paste0(unadjusted_diff, ' (', ci_diff_num[[1]], ' to ', ci_diff_num[[2]], ')')
  }
  list_results_outcomes[[outcome_name]] <- c(median_iqr,
                                             "Unadjusted difference" = ci_unadjusted_diff,
                                             "Adjusted odds ratio or odds ratio" = adjusted_OR
  )
}


# Summarise the treatment effect of a fitted logistic regression.
#
# `fitted_model`: model whose coefficient table has a
#   'rand_trtHydroxychloroquine' row with 'Estimate' and 'Std. Error'.
# Returns a named numeric vector rounded to two decimals: the odds ratio
# ('OR') and its normal-approximation 95% confidence bounds ('2.5%', '97.5%').
dm_result_logistr <- function(fitted_model) {
  treatment_term <- 'rand_trtHydroxychloroquine'
  treatment_row <- coef(summary(fitted_model))[treatment_term, ]

  odds_ratio <- exp(coef(fitted_model))[treatment_term]
  z_bounds <- qnorm(c(0.025, 0.975))
  conf_int <- exp(treatment_row['Estimate'] + z_bounds * treatment_row['Std. Error'])

  setNames(round(c(odds_ratio, conf_int), 2), c('OR', '2.5%', '97.5%'))
}


# For every binary secondary outcome and safety outcome, collect:
#   - the count (%) of 'Yes' per arm (Hydroxychloroquine first),
#   - the risk difference with its Wald confidence interval, in percent,
#   - the unadjusted odds ratio from a logistic regression on treatment.
for (safety_outcome in c(secondary_outcomes_binary, safety_outcomes)) {
  formula_safety <- paste(safety_outcome,
                          "rand_trt",
                          sep = " ~ ")
  non_na <- ! (is.na(orchid_data[[safety_outcome]]) | orchid_data[[safety_outcome]] == '')
  # The odds ratio is reported as NA when the fit raises a warning, fails with
  # an error, or yields an infinite upper confidence bound.
  OR <- tryCatch({
    fitted_model <- glm(formula_safety, family = binomial(link = "logit"), data = orchid_data[non_na, ])
    odds_ratio <- dm_result_logistr(fitted_model)
    if (is.infinite(odds_ratio[['97.5%']])) NA else odds_ratio
  },
  # Previously the condition object itself was returned; having length 2 it
  # slipped past the `length(OR) == 1` check below and was rendered as
  # " ( to )" instead of NA.
  error = function(e) {
    message("Logistic regression failed for '", safety_outcome, "': ",
            conditionMessage(e))
    NA
  },
  warning = function(w) NA
  )
  table_count <- table(orchid_data[non_na, c('rand_trt', safety_outcome)]) %>%
    as.data.frame() %>%
    pivot_wider(names_from = all_of(safety_outcome), values_from = 'Freq') %>%
    mutate(freq = round(Yes/(Yes + No), 3) * 100)
  # Swap the two rows so that row 1 is Hydroxychloroquine and row 2 is Placebo
  # ('Placebo' is the reference level of `rand_trt`).
  table_count <- table_count[c(2, 1), ]
  count_safety <- table_count[['Yes']]
  freq_safety <- table_count[['freq']]
  names(count_safety) <- table_count[['rand_trt']]
  names(freq_safety) <- table_count[['rand_trt']]
  diff <- BinomDiffCI(x1 = table_count[[1, "Yes"]],
              n1 = sum(table_count[1, c('No', 'Yes')]),
              x2 = table_count[[2, "Yes"]],
              n2 = sum(table_count[2, c('No', 'Yes')]),
              method = "wald") %>% round(3) * 100
  list_results_outcomes[[safety_outcome]] <- c(
    paste0(count_safety, ' (', freq_safety, ')'),
    paste0(diff[[1]], ' (', diff[[2]], ' to ', diff[[3]], ')'),
    if (length(OR) == 1) NA else paste0(OR[['OR']], ' (', OR[['2.5%']], ' to ', OR[['97.5%']], ')')
  )
}


# Stack the per-outcome results into one table (one row per outcome).
output_results <- do.call(rbind, list_results_outcomes)

# Display names of the outcomes (column name -> row label).
list_renaming_outcomes <- c(
  'safe_seizure' = 'Seizure',
  'safe_vtach' = 'Ventricular tachyarrhythmia',
  'safe_ca' = 'Cardiac arrest treated with CPR',
  'safe_astalt' = 'AST or ALT ≥2 times upper limit of normal',
  'safe_hypogly' = 'Symptomatic hypoglycemia',
  'd_covid3' = 'COVID Outcomes Scale score, median (IQR) at Day 2',
  'd_covid8' = 'COVID Outcomes Scale score, median (IQR) at Day 7',
  'd_covid15' = 'COVID Outcomes Scale score, median (IQR) at Day 14',
  'd_covid29' = 'COVID Outcomes Scale score, median (IQR) at Day 28',
  # The two mortality rows used to share the same label; the day is now
  # spelled out, following the d_covid15 / d_covid29 convention above.
  'd_mort15' = 'All-cause, all-location death at Day 14',
  'd_mort29' = 'All-cause, all-location death at Day 28',
  'd_ecmo_death' = 'Ecmo or Death',
  'd_hospfreedays' = 'Hospital Free Days',
  'd_icufreedays' = 'ICU Free Days',
  'd_oxyfreedays' = 'Oxygen free days',
  'd_time_to_recovery' = 'Time to recovery',
  'd_vasofreedays' = 'Vasopressor Free Days',
  'd_ventfreedays' = 'Ventilator Free Days',
  'safe_cytopenia' = 'Cytopenia'
)
row.names(output_results) <- list_renaming_outcomes[row.names(output_results)]

# Render the table as HTML with three packed sections and a horizontal rule
# after each section title.
output_results %>%
  kable(caption="Effect of Hydroxychloroquine on COVID Outcomes Scale") %>%
  kable_minimal() %>%
  pack_rows('Primary Outcome', 1, 1, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
  pack_rows('Secondary Outcomes', 2, 13, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
  pack_rows('Safety Events', 14, 20, indent = FALSE, label_row_css = "text-align: left; font-style: italic") %>%
  as.character() %>%
str_replace_all(c("Primary Outcome"),
            replacement = function(string) paste0(string, '<hr>')) %>%
str_replace_all(c("Secondary Outcomes"),
            replacement = function(string) paste0(string, '<hr>')) %>%
str_replace_all(c("Safety Events"),
            replacement = function(string) paste0(string, '<hr>')) %>%
  as.character() %>%
  IRdisplay::display_html()

## Survival Outcomes

## All-cause survival

In [None]:
# Derive `death_fu` as `d_lastalivedt` plus one.
# NOTE(review): `death_fu` is not referenced again in the visible part of the
# notebook; the unit of `d_lastalivedt` is presumably days -- confirm.
orchid_data$death_fu <- orchid_data$d_lastalivedt + 1

In [None]:
# 1/0 indicators: discharge (resp. death) recorded with a value of at most 28.
# A missing value counts as 0.
discharge_time <- orchid_data$iho_dischdt
orchid_data$discharged_within_28 <- as.numeric(
  !is.na(discharge_time) & discharge_time <= 28
)

death_time <- orchid_data$vs_deathdt
orchid_data$death_within_28 <- as.numeric(
  !is.na(death_time) & death_time <= 28
)
        

In [None]:
# Patients with neither a last-alive value nor a death value have no follow-up
# information and are excluded from the survival analysis.
no_fu_individuals <- is.na(orchid_data$vs_alivedt) & is.na(orchid_data$vs_deathdt)
survival_data <- orchid_data[!no_fu_individuals, ]

# Follow-up time: the smallest of 28, the last-alive value and the death value
# (missing values ignored).
survival_data$fu_death_28 <- pmin(
  28,
  survival_data$vs_alivedt,
  survival_data$vs_deathdt,
  na.rm = TRUE
)

# Adjusted Cox model for death within 28.
# NOTE(review): this formula uses `d_sofa_gcs_s`, whereas the `covariates`
# vector and Table 1 use `d_sofa_gcs` -- confirm which column is intended.
coxph_death <- coxph(
  Surv(time = survival_data$fu_death_28,
       event = survival_data$death_within_28) ~
    rand_trt + bl_age + bl_sex + covid_ooscale_1 + d_sofa_gcs_s + bl_symptomdt,
  data = survival_data
)

# Survival curves per treatment arm.
survfit_death <- survfit(
  Surv(time = survival_data$fu_death_28,
       event = survival_data$death_within_28) ~ rand_trt,
  data = survival_data
)

In [None]:
# Survival curves per treatment arm with a p-value and a risk table.
# The legend labels follow the level order of `rand_trt`
# ('Placebo' is the reference level, so it comes first).
death_plot <- ggsurvplot(
  survfit_death,
  title = "Figure 2: Treatment Effect on Survival at 28 Days",
  legend.labs = c("Placebo", "Hydroxychloroquine"),
  palette = c("#275057", "#F4890B"),
  xlim = c(1, 28),
  size = 1.3,
  conf.int = FALSE,
  pval = TRUE,
  risk.table = TRUE,
  risk.table.col = "strata",
  risk.table.height = 0.25,
  ggtheme = theme_bw() + theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  ),
  combine = TRUE,
  keep_data = TRUE,
  type = "cuminc"
)

death_plot

### Fine and Gray Model for Hospital discharge 

In [None]:
# Time to discharge, with death as a competing event (Fine and Gray model).
discharge_data <- orchid_data

# Patients whose sex is 'Other/Unknown' are recoded as 'Female'.
# NOTE(review): confirm that this recoding is the intended handling.
discharge_data$bl_sex <- as.factor(
    if_else(
    as.character(discharge_data$bl_sex) == 'Other/Unknown',
    'Female',
    as.character(discharge_data$bl_sex)
    )
)

# Event type: 'discharged' takes precedence over 'dead'; everything else is
# 'censored'. as.factor() orders the levels alphabetically, so 'censored' is
# the first level.
discharge_data$death_discharge <- as.factor(ifelse(
    discharge_data$discharged_within_28 == 1 & !is.na(discharge_data$discharged_within_28),
    'discharged', 
    ifelse(
        discharge_data$death_within_28 & !is.na(discharge_data$death_within_28), 
        'dead', 
        'censored')
    )
)

# Follow-up time: the smallest of 28, the death value and the discharge value.
# NOTE(review): the original call listed `vs_deathdt` twice (the duplicate had
# no effect and is dropped here). The all-cause follow-up time also uses
# `vs_alivedt`; confirm whether it should be included here as well.
discharge_data$fu_discharge_28 <- pmin(28, 
                            discharge_data$vs_deathdt,
                            discharge_data$iho_dischdt,
                           na.rm = TRUE)

# Expand the data into the weighted counting-process format for 'discharged'.
finegray_data <- finegray(Surv(discharge_data$fu_discharge_28, discharge_data$death_discharge) ~ .,
                           data = discharge_data,
                           etype = "discharged")

# Adjusted model. It used to be stored in `discharge_model` and was overwritten
# straight away by the unadjusted fit below; it now has its own name.
# `weights` is spelled out in full (was `weight`, via partial matching).
# NOTE(review): `d_sofa_gcs_s` differs from the `d_sofa_gcs` name used in the
# `covariates` vector -- confirm which column is intended.
discharge_model_adjusted <- coxph(Surv(fgstart, fgstop, fgstatus) ~
                              rand_trt + bl_age + bl_sex + covid_ooscale_1 + d_sofa_gcs_s + bl_symptomdt,
                            data = finegray_data,
                            weights = fgwt)

# Unadjusted model; as before, this is the model the p-value is taken from.
discharge_model <- coxph(Surv(fgstart, fgstop, fgstatus) ~
                              rand_trt ,
                            data = finegray_data,
                            weights = fgwt)

pvalue_hydroxychloroquine_discharge <- summary(discharge_model)$coefficients['rand_trtHydroxychloroquine', 'Pr(>|z|)']

# Curves per treatment arm for discharge within 28 (other patients censored).
survfit_discharge <- survfit(
  Surv(fu_discharge_28, discharged_within_28) ~ rand_trt,
  data = discharge_data)

In [None]:
# Cumulative-event curves of hospital discharge per treatment arm, with a
# p-value and a risk table.
discharge_plot <- ggsurvplot(
  survfit_discharge,
  xlim = c(1, 28),
  size = 1.3,                 
  palette = c("#275057", "#F4890B"),
  conf.int = FALSE,          
  pval = TRUE,              
  risk.table = TRUE,        
  risk.table.col = "strata",
  # The labels are applied in the order of the strata, i.e. the level order of
  # `rand_trt`, whose reference (first) level is 'Placebo'. They were listed
  # the other way round, which swapped the two arms in the legend and was
  # inconsistent with Figure 2.
  legend.labs = c("Placebo", "Hydroxychloroquine"),
  risk.table.height = 0.25,
  ggtheme = theme_bw() +  theme(
  text = element_text(size=20),
  axis.text.x = element_text(angle=0, hjust=0.5)),
  combine = TRUE,
  keep_data = TRUE,
  linetype = 'strata',
  fun = 'event',
  type = "cuminc",
  title = "Figure 3: Treatment Effect on Hospital Discharge"
)

discharge_plot
