In [None]:
library(tableone)
library(coxphw)
library(survival)
library(dplyr)
library(ggsurvfit)
library(cobalt)
library(survminer)

# Import data

In [None]:
# Load the master file (wide format, per its file name) into `df`
df <- read.csv("/home/jovyan/work/RALES TRIAL/1A/masterfiles/masterfile_wide_format.csv")

In [None]:
# Preview the first rows of the imported data
head(df)

In [None]:
# Number of rows in the imported data
nrow(df)

In [None]:
# Penalise scientific notation heavily so results print in fixed notation
options(scipen = 999)

In [None]:
# Recode any value of exactly 732 in the 'time' column to 731
df$time <- replace(df$time, which(df$time == 732), 731)

# Descriptive table  

In [None]:
# Variables treated as categorical in the descriptive table
catVars <- c("gender", "ace_inhibitors", "ethnicity")

# Continuous variables to be reported as median (IQR) when printing
medVars <- c("age", "creatinine", "lvef", "potassium")

# Every variable shown in the table, in display order
myVars <- c("age", "gender", "creatinine", "lvef", "potassium", "ace_inhibitors", "ethnicity")

# Build the descriptive table, one column per level of `trt`
tab1 <- CreateTableOne(
  data = df,
  vars = myVars,
  factorVars = catVars,
  strata = "trt"
)

In [None]:
# Render the descriptive table: median (IQR) for `medVars`, thousands
# separator in counts, and no between-group hypothesis tests
print(
  tab1,
  test = FALSE,
  nonnormal = medVars,
  formatOptions = list(big.mark = ",")
)

# Kaplan-Meier Analysis

In [None]:
# Overall (unstratified) Kaplan-Meier estimate on `df`
km_fit <- survfit(Surv(time, status) ~ 1, data = df)

# Reporting grid: 0, 43, 86, ..., 731 days (steps of 43 days)
times_to_check <- seq(from = 0, to = 731, by = 43)

# Survival estimates at each time on the grid
summary(km_fit, times = times_to_check)

In [None]:
# Overall Kaplan-Meier estimate reported at the single time point 731
survfit(Surv(time, status) ~ 1, data = df) %>%
  summary(times = 731)

In [None]:
# Overall Kaplan-Meier curve for `df`, drawn with the 'ggsurvfit' package,
# with a confidence band and a risk table underneath
ggsurvfit(survfit2(Surv(time, status) ~ 1, data = df)) +
  add_confidence_interval() +
  add_risktable() +
  labs(x = "Days", y = "Overall survival probability")

In [None]:
# Kaplan-Meier estimates stratified by the 'trt' variable, reported at time 731
survfit(Surv(time, status) ~ trt, data = df) %>%
  summary(times = 731)

In [None]:
# Kaplan-Meier curves stratified by 'trt', drawn with survminer's ggsurvplot()
fit.obj <- survfit(Surv(time, status) ~ trt, data = df)

p1 <- ggsurvplot(
  fit.obj,
  data = df,
  # --- what is drawn: curves only ---
  risk.table = FALSE,
  risk.table.height = 0.3, # only relevant if the risk table is switched on
  pval = FALSE,
  conf.int = FALSE,
  # --- axes ---
  xlim = c(0, 731),
  break.time.by = 100, # x-axis breaks every 100 time units
  ylim = c(0.65, 1),
  xlab = "Time in days",
  ylab = "Survival probability",
  font.xlab = 12,
  font.ylab = 12,
  # --- legend and colours ---
  legend.labs = c("No Spiro", "Spiro"),
  font.legend = 10,
  palette = c("slategrey", "dodgerblue4")
)

# Display the survival plot
p1

In [None]:
# Kaplan-Meier curves by treatment arm, drawn with ggsurvfit in two variants:
# the package defaults and a restyled version.
gg_default <-
  survfit2(Surv(time, status) ~ trt, data = df) %>%
  ggsurvfit() +
  add_confidence_interval() +
  scale_ggsurvfit() +
  labs(title = "Default")

# Restyled variant. Time in this notebook is measured in days and the other
# plots use 0-731 as the x-range; the previous window of c(0, 8) would have
# zoomed the plot to the first 8 days only.
gg_styled <-
  gg_default +
  coord_cartesian(xlim = c(0, 731)) +
  scale_color_manual(values = c("#54738E", "#82AC7C")) +
  scale_fill_manual(values = c("#54738E", "#82AC7C")) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Modified",
    y = "Percentage Survival"
  )

# Show both variants (previously gg_styled was built but never displayed)
gg_default
gg_styled

# Cox Proportional Hazards Model (without confounders)

In [None]:
# Unadjusted Cox model: treatment arm is the only predictor
cox <- coxph(formula = Surv(time, status) ~ as.factor(trt), data = df)

# Coefficients, hazard ratio and model tests
summary(cox)

#### Testing proportional Hazards assumption

In [None]:
# Proportional-hazards test (cox.zph) for the Cox model currently stored in
# `cox`; the result is kept in `test.ph` and then printed
test.ph <- cox.zph(cox)
test.ph

In [None]:
# Graphical display of the cox.zph result stored in `test.ph`
ggcoxzph(test.ph)

# Cox Proportional Hazards Model (adjusting for confounders)

In [None]:
# Covariate-adjusted Cox model. Note that this overwrites the unadjusted
# fit previously stored in `cox`.
cox <- coxph(
  Surv(time, status) ~ as.factor(trt) + lvef + creatinine + potassium + age +
    as.factor(gender) + as.factor(ace_inhibitors),
  data = df
)

# Coefficients, hazard ratios and model tests
summary(cox)

#### Testing proportional Hazards assumption

In [None]:
# Proportional-hazards test (cox.zph) for the Cox model currently stored in
# `cox`; overwrites any earlier `test.ph` and prints the result
test.ph <- cox.zph(cox)
test.ph

In [None]:
# Graphical display of the cox.zph result, restricted to the treatment term
ggcoxzph(test.ph, var = 'as.factor(trt)')

In [None]:
# Functional-form check: martingale residuals of the Cox model stored in
# `cox`, plotted against each continuous predictor with a smoothing spline.
library(survival)

# Martingale residuals of the fitted model
Y <- resid(cox, type = "martingale")

# coxph() drops rows with missing model variables, so the residual vector can
# be shorter than `df`. Recover the rows that actually entered the model so
# each residual is paired with the covariate value of the same subject.
rows_used <- seq_len(nrow(df))
if (length(Y) != nrow(df)) {
  rows_used <- setdiff(rows_used, as.integer(cox$na.action))
}
stopifnot(length(Y) == length(rows_used))

# Continuous predictors to inspect
predictors <- c("age", "lvef", "creatinine", "potassium")

# 2x2 plotting layout; keep the previous settings so they can be restored
old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 1) + 0.1, cex = 1.2, mgp = c(2, 0.5, 0))

for (predictor in predictors) {
  X <- df[[predictor]][rows_used]

  # smooth.spline() does not accept missing values
  complete <- !is.na(X) & !is.na(Y)

  # Scatter plot of residuals against the predictor
  plot(X[complete], Y[complete], pch = 20, col = "darkgrey",
       ylab = "Martingale Residuals",
       xlab = predictor,
       main = paste("Residuals vs.", predictor),
       cex.lab = 1, cex.main = 1, cex.axis = 1)

  # Reference line at zero
  abline(h = 0, col = "black", lty = 2)

  # Smoothing spline (7 degrees of freedom); no confidence band is drawn
  smooth_fit <- smooth.spline(X[complete], Y[complete], df = 7)
  lines(smooth_fit, lty = 2, lwd = 2, col = "black")
}

# Restore every graphical parameter changed above, not just mfrow
par(old_par)

# Cox Proportional Hazards Model (adjusting for confounders using IPTW) - non stabilized weights

### Fit a propensity score model to estimate the denominator of the weights 

In [None]:
# Propensity-score model: logistic regression of treatment (trt == 1) on the
# baseline covariates
ps.treatment <- glm(
  formula = trt == 1 ~ lvef + creatinine + potassium + age +
    as.factor(gender) + as.factor(ace_inhibitors),
  family = binomial(link = "logit"),
  data = df
)

summary(ps.treatment)

In [None]:
# Predicted probability of treatment for every row of `df`
df[["ps_treatment"]] <- predict(ps.treatment, newdata = df, type = "response")

In [None]:
# Preview the data again, now including the `ps_treatment` column
head(df)

In [None]:
# Distribution of the estimated propensity scores within each arm
with(df, summary(ps_treatment[trt == 1])) # trt == 1
with(df, summary(ps_treatment[trt == 0])) # trt == 0

In [None]:
# Mirror plot of the propensity scores for the entire participant population:
# the density histogram of the trt == 1 group is drawn above the x-axis and
# that of the trt == 0 group is reflected below it. Previously both histograms
# were drawn upward on top of each other, so one group hid the other.
mirror.plot1 <- ggplot(df, aes(x = ps_treatment, fill = factor(trt))) +
  geom_histogram(data = subset(df, trt == 1),
                 aes(y = after_stat(density))) +
  geom_histogram(data = subset(df, trt == 0),
                 aes(y = after_stat(-density))) +
  # Show magnitudes on the axis so the reflected half is not labelled negative
  scale_y_continuous(labels = abs) +
  ylab("Density (%)") +
  xlab("Probability of Receiving Spironolactone") +
  ggtitle("Propensity Scores in Treated and Untreated\n(Density Histogram)") +
  scale_fill_discrete(name = "Treatment")

mirror.plot1

In [None]:
# Side-by-side (faceted) propensity-score histograms, one panel per value of
# `trt`. The y aesthetic is mapped to the density so that the axis agrees with
# its "Density" label and the plot title; previously raw counts were drawn.
# Overwrites the earlier `mirror.plot1`.
mirror.plot1 <- ggplot(data = df, aes(x = ps_treatment, fill = factor(trt))) +
  geom_histogram(data = subset(df, trt == 1), aes(y = after_stat(density)),
                 binwidth = 0.05, position = "identity", alpha = 0.5) +
  geom_histogram(data = subset(df, trt == 0), aes(y = after_stat(density)),
                 binwidth = 0.05, position = "identity", alpha = 0.5) +
  facet_grid(. ~ trt) +
  ylab("Density (%)") +
  xlab("Probability of Receiving Spironolactone") +
  ggtitle("Propensity Scores in Treated and Untreated\n(Density Histogram)") +
  scale_fill_discrete(name = "Treatment")

mirror.plot1

### Estimate the inverse probability weights 

In [None]:
# Unstabilised inverse probability of treatment weights: one over the
# estimated probability of the treatment level actually observed
# (ps for trt == 1, 1 - ps otherwise)
df$w_a <- 1 / ifelse(df$trt == 1, df$ps_treatment, 1 - df$ps_treatment)

In [None]:
# Minimum, quartiles, median, mean and maximum of the weights ...
summary(df[["w_a"]])
# ... and their standard deviation
sd(df[["w_a"]])

In [None]:
# Histogram of the inverse probability weights
hist(
  df[["w_a"]],
  xlab = "IPTW Weights",
  main = "IPTW Weights Distribution"
)

###  Assessing the balance of covariates between treatment groups 

In [None]:
# Covariates whose balance between treatment groups is assessed
covariates <- df[, c("age", "gender", "creatinine", "lvef", "potassium", "ace_inhibitors")]

# Balance table before (un = TRUE) and after applying the weights `w_a`
bal.tab(covariates, treat = df$trt, weights = df$w_a, un = TRUE)

###  Fit the weighted Cox proportional hazards model


In [None]:
# Weighted Cox model fitted with coxphw(), using the 'PH' template and the
# inverse probability weights as case weights
fit1 <- coxphw(
  Surv(time, status) ~ as.factor(trt),
  data = df,
  caseweights = df$w_a,
  template = "PH"
)

summary(fit1)

In [None]:
# Weighted Cox model fitted with survival::coxph(), using the inverse
# probability weights. The weights are not frequency counts, so a robust
# (sandwich) variance is requested explicitly rather than relying on
# version-dependent defaults.
cox_model <- coxph(Surv(time, status) ~ as.factor(trt),
                   data = df,
                   weights = df$w_a,
                   robust = TRUE)
summary(cox_model)

#  From a conditional HR to a marginal HR

In [None]:
# Outcome model: Cox regression on treatment plus the baseline covariates
cox_model <- coxph(
  Surv(time, status) ~ as.factor(trt) + lvef + creatinine + potassium + age +
    as.factor(gender) + as.factor(ace_inhibitors),
  data = df
)

# Copy of the cohort whose treatment column is overwritten per scenario
synthetic_data <- df

# Scenario A: every subject set to trt = 1
synthetic_data[["trt"]] <- 1
risk_scores_treatment <- predict(cox_model, newdata = synthetic_data, type = "risk")

# Scenario B: every subject set to trt = 0
synthetic_data[["trt"]] <- 0
risk_scores_no_treatment <- predict(cox_model, newdata = synthetic_data, type = "risk")

# Summary contrast between the two scenarios.
# NOTE(review): predict(type = "risk") is documented as returning
# exp(linear predictor), so this exponentiates a difference of values that are
# already on the hazard-ratio scale - confirm this is the intended estimator.
marginal_HR <- exp(mean(risk_scores_treatment) - mean(risk_scores_no_treatment))

marginal_HR

In [None]:
# Average predicted risk score with every subject set to trt = 1
mean(risk_scores_treatment)

In [None]:
# Average predicted risk score with every subject set to trt = 0
mean(risk_scores_no_treatment)

In [None]:
library(boot)

In [None]:
# Statistic function for boot::boot(): recomputes the marginal hazard ratio
# contrast on one bootstrap resample.
#
# data:    data frame holding time, status, trt and the model covariates
# indices: row indices of `data` that make up the resample
# returns: a single numeric value
marginal_HR_boot <- function(data, indices) {
  resample <- data[indices, ]

  # Refit the covariate-adjusted Cox model on the resample
  fit <- coxph(
    Surv(time, status) ~ as.factor(trt) + lvef + creatinine + potassium + age +
      as.factor(gender) + as.factor(ace_inhibitors),
    data = resample
  )

  # Mean predicted risk score when every resampled subject is given `arm`
  mean_risk_under <- function(arm) {
    counterfactual <- resample
    counterfactual$trt <- arm
    mean(predict(fit, newdata = counterfactual, type = "risk"))
  }

  # NOTE(review): predict(type = "risk") is documented as returning
  # exp(linear predictor), so this exponentiates a difference of values that
  # are already on the hazard-ratio scale - confirm the intended estimator.
  exp(mean_risk_under(1) - mean_risk_under(0))
}

# Bootstrap the estimator: 2000 resamples of `df`, with a fixed seed so the
# run is reproducible
set.seed(865)
boot_results <- boot(data = df, statistic = marginal_HR_boot, R = 2000)

# Percentile-type bootstrap confidence interval
boot_ci <- boot.ci(boot.out = boot_results, type = "perc")

# Report the estimate on the original sample (t0) and the interval
print("Bootstrapped Marginal Hazard Ratio:")
print(boot_results[["t0"]])
print("Bootstrapped Confidence Intervals:")
print(boot_ci)