### Install libraries

In [None]:
#library(ggfortify)
# install.packages("xtable")  # needed for the LaTeX tables

### Load libraries

In [None]:
library(aftsem)
library(survival)
library(ggplot2)
library(ggfortify)
library(xtable)

### Load our data

In [None]:
# Restore the objects saved in stan.RData into the workspace; the following
# cells use an object named `stan` (presumably provided by this file - confirm).
load("stan.RData") # change the path if necessary

In [None]:
# Let's look at the first few observations of the data set.
head(stan)

### Simple visualizations
<ul>
    <li>Look at <b>age</b> covariate and compare it to survival time</li>
    <li>Look at <b>t5</b> covariate and compare it to survival time</li>
</ul>    

In [None]:
# Scatter plot with age on the x-axis and survival time on the y-axis.
# Marker shape encodes the status column: status == 1 is drawn as a cross
# (shape 4), every other value as a filled circle (shape 16).
# The shape mapping is declared exactly once, at the plot level, so there is
# no second, shadowed mapping to keep in sync.
plot1 <- ggplot(
  stan,
  aes(x = age, y = time, shape = ifelse(status == 1, "x", "o"))
) +
  geom_point(size = 4) +
  scale_shape_manual(values = c("o" = 16, "x" = 4)) +
  labs(title = "", x = "Age", y = "Time", shape = "Status") +
  theme_minimal() +
  theme(legend.position = "none") # the legend is hidden on purpose

# Write the figure to disk.
ggsave("plot_age_time.png", plot1, width = 8, height = 6, dpi = 300)

In [None]:
# Scatter plot with t5 on the x-axis and survival time on the y-axis.
# Marker shape encodes the status column: status == 1 is drawn as a cross
# (shape 4), every other value as a filled circle (shape 16).
# The shape mapping is declared exactly once, at the plot level, so there is
# no second, shadowed mapping to keep in sync.
plot2 <- ggplot(
  stan,
  aes(x = t5, y = time, shape = ifelse(status == 1, "x", "o"))
) +
  geom_point(size = 4) +
  scale_shape_manual(values = c("o" = 16, "x" = 4)) +
  labs(title = "", x = "T5", y = "Time", shape = "Status") +
  theme_minimal() +
  theme(legend.position = "none") # the legend is hidden on purpose

# Write the figure to disk.
ggsave("plot_t5_time.png", plot2, width = 8, height = 6, dpi = 300)

### Look at some important observations
We are interested in
<ul>
    <li>Maximal observed survival time</li>
    <li>Minimal observed survival time</li>
    <li>Maximal observed age</li>
    <li>Minimal observed age </li>
    <li>Maximal observed t5</li>
    <li>Minimal observed t5</li>
    <li>Mean of age</li>
    <li>Mean of t5</li>
    <li>Percent of censoring</li>
</ul>    

In [None]:
# Print the largest and then the smallest observed value of each variable,
# in the order time, age, t5.
for (column in c("time", "age", "t5")) {
  observed <- stan[[column]]
  print(max(observed))
  print(min(observed))
}

In [None]:
# Print the sample mean of age, then of t5.
for (column in c("age", "t5")) {
  print(mean(stan[[column]]))
}

In [None]:
# Number of censored observations: total number of rows minus the sum of the
# status column (this arithmetic assumes status is coded 0/1 with 1 = event).
# The sample size is taken from the data instead of being typed in by hand.
print(nrow(stan) - sum(stan$status))

In [None]:
# Oldest patient(s): every row whose age equals the observed maximum.
# The maximum is computed from the data rather than copied from earlier output.
row_index <- which(stan$age == max(stan$age))
specific_row <- stan[row_index, ]
print(specific_row)

In [None]:
# Youngest patient(s): every row whose age equals the observed minimum.
# The minimum is computed from the data rather than copied from earlier output.
row_index <- which(stan$age == min(stan$age))
specific_row <- stan[row_index, ]
print(specific_row)

In [None]:
# Lowest t5: every row whose t5 equals the observed minimum.
# The minimum is computed from the data rather than copied from earlier output.
row_index <- which(stan$t5 == min(stan$t5))
specific_row <- stan[row_index, ]
print(specific_row)

In [None]:
# Highest t5: every row whose t5 equals the observed maximum.
# Comparing against max() instead of a typed-in decimal avoids relying on an
# exact floating-point literal matching the stored value.
row_index <- which(stan$t5 == max(stan$t5))
specific_row <- stan[row_index, ]
print(specific_row)

In [None]:
# Highest time: every row whose time equals the observed maximum.
# The maximum is computed from the data rather than copied from earlier output.
row_index <- which(stan$time == max(stan$time))
specific_row <- stan[row_index, ]
print(specific_row)

In [None]:
# Lowest time: every row whose time equals the observed minimum.
# The minimum is computed from the data rather than copied from earlier output.
row_index <- which(stan$time == min(stan$time))
specific_row <- stan[row_index, ]
print(specific_row)

### Kaplan-Meier survival curve

This gives a non-parametric estimate of the survival function.

In [None]:
# Kaplan-Meier estimate of the survival function, using survfit() from the
# survival package. The fit is computed before the graphics device is opened
# so that a failing fit cannot leave an open device behind.
fit.stan <- survfit(Surv(time, status) ~ 1, data = stan)

# Open the PNG device (800 x 600); everything drawn until dev.off() goes into
# this file.
png("survival_plot_with_lines.png", width = 800, height = 600)

plot(
  fit.stan,
  xlab = "time", ylab = "Funkce přežití", col = "black",
  cex.lab = 1.5, cex.axis = 1.5, cex.main = 1.5, lwd = 2
)

# Horizontal reference line at survival probability 0.5 across the whole plot.
abline(h = 0.5, col = "red", lwd = 2)

# NOTE(review): 1000 is a hard-coded x-position. If the marker is meant to sit
# where the curve crosses 0.5, the value could be read from the fit with
# quantile(fit.stan, probs = 0.5) - confirm the intent before changing it.
time_at_50_percent <- 1000
segments(0, 0.5, time_at_50_percent, 0.5, col = "red", lwd = 2)

# Vertical line at the same x-value, from y = 0 up to y = 0.5.
segments(time_at_50_percent, 0, time_at_50_percent, 0.5, col = "red", lwd = 2)

# Close the device so the PNG file is actually written out and released;
# invisible() keeps the device number out of the cell output.
invisible(dev.off())

### Semiparametric AFT model

<b>Let's use our implemented model!</b> <br>
The next cells show how to use the <b>aftsem</b> package.

### --------------------------------------------------
#### First model => log(T)  = age + t5 + eps
### --------------------------------------------------

In [None]:
# Fit the first model, specifying the formula, method and resample arguments.
# Variables are looked up through `data = stan` (the same style the second
# model uses) instead of repeating `stan$` inside the formula, which also gives
# the coefficients the plain names `age` and `t5`.
fit <- aftsem(
  Surv(log(time), status) ~ age + t5,
  data = stan,
  method = "jin",
  resample = 500
)

In [None]:
# Let's look at the summary of the first fit (method = "jin").
summary(fit)

In [None]:
# What was the initial beta guess? It is stored on the fit as `betafirst`.
print(fit$betafirst)

In [None]:
#fit$resid

In [None]:
# Now use the "gehan-heller" method. To get a variance estimate the control
# list has to be changed (variance.estimation = TRUE).
# Variables are looked up through `data = stan` (the same style the second
# model uses) instead of repeating `stan$` inside the formula.
fit2 <- aftsem(
  Surv(log(time), status) ~ age + t5,
  data = stan,
  method = "gehan-heller",
  control = list(
    variance.estimation = TRUE,
    use.grad = FALSE,
    optimx.alg = "BFGS"
  )
)

In [None]:
# Let's look at the summary of the "gehan-heller" fit.
summary(fit2)

In [None]:
# Estimated regression parameters of the first model for every method.
# No control list is passed, so the default settings apply (no variance
# estimation). binit is left at its default ("auto") everywhere except for
# "gehan-heller", which is started from the "gehan" estimate.
# Variables are looked up through `data = stan` instead of `stan$` in the
# formula, matching the calls for the second model.
b1 <- aftsem(Surv(log(time), status) ~ age + t5, data = stan, method = "gehan-poly")$beta
b2 <- aftsem(Surv(log(time), status) ~ age + t5, data = stan, method = "gehan-heller", binit = "gehan")$beta
b3 <- aftsem(Surv(log(time), status) ~ age + t5, data = stan, method = "gehan")$beta
b4 <- aftsem(Surv(log(time), status) ~ age + t5, data = stan, method = "jin")$beta
b5 <- aftsem(Surv(log(time), status) ~ age + t5, data = stan, method = "buckley")$beta

In [None]:
# Collect the estimates in a data frame, one column per method.
# check.names = FALSE keeps the hyphenated column names exactly as written;
# with the default (TRUE) data.frame() would turn them into `gehan.poly` and
# `gehan.heller`, and those altered names would end up in the LaTeX header.
res <- data.frame(
  `gehan-poly` = as.vector(b1),
  `gehan-heller` = as.vector(b2),
  gehan = as.vector(b3),
  jin = as.vector(b4),
  buckley = as.vector(b5),
  check.names = FALSE
)

In [None]:
# Print the results as a LaTeX table (this cell does not need to be executed).
# Row names and the xtable header comment are left out; horizontal rules are
# requested at positions -1, 0 and nrow(res).
latex_table <- xtable(res)
rule_positions <- c(-1, 0, nrow(res))
print(
  latex_table,
  include.rownames = FALSE,
  hline.after = rule_positions,
  comment = FALSE
)

### --------------------------------------------------
#### Second model => log(T)  = age + age^2 + eps
### --------------------------------------------------

In [None]:
# Second model: log(time) regressed on age and age^2, fitted with the
# "gehan-heller" method started from the "gehan" estimate. The control list
# switches variance estimation on.
fit3 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan,
  method = "gehan-heller",
  binit = "gehan",
  control = list(
    variance.estimation = TRUE,
    use.grad = FALSE,
    optimx.alg = "BFGS",
    quantile.method = "br"
  )
)

In [None]:
# Show the summary of the second model's "gehan-heller" fit.
summary(fit3)

In [None]:
# Estimated regression parameters of the second model, one fit per method.
# No control list is passed; only the "gehan-heller" call sets binit explicitly.
b1 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan, method = "gehan-poly"
)$beta
b2 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan, method = "gehan-heller", binit = "gehan"
)$beta
b3 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan, method = "gehan"
)$beta
b4 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan, method = "jin"
)$beta
b5 <- aftsem(
  Surv(log(time), status) ~ age + I(age^2),
  data = stan, method = "buckley"
)$beta

In [None]:
# Collect the estimates in a data frame, one column per method.
# check.names = FALSE keeps the hyphenated column names exactly as written;
# with the default (TRUE) data.frame() would turn them into `gehan.poly` and
# `gehan.heller`, and those altered names would end up in the LaTeX header.
res <- data.frame(
  `gehan-poly` = as.vector(b1),
  `gehan-heller` = as.vector(b2),
  gehan = as.vector(b3),
  jin = as.vector(b4),
  buckley = as.vector(b5),
  check.names = FALSE
)

In [None]:
# Print the results as a LaTeX table. Row names and the xtable header comment
# are left out; horizontal rules are requested at positions -1, 0 and nrow(res).
latex_table <- xtable(res)
rule_positions <- c(-1, 0, nrow(res))
print(
  latex_table,
  include.rownames = FALSE,
  hline.after = rule_positions,
  comment = FALSE
)