In [None]:
library(tidyverse)

# Combine all drug categories

In [None]:
# Read one saved script-series table per drug code, tag every row with the
# code it came from, and stack the tables into a single data frame.
# Swap the active code vector for one of the commented alternatives to run
# the same pipeline on a different set of drug codes.
data <- do.call(
  rbind,
  lapply(
    c("C02", "C03", "C07", "C08", "C09"),
#    c("C10AA", "C10AB", "C10AC", "C10AD", "C10AX06", "C10AX09"),
#    c("A10A", "A10BA", "A10BB", "A10BF", "A10BG", "A10BH", "A10BJ", "A10BK"),
    function(drug_code) {
      series <- readRDS(paste0("scriptseries.", drug_code, ".rds"))
      series$drug <- drug_code
      series
    }
  )
)

In [None]:
# Read the earliest-issue-date table (swap the file name to change analysis)
# and keep only rows that have a date and whose PERSON_ID also occurs in the
# script-series data loaded above.
foo <- read_tsv(
  "hypertension_srWGS.earliest_issue_date.txt"
#  "lipidaemia_srWGS_withC10B.earliest_issue_date.txt"
#  "diabetes_srWGS.earliest_issue_date.txt"
)
foo <- foo[
  !is.na(foo$earliest_issue_date) & foo$PERSON_ID %in% data$person_id,
]

# Prepend one zero-length pseudo-series per retained row (start == stop ==
# earliest issue date), labelled with drug "earliest_issue_date", so that it
# takes part in the gap calculations alongside the real script series.
data <- rbind(
  data.frame(
    person_id    = foo$PERSON_ID,
    scriptseries = 1,
    start        = foo$earliest_issue_date,
    stop         = foo$earliest_issue_date,
    drug         = "earliest_issue_date"
  ),
  data
)

In [None]:
library(dtplyr)

# Compute, for every script series, the gap to the surrounding series of the
# same person. The work is routed through dtplyr (lazy_dt) and materialised
# back into a tibble at the end.
# NOTE(review): as_tibble() on a grouped lazy_dt may return a tibble that is
# still grouped by person_id -- confirm if downstream code relies on either.
data <- as_tibble(data)
data <- data %>%
  lazy_dt() %>%
  # Sort by start (ties broken by stop) so lag()/lead() below refer to the
  # chronologically previous / next series.
  arrange(start, stop) %>%
  group_by(person_id) %>%
  mutate(
    # Earlier version, which only looked at the immediately preceding row:
#    preceding = start - lag(stop),
    # preceding: this start minus the latest stop among ALL earlier rows of
    # the person (lag() excludes the current row, cummax() keeps the running
    # maximum). The first row of each person gets Inf because of default=-Inf.
    # Plain numeric; presumably days if start/stop are Dates -- TODO confirm.
    preceding = as.numeric(start) - cummax(lag(as.numeric(stop), default=-Inf)), #numeric
    # Earlier version, which left NA on the last row of each person:
#    following = lead(start) - stop
    # following: next row's start minus this row's stop; a negative value
    # means the next series starts before this one stops. The last row of
    # each person uses default=Inf. Per the original note this is a difftime.
    following = lead(start, default=Inf) - stop #difftime
  ) %>%
  as_tibble()

In [None]:
# Window thresholds for the antihypertensive and hypolipidemic analyses.
# The values are compared against day differences further down.
mindrugfree    <- 180  # minimum drug-free period before the baseline measurement
maxbeforestart <- 730  # maximum gap from baseline measurement to drug start
minafterstart  <-  28  # minimum gap from drug start to post measurement
maxafterstart  <- 730  # maximum gap from drug start to post measurement

In [None]:
# antidiabetic
# Alternative thresholds for the antidiabetic analysis. They are kept
# commented out, like the other per-analysis alternatives in this file,
# because the active inputs and the output file name select the
# antihypertensive run with minafterstart = 28. Left live, this cell would
# silently overwrite minafterstart with 90 whenever the cells are executed
# top to bottom. To run the antidiabetic analysis, uncomment these four lines
# (and switch the commented file names / drug codes elsewhere accordingly).
#mindrugfree    = 180 # drug-free period before baseline-measurement
#maxbeforestart = 730 # baseline-measurement to drug-start
#minafterstart  =  90 # drug-start to post-measurement
#maxafterstart  = 730 # drug-start to post-measurement

In [None]:
# Candidate treatment episodes: real script series only (the
# "earliest_issue_date" pseudo-rows are dropped), preceded by at least
# mindrugfree without any earlier series, and lasting at least minafterstart
# once the series is cut short where the next series begins before it stops.
# Earlier variants wrapped preceding in !is.na()/as.integer() and passed
# na.rm=TRUE to pmin(); preceding and following now carry Inf instead of NA.
data2 <- filter(
  data,
  drug != "earliest_issue_date",
  preceding >= mindrugfree,
  as.integer(stop + pmin(following, 0) - start) >= minafterstart
)

# Combine with observation

In [None]:
# Load the measurement table (swap the file name to change the measured
# trait), then convert the measurement timestamp column to Date.
obs <- read.table(
  "DBPSBP_srWGS.SBP.txt",
#  "HDLLDL_srWGS.LDL.txt",
#  "HbA1c_srWGS.HbA1c.txt",
  header = TRUE,
  sep = "\t"
)
obs <- mutate(obs, measurement_datetime = as.Date(measurement_datetime))

In [None]:
# Collapse repeated measurements: one row per person and measurement date,
# holding the mean of that date's values.
obs <- obs %>%
  group_by(person_id, measurement_datetime) %>%
  summarise(
    value_as_number = mean(value_as_number),
    .groups = "drop"
  )

In [None]:
# Preview the first rows of the measurement table.
head(obs)

In [None]:
# Preview the first rows of the filtered script series.
head(data2)

In [None]:
# baseline-measurement
# Pair every candidate series with all measurements of the same person, keep
# the measurements that qualify as baseline, and reduce them to one weighted
# baseline value per series.
data3 <- data2 %>%
  inner_join(obs, by = "person_id", relationship = "many-to-many") %>%
  filter(
    # still at least mindrugfree after the latest earlier stop
    # (preceding minus the distance from the measurement to start)
    as.integer(measurement_datetime - start) + preceding >= mindrugfree,
    # no more than maxbeforestart before the series starts
    as.integer(start - measurement_datetime) <= maxbeforestart,
    # on or before the start itself
    measurement_datetime <= start
  ) %>%
  # Weight falls off linearly with distance from start; the exponent is the
  # weight power (1 here).
  mutate(
    w = (maxbeforestart - as.integer(start - measurement_datetime) + 1)^1
  ) %>%
  group_by(person_id, scriptseries, start, stop, drug, preceding, following) %>%
  summarise(
    # weighted mean of the baseline measurements
    # (alternatives tried earlier: an unweighted mean, and sum(w) as the count)
    value.drugfree = sum(value_as_number * w) / sum(w),
    n.drugfree = n(),
    # distance of the closest baseline measurement to start
    actualminbeforestart = min(as.integer(start - measurement_datetime)),
    .groups = "drop"
  )

# post-measurement
# Pair every series that has a baseline with the person's measurements again,
# keep those taken while on the drug, and reduce them to one weighted
# on-treatment value per series.
data4 <- data3 %>%
  inner_join(obs, by = "person_id", relationship = "many-to-many") %>%
  filter(
    # at least minafterstart after the series starts
    as.integer(measurement_datetime - start) >= minafterstart,
    # not after the series stops, nor after the next series has started
    # (an earlier variant passed na.rm=TRUE to pmin())
    measurement_datetime <= stop + pmin(0, following),
    # no more than maxafterstart after the series starts
    as.integer(measurement_datetime - start) <= maxafterstart
  ) %>%
  # Weight falls off linearly with distance from start; the exponent is the
  # weight power (1 here).
  mutate(
    w = (maxafterstart - as.integer(measurement_datetime - start) + 1)^1
  ) %>%
  group_by(
    person_id, scriptseries, start, stop, drug, preceding, following,
    value.drugfree, n.drugfree, actualminbeforestart
  ) %>%
  summarise(
    # weighted mean of the on-treatment measurements
    # (alternatives tried earlier: an unweighted mean, and sum(w) as the count)
    value.drugtake = sum(value_as_number * w) / sum(w),
    n.drugtake = n(),
    # distance of the earliest on-treatment measurement from start
    actualminafterstart = min(as.integer(measurement_datetime - start)),
    .groups = "drop"
  )

# delta
# Change from baseline: on-treatment value minus drug-free value.
data4 <- mutate(data4, value.delta = value.drugtake - value.drugfree)

In [None]:
# Preview the first rows of the per-series result table.
head(data4)

In [None]:
# Size of the result table and number of series per drug.
dim(data4)
table(data4$drug)

# Per-drug count, plus mean and standard deviation of the baseline value,
# the on-treatment value and their difference.
data4 %>%
  group_by(drug) %>%
  summarise(
    n        = n(),
    base     = mean(value.drugfree),
    base_sd  = sd(value.drugfree),
    post     = mean(value.drugtake),
    post_sd  = sd(value.drugtake),
    delta    = mean(value.delta),
    delta_sd = sd(value.delta)
  )

In [None]:
# Save the per-series result table as an unquoted, tab-separated text file
# without row names. Swap the file name to match the analysis being run.
write.table(
  data4,
  file = "antihypertensive.SBP.minafterstart28.wpower1.txt",
  #file = "hypolipidemics.LDL.minafterstart28.wpower1.txt",
  #file = "antidiabetic.HbA1c.wpower1.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)