In [85]:
library(magrittr)

#### Edit MRD and insert TTE column 

In [7]:
# Paths of the four tab-separated input tables.
RPATH1 <- "data/GSE67684/processed/metadata/metadata-label_mrd_subtype.tsv"
RPATH2 <- "data/GSE67684/processed/metadata/metadata-edited_labels.tsv"
RPATH3 <- "data/GSE67684/processed/metadata/lab_id.tsv"
RPATH4 <- "data/GSE67684/README/survival_data.tsv"

# Every file has a header row. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned, whereas `TRUE` is a reserved word.
metadata <- read.table(RPATH1, sep = "\t", header = TRUE)
mrd_info <- read.table(RPATH2, sep = "\t", header = TRUE)
# `id` is keyed by its second column and `tte` by its first column, so both
# can be indexed by row name further down.
id <- read.table(RPATH3, sep = "\t", header = TRUE, row.names = 2)
tte <- read.table(RPATH4, sep = "\t", header = TRUE, row.names = 1)

In [24]:
# Recode the textual day-33 MRD entries and store the result as numbers.
# Entries listed in `mrd_as_1e6` become 1E-6 and "<1x10-4" becomes 1E-8;
# every other entry is passed to as.numeric() unchanged.
mrd <- as.character(mrd_info$d33_mrd)
mrd_as_1e6 <- c("<1x10-4(+)", "5E-05", "1E-05")
mrd <- replace(mrd, mrd %in% mrd_as_1e6, 1E-6)
mrd <- replace(mrd, mrd == "<1x10-4", 1E-8)
metadata[["d33_mrd"]] <- as.numeric(mrd)

# Attach the time-to-event value: translate the row names of `metadata` into
# lab IDs through `id`, then look those IDs up in the "years" column of `tte`.
lab_id <- as.character(id[rownames(metadata), ])
metadata1 <- cbind(metadata, years = tte[lab_id, "years"])
# Place column 8 between columns 5 and 6.
# NOTE(review): assumes `metadata` has 7 columns at this point, so that the
# appended "years" is column 8 -- confirm against the input file.
column_order <- c(1:5, 8, 6, 7)
metadata2 <- metadata1[, column_order]

In [27]:
# Write the merged table so that one file replaces the two it was built from.
# Row names are written as the first (unnamed) column, which read.table()
# picks up again as row names when the header has one field fewer.
WPATH <- "data/GSE67684/processed/metadata/pid-mrd_label_tte_subtype.tsv"
write.table(metadata2, WPATH, quote = FALSE, sep = "\t")

### V4
- Added BMT (bone marrow transplant) column

### V5
- Added extra comments to status column
- Added maspore and treatment columns (minor changes in maspore column from original: "as per 2003" changed to 2003)

### V6
- Added "treatment_processed" column
    1. SR
    2. IR
    3. HR1
        - HR subtypes (BCR-ABL1, MLL, Hypodiploid)
        - PPR
    4. HR2
    5. BMT
- Added "treatment" column ("treatment_processed", with the value replaced by "BMT" for patients flagged as BMT)

Note: Down syndrome (SR), MLL: Patients classified as IR are not changed.

In [247]:
# Load the v5 annotation table: first column as row names, surrounding
# whitespace stripped, strings kept as character (not factor), and only the
# double quote treated as a quoting character.
RPATH <- "../data/GSE67684/processed/metadata/pid-metadata_v5.tsv"
annot_v5 <- read.table(
  RPATH, header = TRUE, sep = "\t", quote = '"',
  row.names = 1, strip.white = TRUE, stringsAsFactors = FALSE
)

In [248]:
# Start from the first two characters of "treatment_raw" (so "HR (PPR)"
# becomes "HR") and insert the result as the fifth column of the table.
# NOTE(review): the column positions 1:4 / 5:11 are hard-coded, and running
# this cell twice would insert the column twice.
treatment_processed <- substring(annot_v5$treatment_raw, 1, 2)
annot_v5 <- cbind(
  annot_v5[, 1:4],
  treatment_processed,
  annot_v5[, 5:11],
  stringsAsFactors = FALSE
)

# The three assignments below depend on their order: HR1 cases are marked
# first, and whatever is still plain "HR" afterwards becomes HR2.

# Assign HR (PPR) -> HR1
annot_v5$treatment_processed[annot_v5$treatment_raw == "HR (PPR)"] <- "HR1"
# Assign HR subtypes and "treatment_raw" that starts with HR -> HR1
# (patients of these subtypes whose raw treatment is not HR are left as is)
hr_subtypes <- subset(
  annot_v5,
  subtype %in% c("BCR-ABL", "MLL", "Hypodiploid") &
  startsWith(treatment_raw, "HR"),
  select = c("subtype", "treatment_raw", "treatment_processed")
)
annot_v5[rownames(hr_subtypes), "treatment_processed"] <- "HR1"
# Remaining HR -> HR2
annot_v5$treatment_processed[annot_v5$treatment_processed == "HR"] <- "HR2"

In [249]:
# "treatment" is "treatment_processed" with the value overridden by "BMT"
# for every patient whose BMT flag is TRUE.
treatment <- annot_v5$treatment_processed
# The override relies on logical indexing. A 0/1 or character BMT column would
# silently select the wrong elements, so fail loudly instead.
stopifnot(is.logical(annot_v5$BMT))
# which() drops NA flags, so patients with an unknown BMT status keep their
# processed treatment.
treatment[which(annot_v5$BMT)] <- "BMT"
# Insert "treatment" as the sixth column, right after "treatment_processed".
annot_v6 <- cbind(
  annot_v5[, 1:5],
  treatment,
  annot_v5[, 6:12],
  stringsAsFactors = FALSE
)

In [254]:
# subset(
#   annot_v6,
#   treatment_processed %in% c("HR1", "HR2"),
#   select = c("subtype", "treatment_raw", "treatment_processed")
# )

In [255]:
# Save the v6 annotation table (tab-separated, unquoted, row names included).
WPATH <- "../data/GSE67684/processed/metadata/pid-metadata_v6.tsv"
write.table(annot_v6, WPATH, quote = FALSE, sep = "\t")

### V7
- Added Week 12 MRD column (recoded with the same rules as Day 33 MRD):
- <1x10-3 (+): NA
- <1x10-3: NA
- <5x10-4 (+): NA
- <5x10-4: NA
- <1x10-4 (+): 10^-6
- Between 10^-5 and 10^-4: 10^-6
- 1x10-5: 10^-8
- <1x10-4: 10^-8
- <1x10-5: 10^-8
- <5x10-6: 10^-8

In [258]:
# Reload the v6 annotation table from disk, with the first column as row names.
rpath <- "../data/GSE67684/processed/metadata/pid-metadata_v6.tsv"
annot_v6 <- read.table(
  rpath, sep = "\t", header = TRUE, row.names = 1, quote = '"'
)

In [259]:
# Load the raw v3 patient metadata, keyed by its second column.
# comment.char = "" keeps "#" inside fields from truncating a line.
rpath <- "../data/GSE67684/README/patient_metadata_v3.txt"
raw_v3 <- read.table(
  rpath, sep = "\t", header = TRUE, row.names = 2,
  strip.white = TRUE, comment.char = "", quote = '"'
)

In [260]:
# Parse the fourth column of the raw metadata into numeric week-12 MRD values.
wk12_mrd <- as.character(raw_v3[, 4])

# Censored readings are recoded by exact match:
#   mrd_levels1 -> NA, mrd_levels2 -> 1E-6, mrd_levels3 -> 1E-8
# NOTE(review): the V7 changelog also lists "Between 10^-5 and 10^-4: 10^-6"
# and "1x10-5: 10^-8"; neither rule is applied here, so such readings keep
# their literal numeric value -- confirm which is intended.
mrd_levels1 <- c("<1x10-3", "<1x10-3 (+)", "<5x10-4", "<5x10-4 (+)")
mrd_levels2 <- c("<1x10-4 (+)", "<1x10-4(+)")
mrd_levels3 <- c("<1x10-4", "<1x10-5", "<5x10-6")
wk12_mrd <- replace(wk12_mrd, wk12_mrd %in% mrd_levels1, NA)
wk12_mrd <- replace(wk12_mrd, wk12_mrd %in% mrd_levels2, 1E-6)
wk12_mrd <- replace(wk12_mrd, wk12_mrd %in% mrd_levels3, 1E-8)
# Any entry that begins with "NA" is treated as missing
wk12_mrd[startsWith(wk12_mrd, "NA")] <- NA

# Rewrite "<a>x10<b>" as "<a>e<b>" so that as.numeric() can parse it
idx <- grepl("x10", wk12_mrd, fixed = TRUE)
wk12_mrd[idx] <- sub("x10", "e", wk12_mrd[idx], fixed = TRUE)

wk12_mrd <- as.numeric(wk12_mrd)

In [264]:
# Insert the parsed and the raw week-12 MRD values after the first two
# columns of the v6 table.
# cbind() joins by position and recycles shorter vectors without warning, so
# make sure both sources have one entry per row of `annot_v6`.
# NOTE(review): row ORDER is not checked; this assumes `raw_v3` lists the
# patients in the same order as `annot_v6` -- confirm.
stopifnot(
  length(wk12_mrd) == nrow(annot_v6),
  nrow(raw_v3) == nrow(annot_v6)
)
annot_v7 <- cbind(
  annot_v6[, 1:2],
  wk12_mrd,
  wk12_mrd_raw = raw_v3[, 4],
  annot_v6[, 3:13]
)

In [265]:
# Save the v7 annotation table (tab-separated, unquoted, row names included).
wpath <- "../data/GSE67684/processed/metadata/pid-metadata_v7.tsv"
write.table(annot_v7, wpath, quote = FALSE, sep = "\t")

In [269]:
# Pull out the patients whose subtype is one of the HR subtypes, keeping only
# the columns that describe their treatment. This selects rows for inspection;
# nothing is assigned here.
hr1_subtype <- annot_v7[
  annot_v7$subtype %in% c("BCR-ABL", "MLL", "Hypodiploid"),
  c("subtype", "treatment_raw", "BMT", "treatment"),
  drop = FALSE
]