# Draw ROC curve

## EBV+MSI vs other, with data augmentation by random color change and blurring

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TMA predictions:
# confusion matrix at a 0.5 cut-off, then the ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "EBV+MSI vs other"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). Leaving
# these to pROC's auto-detection would silently flip the comparison, and
# report AUC > 0.5, for a model that is worse than chance.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## EBV vs MSI vs other, with data augmentation by random color change and blurring

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the three-class "EBV vs MSI vs other" model on the TMA predictions
# after collapsing it to the binary question "EBV+MSI vs other".
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "EBV vs MSI vs other"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Score of the merged positive class: whatever is not assigned to "other".
ebv_msi <- 1.0 - df$other
df <- cbind(df, ebv_msi)

# Per-class scores in a fixed column order: 1 = EBV, 2 = MSI, 3 = other.
df2 <- data.frame(df$EBV, df$MSI, df$other)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if one class never occurs.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)

# Predicted class = column with the highest score; EBV and MSI are merged.
predicted_label <- apply(df2, 1, which.max)
predicted_label <- factor(
  ifelse((predicted_label == 1) | (predicted_label == 2), "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher ebv_msi score
# means the positive class. pROC's auto-detection would otherwise silently
# flip the comparison, and report AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ ebv_msi,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## EBV vs MSI + other, with data augmentation by random color change and blurring

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV vs MSI+other" model on the TMA predictions:
# confusion matrix at a 0.5 cut-off, then the ROC curve with ci = TRUE.
class_levels <- c("EBV", "MSI+other")

df <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "EBV vs MSI+other"
)

# 1 = EBV (positive class), 0 = MSI and every other subtype.
label <- ifelse(df$Molecular3 == "EBV", 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV", "MSI+other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV", "MSI+other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## MSI vs EBV + other, with data augmentation by random color change and blurring

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "MSI vs EBV+other" model on the TMA predictions:
# confusion matrix at a 0.5 cut-off, then the ROC curve with ci = TRUE.
# The level order matches the alphabetical default, so the printed table
# layout is unchanged; the positive class is passed explicitly below.
class_levels <- c("EBV+other", "MSI")

df <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "MSI vs EBV+other"
)

# 1 = MSI (positive class), 0 = EBV and every other subtype.
label <- ifelse(df$Molecular3 == "MSI", 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "MSI", "EBV+other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "MSI", "EBV+other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# "MSI" is the second level, so the positive class must be named here.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## Combination of EBV vs MSI + other and MSI vs EBV + other, with data augmentation by random color change and blurring

In [None]:
library(caret)
library(openxlsx)

# Combine the two one-vs-rest models ("EBV vs MSI+other" and
# "MSI vs EBV+other") into a single "EBV+MSI vs other" call: a case is
# predicted positive when either model scores it at 0.5 or above.
class_levels <- c("EBV+MSI", "other")

dfebv <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "EBV vs MSI+other"
)
dfmsi <- read.xlsx(
  "../results/prediction/training2/tma.xlsx",
  sheet = "MSI vs EBV+other"
)

# Keep only the id and the MSI model's score (renamed so it does not clash
# with the EBV model's Prediction column), then join the sheets on id.
id <- dfmsi$id
Prediction_MSI <- dfmsi$Prediction
dfmsi <- data.frame(id, Prediction_MSI)
df <- merge(dfebv, dfmsi, by = "id")

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-offs.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5 | df$Prediction_MSI >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

## EBV + MSI vs other, with data augmentation by random color change and blurring
### TCGA cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TCGA predictions
# (sheet "with aug no tcga"): confusion matrix at a 0.5 cut-off, then the
# ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tcga.xlsx",
  sheet = "with aug no tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse(
  (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
  1, 0
)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## EBV + MSI vs other, with data augmentation by random color change and blurring and a part of TCGA cases used for training

### UTokyo cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TMA predictions
# (sheet "with aug with tcga"): confusion matrix at a 0.5 cut-off, ROC curve
# with ci = TRUE, and a TIFF export of the ROC plot.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tma.xlsx",
  sheet = "with aug with tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

# On-screen version of the curve.
plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

# Same curve written to a 3.4 x 3.4 inch, 500 dpi TIFF; print.auc.x moves
# the AUC annotation horizontally for the smaller canvas.
tiff(
  filename = "../results/roc_curve/utokyo.tiff",
  width = 3.4, height = 3.4, units = "in", res = 500
)
plot(ROC, legacy.axes = TRUE, print.auc = TRUE, print.auc.x = 0.95)
dev.off()

### TCGA cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TCGA predictions
# (sheet "with aug with tcga"): confusion matrix at a 0.5 cut-off, ROC curve
# with ci = TRUE, and a TIFF export of the ROC plot.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tcga.xlsx",
  sheet = "with aug with tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse(
  (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
  1, 0
)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

# On-screen version of the curve.
plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

# Same curve written to a 3.4 x 3.4 inch, 500 dpi TIFF; print.auc.x moves
# the AUC annotation horizontally for the smaller canvas.
tiff(
  filename = "../results/roc_curve/tcga.tiff",
  width = 3.4, height = 3.4, units = "in", res = 500
)
plot(ROC, legacy.axes = TRUE, print.auc = TRUE, print.auc.x = 0.95)
dev.off()

## EBV + MSI vs other, without data augmentation by random color change and blurring

### UTokyo cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TMA predictions
# (sheet "no aug no tcga"): confusion matrix at a 0.5 cut-off, then the
# ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tma.xlsx",
  sheet = "no aug no tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

### TCGA cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TCGA predictions
# (sheet "no aug no tcga"): confusion matrix at a 0.5 cut-off, then the
# ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tcga.xlsx",
  sheet = "no aug no tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse(
  (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
  1, 0
)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## EBV + MSI vs other, without data augmentation by random color change and blurring, and with a part of TCGA cases used for training

### UTokyo cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TMA predictions
# (sheet "no aug with tcga"): confusion matrix at a 0.5 cut-off, then the
# ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tma.xlsx",
  sheet = "no aug with tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse((df$Molecular3 == "EBV") | (df$Molecular3 == "MSI"), 1, 0)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

### TCGA cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the binary "EBV+MSI vs other" model on the TCGA predictions
# (sheet "no aug with tcga"): confusion matrix at a 0.5 cut-off, then the
# ROC curve with ci = TRUE.
class_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training3/tcga.xlsx",
  sheet = "no aug with tcga"
)

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse(
  (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
  1, 0
)
df <- cbind(df, label)

# Levels are fixed explicitly so both factors always share the same two
# levels, even if every case ends up on one side of the cut-off.
ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, ground_truth)
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other"),
  levels = class_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher Prediction means
# the positive class (same convention as the 0.5 cut-off above). pROC's
# auto-detection would otherwise silently flip the comparison, and report
# AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ Prediction,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)

## EBV vs MSI vs other, with data augmentation by random color change and blurring, and with a part of TCGA cases used for training

### TCGA cases

In [None]:
library(caret)
library(openxlsx)
library(pROC)

# Evaluate the three-class "EBV vs MSI vs other" model on the TCGA
# predictions, first as a three-class problem and then collapsed to the
# binary question "EBV+MSI vs other".
class3_levels <- c("EBV", "MSI", "other")
class2_levels <- c("EBV+MSI", "other")

df <- read.xlsx(
  "../results/prediction/training4/tcga.xlsx",
  sheet = "with aug with tcga"
)

# ---- Three-class evaluation ----

# Ground truth: keep EBV and MSI as they are, fold every other subtype into
# "other". Levels are fixed so truth and prediction always share all three
# classes, even if one of them never occurs.
label3 <- factor(
  ifelse(
    (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
    df$Molecular.Subtype, "other"
  ),
  levels = class3_levels
)
df <- cbind(df, label3)

# Per-class scores in a fixed column order: 1 = EBV, 2 = MSI, 3 = other.
df2 <- data.frame(df$EBV, df$MSI, df$other)

# Index of the highest-scoring class per row; computed once and reused for
# both the three-class and the binary prediction below.
predicted_index <- apply(df2, 1, which.max)
predicted_label3 <- factor(
  ifelse(predicted_index == 1, "EBV",
         ifelse(predicted_index == 2, "MSI", "other")),
  levels = class3_levels
)
df <- cbind(df, predicted_label3)

confusionMatrix(df$predicted_label3, df$label3)

# ---- Binary evaluation: EBV+MSI vs other ----

# 1 = EBV or MSI (positive class), 0 = every other subtype.
label <- ifelse(
  (df$Molecular.Subtype == "EBV") | (df$Molecular.Subtype == "MSI"),
  1, 0
)
df <- cbind(df, label)

# Score of the merged positive class: whatever is not assigned to "other".
ebv_msi <- 1.0 - df$other
df <- cbind(df, ebv_msi)

ground_truth <- factor(
  ifelse(df$label == 1, "EBV+MSI", "other"),
  levels = class2_levels
)
df <- cbind(df, ground_truth)

# EBV (1) and MSI (2) are merged into the positive class.
predicted_label <- factor(
  ifelse((predicted_index == 1) | (predicted_index == 2), "EBV+MSI", "other"),
  levels = class2_levels
)
df <- cbind(df, predicted_label)

# State the positive class instead of relying on "first level" defaults.
confusionMatrix(df$predicted_label, df$ground_truth, positive = "EBV+MSI")

# Pin controls (0) / cases (1) and the direction: a higher ebv_msi score
# means the positive class. pROC's auto-detection would otherwise silently
# flip the comparison, and report AUC > 0.5, for a worse-than-chance model.
ROC <- roc(
  label ~ ebv_msi,
  data = df,
  levels = c(0, 1),
  direction = "<",
  ci = TRUE
)
print(ROC)

plot(ROC, legacy.axes = TRUE, print.auc = TRUE)