# Assess the association between prediction and tumor mutational burden (EBV+MSI vs. other)

In [None]:
library(openxlsx)

# Read the prediction table from the "with aug with tcga" sheet and treat the
# molecular subtype as a categorical variable.
df <- read.xlsx(
  "../results/prediction/training3/tcga.xlsx",
  sheet = "with aug with tcga"
)
df$Molecular.Subtype <- factor(df$Molecular.Subtype)

# Ground truth: the EBV and MSI subtypes are pooled into a single "EBV+MSI"
# class; every other subtype becomes "other".
ground_truth <- factor(
  ifelse(
    df$Molecular.Subtype == "EBV" | df$Molecular.Subtype == "MSI",
    "EBV+MSI",
    "other"
  )
)
df$ground_truth <- ground_truth

# Predicted label: scores of at least 0.5 are called "EBV+MSI".
predicted_label <- factor(
  ifelse(df$Prediction >= 0.5, "EBV+MSI", "other")
)
df$predicted_label <- predicted_label

print("*** all ***")
summary(df)

# Partition the samples by predicted label and summarise each part.
dfebvmsi <- subset(df, predicted_label == "EBV+MSI")
dfother <- subset(df, predicted_label == "other")

print("*** prediction: ebv+msi ***")
summary(dfebvmsi)
print("*** prediction: other ***")
summary(dfother)

## TMB and prediction

In [None]:
# Side-by-side TMB distributions for the two predicted classes; the list
# names become the box labels.
boxplot(list(
  "EBV+MSI" = dfebvmsi$Tumor.Mutational.Burden,
  "other" = dfother$Tumor.Mutational.Burden
))

In [None]:
# F test comparing the TMB variances of the two predicted classes.
# The argument expressions are kept verbatim: they appear in the printed
# "data:" line of the result.
var.test(
  x = dfebvmsi$Tumor.Mutational.Burden,
  y = dfother$Tumor.Mutational.Burden,
  conf.level = 0.95
)

In [None]:
# Two-sample t test on TMB between the predicted classes, without assuming
# equal variances (Welch approximation).
t.test(
  x = dfebvmsi$Tumor.Mutational.Burden,
  y = dfother$Tumor.Mutational.Burden,
  var.equal = FALSE
)

In [None]:
# Among samples whose ground truth is EBV+MSI, separate those predicted as
# EBV+MSI (true positives) from those predicted otherwise (false negatives).
dfebvmsi_truepositive <- subset(
  df,
  df$ground_truth == "EBV+MSI" & df$predicted_label == "EBV+MSI"
)
dfebvmsi_falsenegative <- subset(
  df,
  df$ground_truth == "EBV+MSI" & df$predicted_label != "EBV+MSI"
)

# Compare TMB between the two groups. Explicit labels and a title are given
# because unnamed groups would otherwise be labelled only "1" and "2".
boxplot(dfebvmsi_truepositive$Tumor.Mutational.Burden,
        dfebvmsi_falsenegative$Tumor.Mutational.Burden,
        names = c("predicted EBV+MSI", "predicted other"),
        main = "Ground truth: EBV+MSI")

In [None]:
# F test comparing TMB variances between true positives and false negatives
# of the EBV+MSI ground-truth class.
var.test(
  x = dfebvmsi_truepositive$Tumor.Mutational.Burden,
  y = dfebvmsi_falsenegative$Tumor.Mutational.Burden,
  conf.level = 0.95
)

In [None]:
# Two-sample t test on TMB between true positives and false negatives of the
# EBV+MSI ground-truth class, using the pooled variance (equal variances
# assumed).
t.test(
  x = dfebvmsi_truepositive$Tumor.Mutational.Burden,
  y = dfebvmsi_falsenegative$Tumor.Mutational.Burden,
  var.equal = TRUE
)

In [None]:
# Among samples whose ground truth is "other", separate those predicted as
# "other" from those predicted otherwise. The variable names treat "other"
# as the reference class of this comparison.
dfother_truepositive <- subset(
  df,
  df$ground_truth == "other" & df$predicted_label == "other"
)
dfother_falsenegative <- subset(
  df,
  df$ground_truth == "other" & df$predicted_label != "other"
)
summary(dfother_truepositive)
summary(dfother_falsenegative)

# Compare TMB between the two groups. Explicit labels and a title are given
# because unnamed groups would otherwise be labelled only "1" and "2".
boxplot(dfother_truepositive$Tumor.Mutational.Burden,
        dfother_falsenegative$Tumor.Mutational.Burden,
        names = c("predicted other", "predicted EBV+MSI"),
        main = "Ground truth: other")

In [None]:
# F test comparing TMB variances between correctly and incorrectly predicted
# samples of the "other" ground-truth class.
var.test(
  x = dfother_truepositive$Tumor.Mutational.Burden,
  y = dfother_falsenegative$Tumor.Mutational.Burden,
  conf.level = 0.95
)

In [None]:
# Two-sample t test on TMB between correctly and incorrectly predicted
# samples of the "other" ground-truth class, using the pooled variance
# (equal variances assumed).
t.test(
  x = dfother_truepositive$Tumor.Mutational.Burden,
  y = dfother_falsenegative$Tumor.Mutational.Burden,
  var.equal = TRUE
)