# Validation on Cancer Predictions

Feb 28th, 2022

## Setup

In [1]:
import pandas as pd
from IPython.display import display

from utils import CancerValidation

# Per-cohort prediction CSV locations (BRCA, COAD, ESCA, GBM, HNSC, OV).
# NOTE(review): none of these *_PREDICTION_PATH constants is referenced by a
# cell visible in this notebook — confirm whether they are still needed here.
BRCA_PREDICTION_PATH = "../../../../data/predictions_datasets/brca_prediction_2021-11-17/558297b6/predictions_soft_2021-11-17.csv"
COAD_PREDICTION_PATH = "../../../../data/predictions_datasets/coad_prediction_2022-01-06/9789393f/predictions_soft_2022-01-06.csv"
ESCA_PREDICTION_PATH = "../../../../data/predictions_datasets/esca_prediction_2021-11-17/59544312/predictions_soft_2021-11-17.csv"
GBM_PREDICTION_PATH = "../../../../data/predictions_datasets/gbm_prediction_2021-11-17/b9cfc7ea/predictions_soft_2021-11-17.csv"
HNSC_PREDICTION_PATH = "../../../../data/predictions_datasets/hnsc_prediction_2021-11-17/53930f30/predictions_soft_2021-11-17.csv"
OV_PREDICTION_PATH = "../../../../data/predictions_datasets/ov_prediction_2021-11-17/37bf1637/predictions_soft_2021-11-17.csv"

# Interface data file; passed to CancerValidation as `interfaces_data_path`.
H_SAPIENS_INTERFACES_HQ_PATH = "../data/H_sapiens_interfacesHQ.txt"

# Reflect changes in the modules immediately.
%load_ext autoreload
%autoreload 2

## Initialize Cancer Validation

In [2]:
# Build the validator once; every cohort cell below reuses this instance.
cancer_validation = CancerValidation(
    interfaces_data_path=H_SAPIENS_INTERFACES_HQ_PATH,
)

In [3]:
# Load the stored output dict of every cohort from the outputs/ directory.
# NOTE(review): the .pickle extension suggests these files are unpickled —
# only load files from a trusted source.
(
    brca_output,
    coad_output,
    esca_output,
    gbm_output,
    hnsc_output,
    ov_output,
) = map(
    cancer_validation.load_output_dict,
    (
        "outputs/BRCA_output_2022-02-28.pickle",
        "outputs/COAD_output_2022-02-28.pickle",
        "outputs/ESCA_output_2022-02-28.pickle",
        "outputs/GBM_output_2022-02-28.pickle",
        "outputs/HNSC_output_2022-02-28.pickle",
        "outputs/OV_output_2022-02-28.pickle",
    ),
)

### BRCA

In [7]:
# Validate BRCA restricted to one class at a time: class 0 first, then class 1.
brca_output_0, brca_output_1 = (
    cancer_validation.validate_single_class("BRCA", brca_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### COAD

In [16]:
# Validate COAD restricted to one class at a time: class 0 first, then class 1.
coad_output_0, coad_output_1 = (
    cancer_validation.validate_single_class("COAD", coad_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### ESCA

In [17]:
# Validate ESCA restricted to one class at a time: class 0 first, then class 1.
esca_output_0, esca_output_1 = (
    cancer_validation.validate_single_class("ESCA", esca_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### GBM

In [18]:
# Validate GBM restricted to one class at a time: class 0 first, then class 1.
gbm_output_0, gbm_output_1 = (
    cancer_validation.validate_single_class("GBM", gbm_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### HNSC

In [19]:
# Validate HNSC restricted to one class at a time: class 0 first, then class 1.
hnsc_output_0, hnsc_output_1 = (
    cancer_validation.validate_single_class("HNSC", hnsc_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### OV

In [20]:
# Validate OV restricted to one class at a time: class 0 first, then class 1.
ov_output_0, ov_output_1 = (
    cancer_validation.validate_single_class("OV", ov_output, single_class=label)
    for label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


## Combine metrics

In [21]:
# Stack the "metrics_data" table of every cohort's full (not class-restricted)
# output into one frame; row order follows the cohort order listed here.
# NOTE(review): the rows keep their original index labels (all 0 in the table
# displayed below), so the merged index is not unique — consider
# `ignore_index=True` if positional labels are wanted.
metrics_merged = pd.concat(
    [
        cohort_output["metrics_data"]
        for cohort_output in (
            brca_output,
            coad_output,
            esca_output,
            gbm_output,
            hnsc_output,
            ov_output,
        )
    ]
)

In [22]:
# Stack the class-0-only "metrics_data" tables of all cohorts into one frame.
metrics_merged_0 = pd.concat(
    [
        cohort_output["metrics_data"]
        for cohort_output in (
            brca_output_0,
            coad_output_0,
            esca_output_0,
            gbm_output_0,
            hnsc_output_0,
            ov_output_0,
        )
    ]
)

# Stack the class-1-only "metrics_data" tables of all cohorts into one frame.
metrics_merged_1 = pd.concat(
    [
        cohort_output["metrics_data"]
        for cohort_output in (
            brca_output_1,
            coad_output_1,
            esca_output_1,
            gbm_output_1,
            hnsc_output_1,
            ov_output_1,
        )
    ]
)

In [26]:
# Show the combined metrics from the full outputs, one row per cohort.
metrics_merged

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,3997,578,1337,0.495561,0.489455,0.582902,0.689479,0.504862,-0.019368
0,COAD,7517,1174,2413,0.511291,0.514953,0.581323,0.68602,0.504351,0.028066
0,ESCA,1194,179,376,0.484685,0.474793,0.569277,0.65625,0.50266,-0.047164
0,GBM,2945,409,955,0.538856,0.535786,0.622675,0.728933,0.543455,0.065652
0,HNSC,4003,672,1193,0.49866,0.503501,0.553699,0.643016,0.486169,0.006727
0,OV,2513,374,770,0.495629,0.507754,0.557854,0.680374,0.472727,0.01458


In [27]:
# Show the combined metrics from the class-0-only validation, one row per cohort.
metrics_merged_0

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,1918,274,662,0.292735,0.5,0.0,0.0,0.0,0.0
0,COAD,3726,617,1196,0.34032,0.5,0.0,0.0,0.0,0.0
0,ESCA,599,80,187,0.299625,0.5,0.0,0.0,0.0,0.0
0,GBM,1418,216,436,0.331288,0.5,0.0,0.0,0.0,0.0
0,HNSC,2065,350,613,0.363448,0.5,0.0,0.0,0.0,0.0
0,OV,1334,203,406,0.333333,0.5,0.0,0.0,0.0,0.0


In [28]:
# Show the combined metrics from the class-1-only validation, one row per cohort.
metrics_merged_1

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,2079,304,675,0.689479,0.5,0.816203,0.689479,1.0,0.0
0,COAD,3791,557,1217,0.68602,0.5,0.813775,0.68602,1.0,0.0
0,ESCA,595,99,189,0.65625,0.5,0.792453,0.65625,1.0,0.0
0,GBM,1527,193,519,0.728933,0.5,0.843217,0.728933,1.0,0.0
0,HNSC,1938,322,580,0.643016,0.5,0.782726,0.643016,1.0,0.0
0,OV,1179,171,364,0.680374,0.5,0.809789,0.680374,1.0,0.0


# ---------------