# Validation on Cancer Predictions


In [1]:
from datetime import datetime

# Stamp the notebook with the wall-clock time of this run; \033[32m / \033[0m
# are the ANSI "green" and "reset" escape codes wrapped around the text.
print(f"\033[32m{datetime.now():%B %d, %Y %H:%M:%S}\033[0m")

[32mMarch 09, 2022 16:17:51[0m


## Setup

In [2]:
import pandas as pd

from utils import CancerValidation

# Per-cohort prediction CSV locations. Each path embeds the TCGA cohort code,
# a run date and a short hash-like run id.
# NOTE(review): none of the *_PREDICTION_PATH constants is referenced by the
# cells visible in this part of the notebook — confirm they are still needed.
BRCA_PREDICTION_PATH = "../../../../data/predictions_datasets/brca_prediction_2021-11-17/558297b6/predictions_soft_2021-11-17.csv"
COAD_PREDICTION_PATH = "../../../../data/predictions_datasets/coad_prediction_2022-01-06/9789393f/predictions_soft_2022-01-06.csv"
ESCA_PREDICTION_PATH = "../../../../data/predictions_datasets/esca_prediction_2021-11-17/59544312/predictions_soft_2021-11-17.csv"
GBM_PREDICTION_PATH = "../../../../data/predictions_datasets/gbm_prediction_2021-11-17/b9cfc7ea/predictions_soft_2021-11-17.csv"
HNSC_PREDICTION_PATH = "../../../../data/predictions_datasets/hnsc_prediction_2021-11-17/53930f30/predictions_soft_2021-11-17.csv"
OV_PREDICTION_PATH = "../../../../data/predictions_datasets/ov_prediction_2021-11-17/37bf1637/predictions_soft_2021-11-17.csv"

# Interface data file; passed to CancerValidation(interfaces_data_path=...)
# when the validator is constructed.
H_SAPIENS_INTERFACES_ALL_PATH = "../data/H_sapiens_interfacesALL.txt"

# Reflect changes in the modules immediately.
# (`%autoreload 2` asks IPython to reload edited modules before running each
# cell, so changes to local modules such as `utils` need no kernel restart.)
%load_ext autoreload
%autoreload 2

## Initialize Cancer Validation

In [3]:
# Single validator instance shared by every cohort below; it is configured
# with the H. sapiens interfaces file declared in the setup cell.
cancer_validation = CancerValidation(
    interfaces_data_path=H_SAPIENS_INTERFACES_ALL_PATH,
)

In [4]:
# All saved validation outputs share one run date and one file-name pattern.
# Keeping both in a single place means a new run only needs a one-line edit
# instead of six synchronized string changes.
OUTPUT_RUN_DATE = "2022-03-09"
OUTPUT_PATH_TEMPLATE = "outputs/{tcga}_output_ALL_{run_date}.pickle"


def load_cohort_output(tcga, run_date=OUTPUT_RUN_DATE):
    """Load the saved validation output for one TCGA cohort.

    Parameters
    ----------
    tcga : str
        Cohort code as it appears in the file name, e.g. ``"BRCA"``.
    run_date : str, optional
        Date stamp in the file name. Defaults to ``OUTPUT_RUN_DATE``.

    Returns
    -------
    The object returned by ``cancer_validation.load_output_dict`` for that
    file; later cells index it with ``["metrics_data"]``.
    """
    # NOTE(review): the inputs are ``.pickle`` files and are presumably read
    # with pickle, which can execute arbitrary code on load — only point this
    # at outputs produced by this project.
    output_path = OUTPUT_PATH_TEMPLATE.format(tcga=tcga, run_date=run_date)
    return cancer_validation.load_output_dict(output_path)


# Variable names are kept as-is because the following cells refer to them.
brca_output = load_cohort_output("BRCA")
coad_output = load_cohort_output("COAD")
esca_output = load_cohort_output("ESCA")
gbm_output = load_cohort_output("GBM")
hnsc_output = load_cohort_output("HNSC")
ov_output = load_cohort_output("OV")

### BRCA

In [5]:
# BRCA: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
brca_output_0, brca_output_1 = (
    cancer_validation.validate_single_class("BRCA", brca_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### COAD

In [6]:
# COAD: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
coad_output_0, coad_output_1 = (
    cancer_validation.validate_single_class("COAD", coad_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### ESCA

In [7]:
# ESCA: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
esca_output_0, esca_output_1 = (
    cancer_validation.validate_single_class("ESCA", esca_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### GBM

In [8]:
# GBM: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
gbm_output_0, gbm_output_1 = (
    cancer_validation.validate_single_class("GBM", gbm_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### HNSC

In [9]:
# HNSC: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
hnsc_output_0, hnsc_output_1 = (
    cancer_validation.validate_single_class("HNSC", hnsc_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### OV

In [10]:
# OV: run the single-class validation for label 0 and then label 1
# (same order as before), binding each result to its own name.
ov_output_0, ov_output_1 = (
    cancer_validation.validate_single_class("OV", ov_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


## Combine metrics

In [11]:
# Stack the metrics rows of the six cohorts (BRCA, COAD, ESCA, GBM, HNSC, OV)
# into a single table. Each cohort's `metrics_data` frame carries its own
# index, so a plain concat leaves every row labelled 0; `ignore_index=True`
# relabels the rows 0..n-1 so they can be addressed individually.
metrics_merged = pd.concat(
    [
        brca_output["metrics_data"],
        coad_output["metrics_data"],
        esca_output["metrics_data"],
        gbm_output["metrics_data"],
        hnsc_output["metrics_data"],
        ov_output["metrics_data"],
    ],
    ignore_index=True,
)

In [12]:
# Same per-cohort stacking, but for the single-class validation runs:
# `metrics_merged_0` collects the `single_class=0` results and
# `metrics_merged_1` the `single_class=1` results. `ignore_index=True` gives
# each merged table a unique 0..n-1 index instead of repeating the label 0
# that every per-cohort frame brings along.
metrics_merged_0 = pd.concat(
    [
        brca_output_0["metrics_data"],
        coad_output_0["metrics_data"],
        esca_output_0["metrics_data"],
        gbm_output_0["metrics_data"],
        hnsc_output_0["metrics_data"],
        ov_output_0["metrics_data"],
    ],
    ignore_index=True,
)

metrics_merged_1 = pd.concat(
    [
        brca_output_1["metrics_data"],
        coad_output_1["metrics_data"],
        esca_output_1["metrics_data"],
        gbm_output_1["metrics_data"],
        hnsc_output_1["metrics_data"],
        ov_output_1["metrics_data"],
    ],
    ignore_index=True,
)

In [13]:
# Display the merged metrics table built from the loaded `*_output` dicts
# (one row per TCGA cohort).
metrics_merged

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,3997,862,1059,0.484643,0.483266,0.515181,0.535097,0.496695,-0.0333
0,COAD,7517,1693,1908,0.50847,0.508775,0.520585,0.538677,0.503669,0.017519
0,ESCA,1194,283,272,0.452252,0.452609,0.457143,0.444444,0.470588,-0.094832
0,GBM,2945,595,773,0.525585,0.523123,0.563551,0.586835,0.542044,0.045897
0,HNSC,4003,947,926,0.504538,0.504368,0.494002,0.498899,0.489201,0.00874
0,OV,2513,541,610,0.504778,0.506651,0.504348,0.537037,0.47541,0.013304


In [14]:
# Display the merged metrics of the `single_class=0` validation runs.
# NOTE(review): in the saved output every cohort has BALANCED_ACCURACY 0.5,
# F1/PRECISION/RECALL 0.0 and MATTHEWS_COR 0.0, so these metrics look
# degenerate for the class-0 subset — presumably only ACCURACY is informative
# here; confirm against `validate_single_class` in `utils`.
metrics_merged_0

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,1918,405,533,0.43177,0.5,0.0,0.0,0.0,0.0
0,COAD,3726,870,947,0.478811,0.5,0.0,0.0,0.0,0.0
0,ESCA,599,123,144,0.460674,0.5,0.0,0.0,0.0,0.0
0,GBM,1418,300,354,0.458716,0.5,0.0,0.0,0.0,0.0
0,HNSC,2065,492,473,0.509845,0.5,0.0,0.0,0.0,0.0
0,OV,1334,291,320,0.476268,0.5,0.0,0.0,0.0,0.0


In [15]:
# Display the merged metrics of the `single_class=1` validation runs.
# NOTE(review): in the saved output every cohort has RECALL 1.0,
# PRECISION equal to ACCURACY, BALANCED_ACCURACY 0.5 and MATTHEWS_COR 0.0, so
# these metrics look degenerate for the class-1 subset — confirm against
# `validate_single_class` in `utils` before interpreting them.
metrics_merged_1

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,2079,457,526,0.535097,0.5,0.69715,0.535097,1.0,0.0
0,COAD,3791,823,961,0.538677,0.5,0.700182,0.538677,1.0,0.0
0,ESCA,595,160,128,0.444444,0.5,0.615385,0.444444,1.0,0.0
0,GBM,1527,295,419,0.586835,0.5,0.739629,0.586835,1.0,0.0
0,HNSC,1938,455,453,0.498899,0.5,0.665687,0.498899,1.0,0.0
0,OV,1179,250,290,0.537037,0.5,0.698795,0.537037,1.0,0.0


# ---------------