# Validation on Cancer Predictions


In [1]:
from datetime import datetime

# Stamp the run time in green (ANSI SGR 32, reset with SGR 0) at the top of the notebook.
print(f"\033[32m{datetime.now():%B %d, %Y %H:%M:%S}\033[0m")

[32mMarch 09, 2022 16:50:01[0m


## Setup

In [2]:
import pandas as pd

from utils import CancerValidation

# Per-cohort "soft" prediction CSVs, one constant per TCGA cancer type.
# NOTE(review): none of the cells visible in this notebook reference these six
# *_PREDICTION_PATH constants — confirm whether they are still needed here.
BRCA_PREDICTION_PATH = "../../../../data/predictions_datasets/brca_prediction_2021-11-17/558297b6/predictions_soft_2021-11-17.csv"
COAD_PREDICTION_PATH = "../../../../data/predictions_datasets/coad_prediction_2022-01-06/9789393f/predictions_soft_2022-01-06.csv"
ESCA_PREDICTION_PATH = "../../../../data/predictions_datasets/esca_prediction_2021-11-17/59544312/predictions_soft_2021-11-17.csv"
GBM_PREDICTION_PATH = "../../../../data/predictions_datasets/gbm_prediction_2021-11-17/b9cfc7ea/predictions_soft_2021-11-17.csv"
HNSC_PREDICTION_PATH = "../../../../data/predictions_datasets/hnsc_prediction_2021-11-17/53930f30/predictions_soft_2021-11-17.csv"
OV_PREDICTION_PATH = "../../../../data/predictions_datasets/ov_prediction_2021-11-17/37bf1637/predictions_soft_2021-11-17.csv"

# Interface data file; passed to CancerValidation as `interfaces_data_path` below.
H_SAPIENS_INTERFACES_HQ_non_ECLAIR_PATH = "../data/H_sapiens_interfacesHQ_non_ECLAIR_2022-03-09.txt"

# Load IPython's autoreload extension and select mode 2, so that edits to imported
# modules (e.g. the local `utils`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Initialize Cancer Validation

In [3]:
# Single validator instance shared by every cohort below; it is pointed at the
# H. sapiens interface file defined in the setup cell.
cancer_validation = CancerValidation(
    interfaces_data_path=H_SAPIENS_INTERFACES_HQ_non_ECLAIR_PATH,
)

In [4]:
# Load the saved output dict of every TCGA cohort from its .pickle file.
# `map` is consumed left to right by the tuple unpacking, so the files are read in
# the order listed and each result is bound to the matching `<cohort>_output` name.
brca_output, coad_output, esca_output, gbm_output, hnsc_output, ov_output = map(
    cancer_validation.load_output_dict,
    (
        "outputs/BRCA_output_HQ_non_ECLAIR_2022-03-09.pickle",
        "outputs/COAD_output_HQ_non_ECLAIR_2022-03-09.pickle",
        "outputs/ESCA_output_HQ_non_ECLAIR_2022-03-09.pickle",
        "outputs/GBM_output_HQ_non_ECLAIR_2022-03-09.pickle",
        "outputs/HNSC_output_HQ_non_ECLAIR_2022-03-09.pickle",
        "outputs/OV_output_HQ_non_ECLAIR_2022-03-09.pickle",
    ),
)

### BRCA

In [5]:
# Re-run the BRCA validation once per class label: class 0 first, then class 1.
brca_output_0, brca_output_1 = (
    cancer_validation.validate_single_class("BRCA", brca_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### COAD

In [6]:
# Re-run the COAD validation once per class label: class 0 first, then class 1.
coad_output_0, coad_output_1 = (
    cancer_validation.validate_single_class("COAD", coad_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### ESCA

In [7]:
# Re-run the ESCA validation once per class label: class 0 first, then class 1.
esca_output_0, esca_output_1 = (
    cancer_validation.validate_single_class("ESCA", esca_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### GBM

In [8]:
# Re-run the GBM validation once per class label: class 0 first, then class 1.
gbm_output_0, gbm_output_1 = (
    cancer_validation.validate_single_class("GBM", gbm_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### HNSC

In [9]:
# Re-run the HNSC validation once per class label: class 0 first, then class 1.
hnsc_output_0, hnsc_output_1 = (
    cancer_validation.validate_single_class("HNSC", hnsc_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


### OV

In [10]:
# Re-run the OV validation once per class label: class 0 first, then class 1.
ov_output_0, ov_output_1 = (
    cancer_validation.validate_single_class("OV", ov_output, single_class=class_label)
    for class_label in (0, 1)
)

Using the class 0 only.
Using the class 1 only.


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


## Combine metrics

In [11]:
# Stack the "metrics_data" table of every cohort into one frame, BRCA through OV.
# The original row labels are kept as-is (no re-indexing), so labels repeat
# across cohorts in the combined frame.
metrics_merged = pd.concat(
    [
        cohort_output["metrics_data"]
        for cohort_output in (
            brca_output,
            coad_output,
            esca_output,
            gbm_output,
            hnsc_output,
            ov_output,
        )
    ]
)

In [12]:
# Same stacking as for the full outputs, done separately for the class-0 and
# class-1 validation results. Cohort order is BRCA through OV in both frames and
# the original row labels are kept (no re-indexing).
metrics_merged_0 = pd.concat(
    [
        class_0_output["metrics_data"]
        for class_0_output in (
            brca_output_0,
            coad_output_0,
            esca_output_0,
            gbm_output_0,
            hnsc_output_0,
            ov_output_0,
        )
    ]
)

metrics_merged_1 = pd.concat(
    [
        class_1_output["metrics_data"]
        for class_1_output in (
            brca_output_1,
            coad_output_1,
            esca_output_1,
            gbm_output_1,
            hnsc_output_1,
            ov_output_1,
        )
    ]
)

In [13]:
# Display the combined metrics table (one row per TCGA cohort, all classes together).
metrics_merged

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,3997,372,389,0.484888,0.48422,0.505051,0.496278,0.514139,-0.031608
0,COAD,7517,756,626,0.474674,0.473418,0.442396,0.426036,0.460064,-0.05294
0,ESCA,1194,134,90,0.415179,0.410862,0.348259,0.315315,0.388889,-0.174809
0,GBM,2945,271,261,0.513158,0.513516,0.517691,0.503623,0.532567,0.027046
0,HNSC,4003,441,358,0.519399,0.517048,0.479675,0.465789,0.494413,0.033952
0,OV,2513,265,213,0.508368,0.505014,0.462243,0.450893,0.474178,0.009988


In [14]:
# Display the class-0-only metrics, one row per TCGA cohort.
# NOTE(review): in the saved output every cohort has BALANCED_ACCURACY 0.5 and
# F1/PRECISION/RECALL/MATTHEWS_COR 0.0, so ACCURACY is the only informative column.
metrics_merged_0

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,1918,169,189,0.472067,0.5,0.0,0.0,0.0,0.0
0,COAD,3726,368,338,0.521246,0.5,0.0,0.0,0.0,0.0
0,ESCA,599,58,55,0.513274,0.5,0.0,0.0,0.0,0.0
0,GBM,1418,134,122,0.523438,0.5,0.0,0.0,0.0,0.0
0,HNSC,2065,238,181,0.568019,0.5,0.0,0.0,0.0,0.0
0,OV,1334,142,112,0.559055,0.5,0.0,0.0,0.0,0.0


In [15]:
# Display the class-1-only metrics, one row per TCGA cohort.
# NOTE(review): in the saved output every cohort has RECALL 1.0, BALANCED_ACCURACY 0.5
# and MATTHEWS_COR 0.0, and PRECISION equals ACCURACY.
metrics_merged_1

Unnamed: 0,TCGA,#_Entries,#_Disruptive,#_Non_disruptive,ACCURACY,BALANCED_ACCURACY,F1,PRECISION,RECALL,MATTHEWS_COR
0,BRCA,2079,203,200,0.496278,0.5,0.66335,0.496278,1.0,0.0
0,COAD,3791,388,288,0.426036,0.5,0.59751,0.426036,1.0,0.0
0,ESCA,595,76,35,0.315315,0.5,0.479452,0.315315,1.0,0.0
0,GBM,1527,137,139,0.503623,0.5,0.66988,0.503623,1.0,0.0
0,HNSC,1938,203,177,0.465789,0.5,0.635548,0.465789,1.0,0.0
0,OV,1179,123,101,0.450893,0.5,0.621538,0.450893,1.0,0.0


# ---------------