# Validation on Cancer Predictions

Feb 25th, 2022

## Setup

In [1]:
import pandas as pd
from IPython.display import display

from utils import CancerValidation

# All per-cancer prediction CSVs share this directory prefix.
PREDICTIONS_DIR = "../../../../data/predictions_datasets"

# One CSV per cancer type, laid out as
# <cancer>_prediction_<date>/<8-char hex dir>/predictions_soft_<date>.csv
BRCA_PREDICTION_PATH = f"{PREDICTIONS_DIR}/brca_prediction_2021-11-17/558297b6/predictions_soft_2021-11-17.csv"
COAD_PREDICTION_PATH = f"{PREDICTIONS_DIR}/coad_prediction_2022-01-06/9789393f/predictions_soft_2022-01-06.csv"
ESCA_PREDICTION_PATH = f"{PREDICTIONS_DIR}/esca_prediction_2021-11-17/59544312/predictions_soft_2021-11-17.csv"
GBM_PREDICTION_PATH = f"{PREDICTIONS_DIR}/gbm_prediction_2021-11-17/b9cfc7ea/predictions_soft_2021-11-17.csv"
HNSC_PREDICTION_PATH = f"{PREDICTIONS_DIR}/hnsc_prediction_2021-11-17/53930f30/predictions_soft_2021-11-17.csv"
OV_PREDICTION_PATH = f"{PREDICTIONS_DIR}/ov_prediction_2021-11-17/37bf1637/predictions_soft_2021-11-17.csv"

# Interfaces file handed to CancerValidation when it is constructed.
H_SAPIENS_INTERFACES_ALL_PATH = "../data/H_sapiens_interfacesALL.txt"

# Load IPython's autoreload extension and enable mode 2 so that edits to imported
# modules (e.g. utils) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Load Cancer Prediction Datasets

In [2]:
# Read each cancer type's prediction CSV into its own DataFrame. The two tuples
# are positionally aligned: the n-th path is loaded into the n-th name.
(
    brca_predictions,
    coad_predictions,
    esca_predictions,
    gbm_predictions,
    hnsc_predictions,
    ov_predictions,
) = map(
    pd.read_csv,
    (
        BRCA_PREDICTION_PATH,
        COAD_PREDICTION_PATH,
        ESCA_PREDICTION_PATH,
        GBM_PREDICTION_PATH,
        HNSC_PREDICTION_PATH,
        OV_PREDICTION_PATH,
    ),
)

In [3]:
# For every cancer type, print the frame's shape and render its first rows,
# in the order BRCA, COAD, ESCA, GBM, HNSC, OV.
for cancer_label, predictions in (
    ("BRCA", brca_predictions),
    ("COAD", coad_predictions),
    ("ESCA", esca_predictions),
    ("GBM", gbm_predictions),
    ("HNSC", hnsc_predictions),
    ("OV", ov_predictions),
):
    print(f"{cancer_label} data: {predictions.shape}")
    display(predictions.head())


BRCA data: (3997, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,P28062,R216W,P40306,1,0.55345
1,Q15842,E237K,Q14654,0,0.48443
2,Q15842,E237K,P63252,0,0.47205
3,Q9UKS6,R24H,Q9BY11,0,0.49584
4,Q9UKS6,R24H,Q9UNF0,0,0.41309


COAD data: (7517, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,P62873,S334I,Q13371,0,0.48376
1,Q9BXB4,N590D,Q96SU4,1,0.6416
2,P35251,V921A,P35250,0,0.2517
3,P08123,K1248N,P25940,1,0.78179
4,P15056,V600E,P53667,0,0.43582


ESCA data: (1194, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,P61088,E61K,P38398,1,0.57282
1,P61088,E61K,Q96PU5,1,0.68296
2,P61088,E61K,Q9Y4L5,1,0.62781
3,P61088,E61K,Q9Y3C5,1,0.60244
4,P61088,E61K,Q9C035,1,0.52623


GBM data: (2945, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,P46734,K200E,P27361,1,0.8174
1,P46734,K200E,Q16512,1,0.9181
2,P46734,K200E,Q5S007,1,0.84646
3,P46734,K200E,Q9Y2U5,1,0.8072
4,P46734,K200E,Q99759,1,0.85617


HNSC data: (4003, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,Q13485,R361H,B7Z5N5,0,0.45127
1,Q13485,R361H,Q68DB7,0,0.47297
2,Q13485,R361H,Q53XR6,0,0.42872
3,Q9Y566,R2148H,Q9BYB0,0,0.19476
4,O15116,L87I,Q9Y333,1,0.83768


OV data: (2513, 5)


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
0,P08134,D59E,P52565,0,0.28821
1,P62993,A3V,Q9H8V3,0,0.44397
2,P62993,A3V,Q14155,1,0.51214
3,O75175,Q684H,Q9NZN8,0,0.32091
4,O75175,Q684H,B2RDX7,0,0.3347


## Initialize Cancer Validation

In [4]:
# Build the validator once from the interfaces file; the same instance is reused
# for every cancer type in the cells that follow.
cancer_validation = CancerValidation(interfaces_data_path=H_SAPIENS_INTERFACES_ALL_PATH)

## Validate

### BRCA

In [5]:
# Validate the BRCA predictions; later cells read the "metrics_data" entry of the result.
# NOTE(review): the saved output of this cell is
# "ValueError: too many values to unpack (expected 1)", so brca_output was never
# assigned in the recorded run and every later cell is unexecuted. The cause is
# presumably inside utils.CancerValidation.validate -- confirm from the full
# traceback and fix it before relying on this notebook.
brca_output = cancer_validation.validate("BRCA", brca_predictions)

  0%|          | 0/3997 [00:00<?, ?it/s]

ValueError: too many values to unpack (expected 1)

In [None]:
# Show the metrics produced by the BRCA validation run.
brca_output["metrics_data"]

### COAD

In [None]:
# Validate the COAD predictions; later cells read the "metrics_data" entry of the result.
coad_output = cancer_validation.validate("COAD", coad_predictions)

In [None]:
# Show the metrics produced by the COAD validation run.
coad_output["metrics_data"]

### ESCA

In [None]:
# Validate the ESCA predictions; later cells read the "metrics_data" entry of the result.
esca_output = cancer_validation.validate("ESCA", esca_predictions)

In [None]:
# Show the metrics produced by the ESCA validation run.
esca_output["metrics_data"]

### GBM

In [None]:
# Validate the GBM predictions; later cells read the "metrics_data" entry of the result.
gbm_output = cancer_validation.validate("GBM", gbm_predictions)

In [None]:
# Show the metrics produced by the GBM validation run.
gbm_output["metrics_data"]

### HNSC

In [None]:
# Validate the HNSC predictions; later cells read the "metrics_data" entry of the result.
hnsc_output = cancer_validation.validate("HNSC", hnsc_predictions)

In [None]:
# Show the metrics produced by the HNSC validation run.
hnsc_output["metrics_data"]

### OV

In [None]:
# Validate the OV predictions; later cells read the "metrics_data" entry of the result.
ov_output = cancer_validation.validate("OV", ov_predictions)

In [None]:
# Show the metrics produced by the OV validation run.
ov_output["metrics_data"]

## Combine metrics

In [None]:
# Gather each cancer type's "metrics_data" in a fixed order (BRCA, COAD, ESCA,
# GBM, HNSC, OV), then combine them with pd.concat using its default settings.
validation_outputs = (
    brca_output,
    coad_output,
    esca_output,
    gbm_output,
    hnsc_output,
    ov_output,
)
metrics_merged = pd.concat(
    [output["metrics_data"] for output in validation_outputs]
)

In [None]:
# Display the metrics combined across all six cancer types.
metrics_merged

# ---------------