# Mutual Exclusivity

## Setup

In [1]:
import os
import os.path as op

os.chdir('../')

# os.listdir()
# Reflect changes in the modules immediately.
%load_ext autoreload
%autoreload 2

In [2]:
# SNV_PATHS
# The SNV directory defaults to the original local folder; set the SNV_DATA_DIR
# environment variable to point the notebook at the datasets on another machine.
SNV_COMMON_PATH = os.environ.get(
    "SNV_DATA_DIR", "C:/Users/ibrah/Desktop/SNV_data/SNV_datasets/"
)
BRCA_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_BRCA_hg38_2021-09-22.csv")
ESCA_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_ESCA_hg38_2021-09-22.csv")
GBM_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_GBM_hg38_2021-09-22.csv")
HNSC_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_HNSC_hg38_2021-09-22.csv")
OV_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_OV_hg38_2021-09-22.csv")

# PRELIMINARY DATA PATHS
BRCA_PRELIMINARY_DATA_PATH = r"../data/predictions_datasets/brca_prediction_2021-11-17/558297b6/brca_preliminary_data_cgc_2021-11-17.xlsx"
ESCA_PRELIMINARY_DATA_PATH = r"../data/predictions_datasets/esca_prediction_2021-11-17/59544312/esca_preliminary_data_cgc_2021-11-17.xlsx"
GBM_PRELIMINARY_DATA_PATH = r"../data/predictions_datasets/gbm_prediction_2021-11-17/b9cfc7ea/gbm_preliminary_data_cgc_2021-11-17.xlsx"
HNSC_PRELIMINARY_DATA_PATH = r"../data/predictions_datasets/hnsc_prediction_2021-11-17/53930f30/hnsc_preliminary_data_cgc_2021-11-17.xlsx"
OV_PRELIMINARY_DATA_PATH = r"../data/predictions_datasets/ov_prediction_2021-11-17/37bf1637/ov_preliminary_data_cgc_2021-11-17.xlsx"

# PATIENT INTERACTION DATA PATHS
BRCA_PATIENT_INTERACTION_DATA_PATH = "../data/patient_interaction_datasets/BRCA_patient_interactions_analysis_table_2021-11-17.xlsx"
ESCA_PATIENT_INTERACTION_DATA_PATH = "../data/patient_interaction_datasets/ESCA_patient_interactions_analysis_table_2021-11-17.xlsx"
GBM_PATIENT_INTERACTION_DATA_PATH = "../data/patient_interaction_datasets/GBM_patient_interactions_analysis_table_2021-11-17.xlsx"
HNSC_PATIENT_INTERACTION_DATA_PATH = "../data/patient_interaction_datasets/HNSC_patient_interactions_analysis_table_2021-11-17.xlsx"
OV_PATIENT_INTERACTION_DATA_PATH = "../data/patient_interaction_datasets/OV_patient_interactions_analysis_table_2021-11-17.xlsx"

# The autoreload extension is already enabled in the first setup cell; loading
# it a second time here only produced an "already loaded" message.

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
def get_protein_gene_pairs(
    preliminary_data,
    min_elaspic_interface_entries=10,
    patient_interface_count_above=5,
):
    """
    Filter the preliminary data and return the (protein, gene) pairs of the kept rows.

    A row is kept only when both conditions hold:
      * NUM_ELASPIC_INTERFACE_ENTRIES >= ``min_elaspic_interface_entries`` (inclusive)
      * PATIENT_INTERFACE_COUNT > ``patient_interface_count_above`` (exclusive)

    Parameters
    ----------
    preliminary_data : pandas.DataFrame
        Table containing the columns PROTEIN, GENE,
        NUM_ELASPIC_INTERFACE_ENTRIES and PATIENT_INTERFACE_COUNT.
    min_elaspic_interface_entries : int, default 10
        Inclusive lower bound on NUM_ELASPIC_INTERFACE_ENTRIES.
    patient_interface_count_above : int, default 5
        Exclusive lower bound on PATIENT_INTERFACE_COUNT.

    Returns
    -------
    list of tuple
        ``(PROTEIN, GENE)`` pairs, in the row order of ``preliminary_data``.
        Empty when no row satisfies both conditions.
    """
    has_enough_entries = (
        preliminary_data["NUM_ELASPIC_INTERFACE_ENTRIES"] >= min_elaspic_interface_entries
    )
    has_enough_patients = (
        preliminary_data["PATIENT_INTERFACE_COUNT"] > patient_interface_count_above
    )
    data_filtered = preliminary_data[has_enough_entries & has_enough_patients]

    return list(zip(data_filtered["PROTEIN"], data_filtered["GENE"]))

In [4]:
import pandas as pd
from helpers.helpers_analysis.mutual_exclusivity import MutualExclusivity
from helpers.helpers_analysis.protein_id_retrieval import ProteinIDFetcher

## Instantiate `MutualExclusivity` Objects for Each Cohort

### BRCA

In [5]:
# Build the BRCA cohort object from its SNV file and patient-interaction table
# (both paths come from the configuration cell); loading progress is logged below.
brca_mutex = MutualExclusivity(
    tcga="BRCA", 
    tcga_snv_path=BRCA_SNV_PATH, 
    patient_interaction_data_path=BRCA_PATIENT_INTERACTION_DATA_PATH
)

2021-11-20 02:49:45 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-20 02:49:45 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading BRCA SNV datasets ..
2021-11-20 02:49:50 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data size: (120988, 121)
2021-11-20 02:49:51 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data processed size: (60251, 121)
2021-11-20 02:49:51 |[32m INFO     [0m| helpers.helpers_analysis.loaders | BRCA SNV datasets are loaded.
2021-11-20 02:49:51 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-20 02:49:51 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-11-20 02:49:59 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [6]:
len(brca_mutex.patients)

985

### ESCA

In [7]:
# Build the ESCA cohort object from its SNV file and patient-interaction table
# (both paths come from the configuration cell); loading progress is logged below.
esca_mutex = MutualExclusivity(
    tcga="ESCA", 
    tcga_snv_path=ESCA_SNV_PATH, 
    patient_interaction_data_path=ESCA_PATIENT_INTERACTION_DATA_PATH
)

2021-11-20 02:50:00 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-20 02:50:00 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading ESCA SNV datasets ..
2021-11-20 02:50:02 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | ESCA SNV data size: (45313, 121)
2021-11-20 02:50:02 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | ESCA SNV data processed size: (19497, 121)
2021-11-20 02:50:02 |[32m INFO     [0m| helpers.helpers_analysis.loaders | ESCA SNV datasets are loaded.
2021-11-20 02:50:02 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-20 02:50:02 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/184 [00:00<?, ?it/s]

2021-11-20 02:50:03 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [8]:
len(esca_mutex.patients)

184

### GBM

In [9]:
# Build the GBM cohort object from its SNV file and patient-interaction table
# (both paths come from the configuration cell); loading progress is logged below.
gbm_mutex = MutualExclusivity(
    tcga="GBM", 
    tcga_snv_path=GBM_SNV_PATH, 
    patient_interaction_data_path=GBM_PATIENT_INTERACTION_DATA_PATH
)

2021-11-20 02:50:04 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-20 02:50:04 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading GBM SNV datasets ..
2021-11-20 02:50:07 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | GBM SNV data size: (82765, 121)
2021-11-20 02:50:08 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | GBM SNV data processed size: (46847, 121)
2021-11-20 02:50:08 |[32m INFO     [0m| helpers.helpers_analysis.loaders | GBM SNV datasets are loaded.
2021-11-20 02:50:08 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-20 02:50:08 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/392 [00:00<?, ?it/s]

2021-11-20 02:50:09 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [10]:
len(gbm_mutex.patients)

392

### HNSC

In [11]:
# Build the HNSC cohort object from its SNV file and patient-interaction table
# (both paths come from the configuration cell); loading progress is logged below.
hnsc_mutex = MutualExclusivity(
    tcga="HNSC", 
    tcga_snv_path=HNSC_SNV_PATH, 
    patient_interaction_data_path=HNSC_PATIENT_INTERACTION_DATA_PATH
)

2021-11-20 02:50:10 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-20 02:50:10 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading HNSC SNV datasets ..
2021-11-20 02:50:14 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | HNSC SNV data size: (102309, 121)
2021-11-20 02:50:15 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | HNSC SNV data processed size: (54883, 121)
2021-11-20 02:50:15 |[32m INFO     [0m| helpers.helpers_analysis.loaders | HNSC SNV datasets are loaded.
2021-11-20 02:50:15 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-20 02:50:15 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/507 [00:00<?, ?it/s]

2021-11-20 02:50:17 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [12]:
len(hnsc_mutex.patients)

507

### OV

In [13]:
# Build the OV cohort object from its SNV file and patient-interaction table
# (both paths come from the configuration cell); loading progress is logged below.
ov_mutex = MutualExclusivity(
    tcga="OV", 
    tcga_snv_path=OV_SNV_PATH, 
    patient_interaction_data_path=OV_PATIENT_INTERACTION_DATA_PATH
)

2021-11-20 02:50:18 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-20 02:50:18 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading OV SNV datasets ..
2021-11-20 02:50:21 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | OV SNV data size: (75168, 121)
2021-11-20 02:50:21 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | OV SNV data processed size: (38486, 121)
2021-11-20 02:50:21 |[32m INFO     [0m| helpers.helpers_analysis.loaders | OV SNV datasets are loaded.
2021-11-20 02:50:21 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-20 02:50:21 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/436 [00:00<?, ?it/s]

2021-11-20 02:50:23 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [14]:
len(ov_mutex.patients)

436

## Export Mutual Exclusivity Data for Selected Genes

In [15]:
def export_mutex_data_for_selected_genes(preliminary_data_path, mutex_obj):
    """
    Export disruptive mutual-exclusivity tables for every selected protein of a cohort.

    The preliminary data is read from an Excel file, reduced to (protein, gene)
    pairs with ``get_protein_gene_pairs``, and for each protein two exports are
    requested from ``mutex_obj``: one with default arguments and one with
    ``prob=True``. Progress is printed per protein.

    Parameters
    ----------
    preliminary_data_path : str
        Path of the cohort's preliminary-data Excel file.
    mutex_obj : MutualExclusivity
        Cohort object providing ``get_disruptive_mutual_exclusivity_data`` and
        ``export_disruptive_mutual_exclusivity_data``.
    """
    preliminary_data = pd.read_excel(preliminary_data_path)

    # Built with op.join rather than a literal containing a backslash: "\m" is an
    # invalid escape sequence (a warning on current Python versions) and the
    # backslash is only a path separator on Windows. On Windows the joined string
    # is identical to the one used before.
    export_dir = op.join("dev", "mutex_counts_and_probs")

    for protein, gene in get_protein_gene_pairs(preliminary_data):
        print(f"PROTEIN_GENE: {protein}_{gene}")
        # NOTE(review): the returned table is discarded here and the export log
        # shows the calculation being run again — this call looks redundant; confirm
        # it has no needed side effect before removing it.
        mutex_obj.get_disruptive_mutual_exclusivity_data(protein)
        mutex_obj.export_disruptive_mutual_exclusivity_data(export_dir, protein)
        mutex_obj.export_disruptive_mutual_exclusivity_data(export_dir, protein, prob=True)

    print("Completed.")

In [16]:
# BRCA
export_mutex_data_for_selected_genes(BRCA_PRELIMINARY_DATA_PATH, brca_mutex)

PROTEIN_GENE: O75582_RPS6KA5
2021-11-20 02:50:27 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over O75582's interactors ..
2021-11-20 02:50:27 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | O75582 have 6 interactors:
Counter({'Q15759:MAPK11': 2, 'Q15418:RPS6KA1': 2, 'Q16539:MAPK14': 1, 'L7RSM2:MAPK14': 1, 'Q9UK32:RPS6KA6': 1, 'P67870:CSNK2B': 1})
2021-11-20 02:50:27 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over O75582's interactors ..
2021-11-20 02:50:27 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | O75582 have 6 interactors:
Counter({'Q15759:MAPK11': 2, 'Q15418:RPS6KA1': 2, 'Q16539:MAPK14': 1, 'L7RSM2:MAPK14': 1, 'Q9UK32:RPS6KA6': 1, 'P67870:CSNK2B': 1})
2021-11-20 02:50:27 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA O75582 ..
2021-11-20 02:50:27 |[32m INFO     [0m| helpers

In [17]:
# ESCA
export_mutex_data_for_selected_genes(ESCA_PRELIMINARY_DATA_PATH, esca_mutex)

PROTEIN_GENE: P04637_TP53
2021-11-20 02:50:33 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:33 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 17 interactors:
Counter({'Q13625:TP53BP2': 30, 'Q96KQ4:PPP1R13B': 30, 'P62993:GRB2': 19, 'Q06187:BTK': 18, 'P07948:LYN': 18, 'Q8WUF5:PPP1R13L': 17, 'Q9H3D4:TP63': 13, 'H2EHT1:TP53': 11, 'K7PPA8:TP53': 11, 'Q96KQ7:EHMT2': 8, 'O15350:TP73': 6, 'O75832:PSMD10': 3, 'Q99466:NOTCH4': 2, 'Q9H9B1:EHMT1': 2, 'Q99728:BARD1': 2, 'Q9ULT8:HECTD1': 2, 'Q53GA5:nan': 1})
2021-11-20 02:50:33 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:33 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 17 interactors:
Counter({'Q13625:TP53BP2': 30, 'Q96KQ4:PPP1R13B': 30, 'P62993:GRB2': 19, 'Q06187:BTK': 18, 'P07948:LYN':

In [18]:
# GBM
export_mutex_data_for_selected_genes(GBM_PRELIMINARY_DATA_PATH, gbm_mutex)

PROTEIN_GENE: P04637_TP53
2021-11-20 02:50:37 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:37 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 19 interactors:
Counter({'Q13625:TP53BP2': 30, 'Q96KQ4:PPP1R13B': 30, 'P62993:GRB2': 22, 'P07948:LYN': 19, 'Q9H3D4:TP63': 18, 'Q8WUF5:PPP1R13L': 18, 'Q06187:BTK': 18, 'H2EHT1:TP53': 14, 'K7PPA8:TP53': 14, 'O15350:TP73': 7, 'Q96KQ7:EHMT2': 6, 'Q9H9B1:EHMT1': 5, 'Q99728:BARD1': 5, 'O75832:PSMD10': 5, 'Q9ULT8:HECTD1': 5, 'Q99466:NOTCH4': 2, 'Q9GZV1:ANKRD2': 1, 'P25963:NFKBIA': 1, 'Q53GA5:nan': 1})
2021-11-20 02:50:37 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:37 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 19 interactors:
Counter({'Q13625:TP53BP2': 30, 'Q96KQ4:PPP1R13B': 30, 'P62993:GR

In [19]:
# HNSC
export_mutex_data_for_selected_genes(HNSC_PRELIMINARY_DATA_PATH, hnsc_mutex)

PROTEIN_GENE: P01112_HRAS
2021-11-20 02:50:40 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P01112's interactors ..
2021-11-20 02:50:40 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P01112 have 18 interactors:
Counter({'Q07889:SOS1': 25, 'Q07890:SOS2': 25, 'G5E9C8:SOS1': 21, 'Q13972:RASGRF1': 16, 'Q9UJ41:RABGEF1': 15, 'Q13671:RIN1': 12, 'Q8IV61:RASGRP3': 11, 'Q9NZL6:RGL1': 9, 'Q12967:RALGDS': 3, 'Q15811:ITSN1': 3, 'P15498:VAV1': 3, 'Q13905:RAPGEF1': 3, 'Q13009:TIAM1': 3, 'Q9P212:PLCE1': 3, 'P10398:ARAF': 1, 'P08069:IGF1R': 1, 'P06239:LCK': 1, 'O95267:RASGRP1': 1})
2021-11-20 02:50:41 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P01112's interactors ..
2021-11-20 02:50:41 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P01112 have 18 interactors:
Counter({'Q07889:SOS1': 25, 'Q07890:SOS2': 25, 'G5E9C8:SOS1': 21, 'Q13972:RASGRF1': 1

In [20]:
# OV
export_mutex_data_for_selected_genes(OV_PRELIMINARY_DATA_PATH, ov_mutex)

PROTEIN_GENE: P04637_TP53
2021-11-20 02:50:48 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:48 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 19 interactors:
Counter({'Q13625:TP53BP2': 61, 'Q96KQ4:PPP1R13B': 61, 'P62993:GRB2': 46, 'Q8WUF5:PPP1R13L': 40, 'Q06187:BTK': 40, 'P07948:LYN': 33, 'Q9H3D4:TP63': 31, 'H2EHT1:TP53': 27, 'K7PPA8:TP53': 27, 'O15350:TP73': 21, 'Q96KQ7:EHMT2': 18, 'O75832:PSMD10': 8, 'Q9H9B1:EHMT1': 6, 'Q99728:BARD1': 6, 'Q9ULT8:HECTD1': 6, 'Q99466:NOTCH4': 3, 'Q53GA5:nan': 3, 'Q9GZV1:ANKRD2': 1, 'P25963:NFKBIA': 1})
2021-11-20 02:50:48 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-20 02:50:48 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 19 interactors:
Counter({'Q13625:TP53BP2': 61, 'Q96KQ4:PPP1R13B': 61, 'P62993:

=======================================

## Extract Protein and Interactor Pairs

In [27]:
brca_preliminary_data = pd.read_excel(BRCA_PRELIMINARY_DATA_PATH)

In [34]:
# Pair every selected BRCA host protein ("PROTEIN:GENE") with each entry of the
# INTERACTOR column of its disruptive mutual-exclusivity table.
brca_selected_protein_gene_list = get_protein_gene_pairs(brca_preliminary_data)
brca_pairs = [
    (f"{protein}:{gene}", interactor)
    for protein, gene in brca_selected_protein_gene_list
    for interactor in list(
        brca_mutex.get_disruptive_mutual_exclusivity_data(protein)["INTERACTOR"]
    )
]

2021-11-18 17:57:34 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over O75582's interactors ..
2021-11-18 17:57:34 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | O75582 have 6 interactors:
['Q15759:MAPK11', 'Q15418:RPS6KA1', 'Q16539:MAPK14', 'L7RSM2:MAPK14', 'Q9UK32:RPS6KA6', 'P67870:CSNK2B']
2021-11-18 17:57:34 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04626's interactors ..
2021-11-18 17:57:34 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04626 have 15 interactors:
['Q96SB4:SRPK1', 'P00533:EGFR', 'P51813:BMX', 'Q14289:PTK2B', 'P16591:FER', 'P06241:FYN', 'P08069:IGF1R', 'Q13153:PAK1', 'P42680:TEC', 'P21860:ERBB3', 'O60674:JAK2', 'P15153:RAC2', 'Q9UJM3:ERRFI1', 'Q92625:ANKS1A', 'P23458:JAK1']
2021-11-18 17:57:35 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's inter

In [36]:
brca_pairs[:3]

[('O75582:RPS6KA5', 'Q15759:MAPK11'),
 ('O75582:RPS6KA5', 'Q15418:RPS6KA1'),
 ('O75582:RPS6KA5', 'Q16539:MAPK14')]

In [42]:
# Write one "HOST_INTERACTOR" line per pair.
# op.join keeps the path valid on non-Windows systems; on Windows it yields the
# same "dev\..." string as the previous raw literal.
with open(op.join("dev", "brca_pairs_2021-11-18.txt"), "w") as file:
    file.writelines(f"{pair[0]}_{pair[1]}\n" for pair in brca_pairs)

### - - - 

### RPS6KA5

In [22]:
ProteinIDFetcher().fetch("RPS6KA5")

['O75582']

In [28]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P04626")

2021-11-13 21:32:59 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04626's interactors ..
2021-11-13 21:32:59 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04626 have 15 interactors:
['Q96SB4:SRPK1', 'P00533:EGFR', 'P51813:BMX', 'Q14289:PTK2B', 'P16591:FER', 'P06241:FYN', 'P08069:IGF1R', 'Q13153:PAK1', 'P42680:TEC', 'P21860:ERBB3', 'O60674:JAK2', 'P15153:RAC2', 'Q9UJM3:ERRFI1', 'Q92625:ANKS1A', 'P23458:JAK1']


Unnamed: 0,PROTEIN:GENE,NUM_PATIENTS,INTERACTOR,NUM_PATIENTS_INTERACTOR,MUTUAL_EXCLUSIVITY
0,P04626:ERBB2,27,Q96SB4:SRPK1,5,1.0
1,P04626:ERBB2,27,P00533:EGFR,13,1.0
2,P04626:ERBB2,27,P51813:BMX,6,0.9697
3,P04626:ERBB2,27,Q14289:PTK2B,4,1.0
4,P04626:ERBB2,27,P16591:FER,6,0.9697
5,P04626:ERBB2,27,P06241:FYN,2,1.0
6,P04626:ERBB2,27,P08069:IGF1R,9,0.9722
7,P04626:ERBB2,27,Q13153:PAK1,4,0.9677
8,P04626:ERBB2,27,P42680:TEC,5,1.0
9,P04626:ERBB2,27,P21860:ERBB3,19,0.9565


In [29]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P04626")

2021-11-13 21:33:00 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04626's interactors ..
2021-11-13 21:33:00 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04626 have 15 interactors:
['Q96SB4:SRPK1', 'P00533:EGFR', 'P51813:BMX', 'Q14289:PTK2B', 'P16591:FER', 'P06241:FYN', 'P08069:IGF1R', 'Q13153:PAK1', 'P42680:TEC', 'P21860:ERBB3', 'O60674:JAK2', 'P15153:RAC2', 'Q9UJM3:ERRFI1', 'Q92625:ANKS1A', 'P23458:JAK1']
2021-11-13 21:33:00 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P04626 ..
2021-11-13 21:33:00 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | dev/mutex\BRCA_P04626_ERBB2_2021-11-13.csv is exported successfully.


---

In [17]:
brca_mutex.get_disrupted_interactors("PIK3CA", identifier_type="gene", return_counter=True)

Counter({'P27986:PIK3R1': 124,
         'O00459:PIK3R2': 124,
         'P43403:ZAP70': 119,
         'Q92569:PIK3R3': 5})

Örneğin ERBB2 geni için en sık disrupt olan interactor SRPK1, ERBB2 ile SRPK1 arasındaki mutual exclusivity'yi hesaplayalım. 

S1 ERBB2'nin mutasyona uğradığı hasta seti olsun. 
S2 SRPK1'nın mutasyona uğradığı hasta seti olsun. 

P04626 - ERBB2
Q96SB4 - SRPK1

$\frac{|S1 \cup S2|}{|S1| + |S2|}$ değerini hesaplayalım. 

mutasyona uğradığı derken direk snv datasına bakabiliriz elaspic'i karıştırmadan. 

Aynı hesaplamayı ERBB2'nun bütün interactorları için tekrarlayalım. SRPK1 ile olan mutual exclusivity değeri daha mı iyi görelim.

Bunu analysis.docx'deki tablolarda bulunan genler için tekrarlayalım.

In [11]:
brca_mutex.calculate_mutual_exclusivity("P04626", "Q96SB4")

1.0

## Patient Interaction Data

In [12]:
brca_mutex.patient_interaction_data.head()

Unnamed: 0,PATIENT,PROTEIN_GENE,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS,CORE_INTERFACE_VS_INTERFACE_STATUS
0,TCGA-D8-A1XY,Q9UKS6:PACSIN3,R24H,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,"Q9BY11:PACSIN1:0.5,Q9UNF0:PACSIN2:0.59,Q6FIA3:...",3,Q5TZC3:PACSIN1,1,I
1,TCGA-AO-A0J5,Q9UBN7:HDAC6,D567Y,Q96DB2:HDAC11,1,Q96DB2:HDAC11:0.55,1,,0,I
2,TCGA-A8-A093,P28062:PSMB8,R216W,P40306:PSMB10,1,,0,P40306:PSMB10,1,I
3,TCGA-A8-A093,Q15842:KCNJ8,E237K,"Q14654:KCNJ11,P63252:KCNJ2",2,"Q14654:KCNJ11:0.52,P63252:KCNJ2:0.53",2,,0,I
4,TCGA-LD-A74U,O15041:SEMA3E,E364K,Q9Y4D7:PLXND1,1,,0,Q9Y4D7:PLXND1,1,I


In [None]:
    # Scratch note: SRPK1 (kept as a comment so that running this cell does not fail on a bare, indented name).

In [15]:
brca_mutex.get_disrupted_interactors(
    "ERBB2", identifier_type="gene", return_counter=True
)

Counter({'P21860:ERBB3': 1,
         'O60674:JAK2': 1,
         'P15153:RAC2': 1,
         'Q96SB4:SRPK1': 7,
         'Q9UJM3:ERRFI1': 1,
         'Q92625:ANKS1A': 1,
         'P23458:JAK1': 1,
         'P00533:EGFR': 2,
         'P51813:BMX': 2,
         'Q14289:PTK2B': 2,
         'P16591:FER': 2,
         'P06241:FYN': 2,
         'P08069:IGF1R': 2,
         'Q13153:PAK1': 2,
         'P42680:TEC': 2})

In [14]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [14]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [15]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene")

['Q13625:TP53BP2',
 'Q96KQ4:PPP1R13B',
 'P62993:GRB2',
 'Q06187:BTK',
 'Q8WUF5:PPP1R13L',
 'Q9H3D4:TP63',
 'H2EHT1:TP53',
 'K7PPA8:TP53',
 'Q96KQ7:EHMT2',
 'O15350:TP73',
 'P07948:LYN',
 'Q9H9B1:EHMT1',
 'Q99728:BARD1',
 'O75832:PSMD10',
 'Q9ULT8:HECTD1']

In [16]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [17]:
brca_mutex.get_disrupted_interactors("H3C1", identifier_type="gene", return_counter=True)

Counter({'P62805:H4C1': 6,
         'Q9UER7:DAXX': 4,
         'P04908:H2AC4': 4,
         'Q16777:H2AC20': 3,
         'P16104:H2AX': 4,
         'P0C0S8:H2AC11': 4,
         'O75367:MACROH2A1': 4,
         'P0C0S5:H2AZ1': 4,
         'Q9Y294:ASF1A': 1,
         'Q9NVP2:ASF1B': 1,
         'P49450:CENPA': 1})

In [18]:
brca_mutex.get_disrupted_interactors("H4C1", identifier_type="gene", return_counter=True)

Counter({'P84243:H3-3A': 7,
         'Q16695:H3-4': 5,
         'Q71DI3:H3C15': 7,
         'P49450:CENPA': 5,
         'P68431:H3C1': 8,
         'Q16514:TAF12': 2,
         'Q9UER7:DAXX': 3,
         'P06899:H2BC11': 1,
         'Q16778:H2BC21': 1,
         'B4E1C1:nan': 4,
         'Q53F85:nan': 3,
         'P04908:H2AC4': 2,
         'P16104:H2AX': 1,
         'P0C0S5:H2AZ1': 2,
         'Q16777:H2AC20': 2,
         'Q9Y294:ASF1A': 1})

In [19]:
brca_mutex.get_disrupted_interactors("MAP2K4", identifier_type="gene", return_counter=True)

Counter({'Q5S007:LRRK2': 6,
         'P52564:MAP2K6': 6,
         'P00533:EGFR': 1,
         'P35916:FLT4': 1,
         'Q99558:MAP3K14': 1,
         'P45984:MAPK9': 1,
         'Q16539:MAPK14': 2})

In [20]:
ProteinIDFetcher().fetch("TP53")

['H2EHT1', 'K7PPA8', 'P04637']

In [21]:
ProteinIDFetcher().fetch("MAP2K4")

['P45985']

In [22]:
ProteinIDFetcher().fetch("ERBB2")

['P04626', 'J3QLU9']

In [23]:
brca_mutex.calculate_mutual_exclusivity("Q96SB4", "P04626")

1.0

In [24]:
brca_mutex.calculate_mutual_exclusivity("P08069", "Q96SB4")

0.9285714285714286

In [25]:
brca_mutex.calculate_mutual_exclusivity("P08069", "P23458")

0.9375

In [26]:
brca_mutex.calculate_mutual_exclusivity("P23458", "P08069")

0.9375