# Mutual Exclusivity

In [1]:
import os
import os.path as op
from collections import Counter

import pandas as pd

# Move to the parent of the directory the kernel started in.
# A bare relative chdir climbs one more level on every re-run of this cell;
# remembering the start directory makes the cell idempotent.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(op.join(NOTEBOOK_DIR, os.pardir))

# os.listdir()

In [2]:
# SNV_PATHS
# The SNV directory is machine-specific. Set the SNV_DATA_DIR environment
# variable to point at it instead of editing the notebook; the fallback is the
# original hard-coded location, so existing setups behave as before.
SNV_COMMON_PATH = os.environ.get(
    "SNV_DATA_DIR", "C:/Users/ibrah/Desktop/SNV_data/SNV_datasets/"
)
BRCA_SNV_PATH = op.join(SNV_COMMON_PATH, "SNV_BRCA_hg38_2021-09-22.csv")

# PATIENT INTERACTION DATA PATHS
# Relative path: resolved against the current working directory.
BRCA_PATIENT_INTERACTION_DATA_PATH = "dev/BRCA_patient_interactions_analysis_table_2021-11-12.xlsx"

# Reflect changes in the modules immediately.
%load_ext autoreload
%autoreload 2

## BRCA

In [3]:
from helpers.helpers_analysis.mutual_exclusivity import MutualExclusivity
from helpers.helpers_analysis.protein_id_retrieval import ProteinIDFetcher

In [4]:
brca_mutex = MutualExclusivity(
    tcga="BRCA", 
    tcga_snv_path=BRCA_SNV_PATH, 
    patient_interaction_data_path=BRCA_PATIENT_INTERACTION_DATA_PATH
)

2021-11-13 20:57:29 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading SNV data simplified ..
2021-11-13 20:57:29 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading BRCA SNV datasets ..
2021-11-13 20:57:33 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data size: (120988, 121)
2021-11-13 20:57:34 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data processed size: (60251, 121)
2021-11-13 20:57:34 |[32m INFO     [0m| helpers.helpers_analysis.loaders | BRCA SNV datasets are loaded.
2021-11-13 20:57:34 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient ids ..
2021-11-13 20:57:34 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Loading patient to snv_data ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-11-13 20:57:38 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | patient interaction data patient data ..


In [5]:
len(brca_mutex.patients)

985

In [6]:
len(brca_mutex.get_patients_with("P04637", identifier_type="protein"))

204

In [7]:
len(brca_mutex.get_patients_with("P04626", identifier_type="protein"))

27

In [10]:
ProteinIDFetcher().fetch("ERBB2")

['P04626', 'J3QLU9']

In [11]:
len(brca_mutex.get_patients_with("P04626", identifier_type="protein"))

27

In [12]:
len(brca_mutex.get_patients_with("J3QLU9", identifier_type="protein"))

0

Örneğin ERBB2 geni için en sık disrupt olan interactor SRPK1, ERBB2 ile SRPK1 arasındaki mutual exclusivity'yi hesaplayalım. 

S1, ERBB2'nin mutasyona uğradığı hasta seti olsun. 
S2, SRPK1'in mutasyona uğradığı hasta seti olsun. 

P04626 - ERBB2
Q96SB4 - SRPK1

$\frac{|S_1 \cup S_2|}{|S_1| + |S_2|}$ değerini hesaplayalım. 

mutasyona uğradığı derken direk snv datasına bakabiliriz elaspic'i karıştırmadan. 

Aynı hesaplamayı ERBB2'nin bütün interactorları için tekrarlayalım. SRPK1 ile olan mutual exclusivity değeri daha iyi mi, görelim.

Bunu analysis.docx'deki tablolarda bulunan genler için tekrarlayalım.

In [13]:
brca_mutex.calculate_mutual_exclusivity("P04626", "Q96SB4")

1.0

## Patient Interaction Data

In [15]:
brca_mutex.patient_interaction_data.head()

Unnamed: 0,PATIENT,PROTEIN_GENE,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS,CORE_INTERFACE_VS_INTERFACE_STATUS
0,TCGA-D8-A1XY,Q9UKS6:PACSIN3,R24H,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,"Q9BY11:PACSIN1:0.5,Q9UNF0:PACSIN2:0.59,Q6FIA3:...",3,Q5TZC3:PACSIN1,1,I
1,TCGA-AO-A0J5,Q9UBN7:HDAC6,D567Y,Q96DB2:HDAC11,1,Q96DB2:HDAC11:0.55,1,,0,I
2,TCGA-A8-A093,P28062:PSMB8,R216W,P40306:PSMB10,1,,0,P40306:PSMB10,1,I
3,TCGA-A8-A093,Q15842:KCNJ8,E237K,"Q14654:KCNJ11,P63252:KCNJ2",2,"Q14654:KCNJ11:0.52,P63252:KCNJ2:0.53",2,,0,I
4,TCGA-LD-A74U,O15041:SEMA3E,E364K,Q9Y4D7:PLXND1,1,,0,Q9Y4D7:PLXND1,1,I


In [16]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [17]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [18]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene")

['Q13625:TP53BP2',
 'Q96KQ4:PPP1R13B',
 'P62993:GRB2',
 'Q06187:BTK',
 'Q8WUF5:PPP1R13L',
 'Q9H3D4:TP63',
 'H2EHT1:TP53',
 'K7PPA8:TP53',
 'Q96KQ7:EHMT2',
 'O15350:TP73',
 'P07948:LYN',
 'Q9H9B1:EHMT1',
 'Q99728:BARD1',
 'O75832:PSMD10',
 'Q9ULT8:HECTD1']

In [19]:
brca_mutex.get_disrupted_interactors("TP53", identifier_type="gene", return_counter=True)

Counter({'O15350:TP73': 13,
         'Q96KQ7:EHMT2': 14,
         'Q13625:TP53BP2': 42,
         'Q9H9B1:EHMT1': 9,
         'Q9H3D4:TP63': 18,
         'Q96KQ4:PPP1R13B': 42,
         'Q99728:BARD1': 8,
         'O75832:PSMD10': 8,
         'Q9ULT8:HECTD1': 8,
         'H2EHT1:TP53': 16,
         'K7PPA8:TP53': 16,
         'P62993:GRB2': 33,
         'P07948:LYN': 12,
         'Q8WUF5:PPP1R13L': 30,
         'Q06187:BTK': 31})

In [20]:
brca_mutex.get_disrupted_interactors("H3C1", identifier_type="gene", return_counter=True)

Counter({'P62805:H4C1': 6,
         'Q9UER7:DAXX': 4,
         'P04908:H2AC4': 4,
         'Q16777:H2AC20': 3,
         'P16104:H2AX': 4,
         'P0C0S8:H2AC11': 4,
         'O75367:MACROH2A1': 4,
         'P0C0S5:H2AZ1': 4,
         'Q9Y294:ASF1A': 1,
         'Q9NVP2:ASF1B': 1,
         'P49450:CENPA': 1})

In [21]:
brca_mutex.get_disrupted_interactors("H4C1", identifier_type="gene", return_counter=True)

Counter({'P84243:H3-3A': 7,
         'Q16695:H3-4': 5,
         'Q71DI3:H3C15': 7,
         'P49450:CENPA': 5,
         'P68431:H3C1': 8,
         'Q16514:TAF12': 2,
         'Q9UER7:DAXX': 3,
         'P06899:H2BC11': 1,
         'Q16778:H2BC21': 1,
         'B4E1C1:nan': 4,
         'Q53F85:nan': 3,
         'P04908:H2AC4': 2,
         'P16104:H2AX': 1,
         'P0C0S5:H2AZ1': 2,
         'Q16777:H2AC20': 2,
         'Q9Y294:ASF1A': 1})

In [22]:
brca_mutex.get_disrupted_interactors("MAP2K4", identifier_type="gene", return_counter=True)

Counter({'Q5S007:LRRK2': 6,
         'P52564:MAP2K6': 6,
         'P00533:EGFR': 1,
         'P35916:FLT4': 1,
         'Q99558:MAP3K14': 1,
         'P45984:MAPK9': 1,
         'Q16539:MAPK14': 2})

In [24]:
ProteinIDFetcher().fetch("TP53")

['H2EHT1', 'K7PPA8', 'P04637']

In [25]:
ProteinIDFetcher().fetch("MAP2K4")

['P45985']

In [26]:
ProteinIDFetcher().fetch("ERBB2")

['P04626', 'J3QLU9']

In [27]:
brca_mutex.calculate_mutual_exclusivity("Q96SB4", "P04626")

1.0

In [28]:
brca_mutex.calculate_mutual_exclusivity("P08069", "Q96SB4")

0.9285714285714286

In [29]:
brca_mutex.calculate_mutual_exclusivity("P08069", "P23458")

0.9375

In [30]:
brca_mutex.calculate_mutual_exclusivity("P23458", "P08069")

0.9375

## Analysis Genes

### ERBB2

In [32]:
ProteinIDFetcher().fetch("ERBB2")

['P04626', 'J3QLU9']

In [34]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P04626")

2021-11-13 21:11:57 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04626's interactors ..
2021-11-13 21:11:57 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04626 have 15 interactors:
['Q96SB4:SRPK1', 'P00533:EGFR', 'P51813:BMX', 'Q14289:PTK2B', 'P16591:FER', 'P06241:FYN', 'P08069:IGF1R', 'Q13153:PAK1', 'P42680:TEC', 'P21860:ERBB3', 'O60674:JAK2', 'P15153:RAC2', 'Q9UJM3:ERRFI1', 'Q92625:ANKS1A', 'P23458:JAK1']


Unnamed: 0,PROTEIN:GENE,INTERACTOR,MUTUAL_EXCLUSIVITY
0,P04626:ERBB2,Q96SB4:SRPK1,1.0
1,P04626:ERBB2,P00533:EGFR,1.0
2,P04626:ERBB2,P51813:BMX,0.9697
3,P04626:ERBB2,Q14289:PTK2B,1.0
4,P04626:ERBB2,P16591:FER,0.9697
5,P04626:ERBB2,P06241:FYN,1.0
6,P04626:ERBB2,P08069:IGF1R,0.97222
7,P04626:ERBB2,Q13153:PAK1,0.96774
8,P04626:ERBB2,P42680:TEC,1.0
9,P04626:ERBB2,P21860:ERBB3,0.95652


In [35]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P04626")

2021-11-13 21:12:44 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04626's interactors ..
2021-11-13 21:12:44 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04626 have 15 interactors:
['Q96SB4:SRPK1', 'P00533:EGFR', 'P51813:BMX', 'Q14289:PTK2B', 'P16591:FER', 'P06241:FYN', 'P08069:IGF1R', 'Q13153:PAK1', 'P42680:TEC', 'P21860:ERBB3', 'O60674:JAK2', 'P15153:RAC2', 'Q9UJM3:ERRFI1', 'Q92625:ANKS1A', 'P23458:JAK1']
2021-11-13 21:12:45 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P04626 ..
2021-11-13 21:12:45 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | dev/mutex\BRCA_P04626_ERBB2_2021-11-13.csv is exported successfully.


### TP53

In [37]:
ProteinIDFetcher().fetch("TP53")

['H2EHT1', 'K7PPA8', 'P04637']

In [38]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P04637")

2021-11-13 21:13:12 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-13 21:13:12 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 15 interactors:
['Q13625:TP53BP2', 'Q96KQ4:PPP1R13B', 'P62993:GRB2', 'Q06187:BTK', 'Q8WUF5:PPP1R13L', 'Q9H3D4:TP63', 'H2EHT1:TP53', 'K7PPA8:TP53', 'Q96KQ7:EHMT2', 'O15350:TP73', 'P07948:LYN', 'Q9H9B1:EHMT1', 'Q99728:BARD1', 'O75832:PSMD10', 'Q9ULT8:HECTD1']


Unnamed: 0,PROTEIN:GENE,INTERACTOR,MUTUAL_EXCLUSIVITY
0,P04637:TP53,Q13625:TP53BP2,0.99528
1,P04637:TP53,Q96KQ4:PPP1R13B,1.0
2,P04637:TP53,P62993:GRB2,1.0
3,P04637:TP53,Q06187:BTK,0.99526
4,P04637:TP53,Q8WUF5:PPP1R13L,1.0
5,P04637:TP53,Q9H3D4:TP63,0.99515
6,P04637:TP53,H2EHT1:TP53,1.0
7,P04637:TP53,K7PPA8:TP53,1.0
8,P04637:TP53,Q96KQ7:EHMT2,1.0
9,P04637:TP53,O15350:TP73,1.0


In [40]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P04637")

2021-11-13 21:13:20 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 15 interactors:
['Q13625:TP53BP2', 'Q96KQ4:PPP1R13B', 'P62993:GRB2', 'Q06187:BTK', 'Q8WUF5:PPP1R13L', 'Q9H3D4:TP63', 'H2EHT1:TP53', 'K7PPA8:TP53', 'Q96KQ7:EHMT2', 'O15350:TP73', 'P07948:LYN', 'Q9H9B1:EHMT1', 'Q99728:BARD1', 'O75832:PSMD10', 'Q9ULT8:HECTD1']
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P04637 ..
To overwrite existing file, use `overwrite=True`.


### H3C1

In [43]:
ProteinIDFetcher().fetch("H3C1")

['P68431']

In [44]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P68431")

2021-11-13 21:14:43 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P68431's interactors ..
2021-11-13 21:14:43 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P68431 have 11 interactors:
['P62805:H4C1', 'Q9UER7:DAXX', 'P04908:H2AC4', 'P16104:H2AX', 'P0C0S8:H2AC11', 'O75367:MACROH2A1', 'P0C0S5:H2AZ1', 'Q16777:H2AC20', 'Q9Y294:ASF1A', 'Q9NVP2:ASF1B', 'P49450:CENPA']


Unnamed: 0,PROTEIN:GENE,INTERACTOR,MUTUAL_EXCLUSIVITY
0,P68431:H3C1,P62805:H4C1,0.97872
1,P68431:H3C1,Q9UER7:DAXX,1.0
2,P68431:H3C1,P04908:H2AC4,0.97674
3,P68431:H3C1,P16104:H2AX,1.0
4,P68431:H3C1,P0C0S8:H2AC11,0.97619
5,P68431:H3C1,O75367:MACROH2A1,1.0
6,P68431:H3C1,P0C0S5:H2AZ1,0.96875
7,P68431:H3C1,Q16777:H2AC20,1.0
8,P68431:H3C1,Q9Y294:ASF1A,1.0
9,P68431:H3C1,Q9NVP2:ASF1B,1.0


In [40]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P68431")

2021-11-13 21:13:20 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 15 interactors:
['Q13625:TP53BP2', 'Q96KQ4:PPP1R13B', 'P62993:GRB2', 'Q06187:BTK', 'Q8WUF5:PPP1R13L', 'Q9H3D4:TP63', 'H2EHT1:TP53', 'K7PPA8:TP53', 'Q96KQ7:EHMT2', 'O15350:TP73', 'P07948:LYN', 'Q9H9B1:EHMT1', 'Q99728:BARD1', 'O75832:PSMD10', 'Q9ULT8:HECTD1']
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P04637 ..
To overwrite existing file, use `overwrite=True`.


### H4C1

In [45]:
ProteinIDFetcher().fetch("H4C1")

['P62805']

In [46]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P62805")

2021-11-13 21:16:26 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P62805's interactors ..
2021-11-13 21:16:26 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P62805 have 16 interactors:
['P68431:H3C1', 'P84243:H3-3A', 'Q71DI3:H3C15', 'Q16695:H3-4', 'P49450:CENPA', 'B4E1C1:nan', 'Q9UER7:DAXX', 'Q53F85:nan', 'Q16514:TAF12', 'P04908:H2AC4', 'P0C0S5:H2AZ1', 'Q16777:H2AC20', 'P06899:H2BC11', 'Q16778:H2BC21', 'P16104:H2AX', 'Q9Y294:ASF1A']


Unnamed: 0,PROTEIN:GENE,INTERACTOR,MUTUAL_EXCLUSIVITY
0,P62805:H4C1,P68431:H3C1,0.97872
1,P62805:H4C1,P84243:H3-3A,1.0
2,P62805:H4C1,Q71DI3:H3C15,1.0
3,P62805:H4C1,Q16695:H3-4,1.0
4,P62805:H4C1,P49450:CENPA,1.0
5,P62805:H4C1,B4E1C1:nan,1.0
6,P62805:H4C1,Q9UER7:DAXX,0.95455
7,P62805:H4C1,Q53F85:nan,1.0
8,P62805:H4C1,Q16514:TAF12,1.0
9,P62805:H4C1,P04908:H2AC4,0.96875


In [40]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P62805")

2021-11-13 21:13:20 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P04637's interactors ..
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P04637 have 15 interactors:
['Q13625:TP53BP2', 'Q96KQ4:PPP1R13B', 'P62993:GRB2', 'Q06187:BTK', 'Q8WUF5:PPP1R13L', 'Q9H3D4:TP63', 'H2EHT1:TP53', 'K7PPA8:TP53', 'Q96KQ7:EHMT2', 'O15350:TP73', 'P07948:LYN', 'Q9H9B1:EHMT1', 'Q99728:BARD1', 'O75832:PSMD10', 'Q9ULT8:HECTD1']
2021-11-13 21:13:20 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P04637 ..
To overwrite existing file, use `overwrite=True`.


### MAP2K4

In [47]:
ProteinIDFetcher().fetch("MAP2K4")

['P45985']

In [46]:
brca_mutex.get_disruptive_mutual_exclusivity_data("P45985")

2021-11-13 21:16:26 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P62805's interactors ..
2021-11-13 21:16:26 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P62805 have 16 interactors:
['P68431:H3C1', 'P84243:H3-3A', 'Q71DI3:H3C15', 'Q16695:H3-4', 'P49450:CENPA', 'B4E1C1:nan', 'Q9UER7:DAXX', 'Q53F85:nan', 'Q16514:TAF12', 'P04908:H2AC4', 'P0C0S5:H2AZ1', 'Q16777:H2AC20', 'P06899:H2BC11', 'Q16778:H2BC21', 'P16104:H2AX', 'Q9Y294:ASF1A']


Unnamed: 0,PROTEIN:GENE,INTERACTOR,MUTUAL_EXCLUSIVITY
0,P62805:H4C1,P68431:H3C1,0.97872
1,P62805:H4C1,P84243:H3-3A,1.0
2,P62805:H4C1,Q71DI3:H3C15,1.0
3,P62805:H4C1,Q16695:H3-4,1.0
4,P62805:H4C1,P49450:CENPA,1.0
5,P62805:H4C1,B4E1C1:nan,1.0
6,P62805:H4C1,Q9UER7:DAXX,0.95455
7,P62805:H4C1,Q53F85:nan,1.0
8,P62805:H4C1,Q16514:TAF12,1.0
9,P62805:H4C1,P04908:H2AC4,0.96875


In [48]:
brca_mutex.export_disruptive_mutual_exclusivity_data("dev/mutex", "P45985")

2021-11-13 21:16:47 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | Calculating Mutual Exclusivity over P45985's interactors ..
2021-11-13 21:16:47 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | P45985 have 7 interactors:
['Q5S007:LRRK2', 'P52564:MAP2K6', 'Q16539:MAPK14', 'P00533:EGFR', 'P35916:FLT4', 'Q99558:MAP3K14', 'P45984:MAPK9']
2021-11-13 21:16:47 |[36m DEBUG    [0m| helpers.helpers_analysis.mutual_exclusivity | Exporting Mutual Exclusivity BRCA P45985 ..
2021-11-13 21:16:47 |[32m INFO     [0m| helpers.helpers_analysis.mutual_exclusivity | dev/mutex\BRCA_P45985_MAP2K4_2021-11-13.csv is exported successfully.


## OV

---

In [21]:
# Select the DISRUPTIVE_INTERACTORS cells of rows whose PROTEIN_GENE
# ("UNIPROT:GENE") has TP53 as its gene part.
# The vectorised .str accessors yield NaN -> False for missing or colon-less
# values instead of raising like an element-wise lambda would, and a single
# .loc call replaces the chained df[mask][col] lookup.
_is_tp53_row = (
    brca_mutex.patient_interaction_data["PROTEIN_GENE"]
    .str.split(":")
    .str[1]
    .eq("TP53")
)
tp53_disruptive_interactions_series = brca_mutex.patient_interaction_data.loc[
    _is_tp53_row, "DISRUPTIVE_INTERACTORS"
]

In [25]:
def _drop_score(entry):
    """Return 'UNIPROT:GENE' from a 'UNIPROT:GENE:SCORE' entry.

    Keeping the first two fields (rather than dropping the last one) leaves an
    already-stripped 'UNIPROT:GENE' entry unchanged, so this cell can be
    re-run without chopping the gene name off.
    """
    return ":".join(entry.split(":")[:2])


# This cell reassigns its own input, so every step must be idempotent:
# drop empty cells, split each comma-separated cell into one row per
# interactor, then strip the trailing score.
tp53_disruptive_interactions_series = (
    tp53_disruptive_interactions_series
    .dropna()
    .apply(lambda cell: cell.split(","))
    .explode()
    .apply(_drop_score)
)

In [35]:
Counter(tp53_disruptive_interactions_series).most_common()

[('Q13625:TP53BP2', 42),
 ('Q96KQ4:PPP1R13B', 42),
 ('P62993:GRB2', 33),
 ('Q06187:BTK', 31),
 ('Q8WUF5:PPP1R13L', 30),
 ('Q9H3D4:TP63', 18),
 ('H2EHT1:TP53', 16),
 ('K7PPA8:TP53', 16),
 ('Q96KQ7:EHMT2', 14),
 ('O15350:TP73', 13),
 ('P07948:LYN', 12),
 ('Q9H9B1:EHMT1', 9),
 ('Q99728:BARD1', 8),
 ('O75832:PSMD10', 8),
 ('Q9ULT8:HECTD1', 8)]

In [29]:
":".join("O15350:TP73:0.77".split(":")[:-1])

'O15350:TP73'

In [13]:
brca_mutex.get_disrupted_interactors("MAP2K4", identifier_type="gene", return_counter=True)

Counter({'Q5S007:LRRK2': 5, 'P00533:EGFR': 1, 'Q16539:MAPK14': 1})

In [14]:
brca_mutex.get_disrupted_interactors("MAP2K4", identifier_type="gene", return_counter=True)

Counter({'Q5S007:LRRK2': 7,
         'P52564:MAP2K6': 7,
         'P00533:EGFR': 1,
         'P35916:FLT4': 1,
         'Q99558:MAP3K14': 1,
         'P45984:MAPK9': 1,
         'Q16539:MAPK14': 2})

In [16]:
brca_mutex.get_disrupted_interactors("MAP2K4", identifier_type="gene", return_counter=True)

Counter({'Q5S007:LRRK2': 7,
         'P52564:MAP2K6': 7,
         'P00533:EGFR': 1,
         'P35916:FLT4': 1,
         'Q99558:MAP3K14': 1,
         'P45984:MAPK9': 1,
         'Q16539:MAPK14': 2})

In [34]:
brca_mutex.get_disrupted_interactors("P04637", identifier_type="protein")

['Q13625:TP53BP2:0.89',
 'Q06187:BTK:0.79',
 'P62993:GRB2:0.71',
 'Q96KQ4:PPP1R13B:0.87',
 'Q8WUF5:PPP1R13L:0.59',
 'Q96KQ4:PPP1R13B:0.78',
 'Q96KQ4:PPP1R13B:0.81',
 'Q96KQ7:EHMT2:0.66',
 'Q9ULT8:HECTD1:0.73',
 'Q13625:TP53BP2:0.94',
 'P62993:GRB2:0.73',
 'O15350:TP73:0.77',
 'Q13625:TP53BP2:0.83',
 'Q9H9B1:EHMT1:0.69',
 'Q9H3D4:TP63:0.76',
 'O15350:TP73:0.83',
 'Q99728:BARD1:0.76',
 'O75832:PSMD10:0.76',
 'Q96KQ7:EHMT2:0.68',
 'H2EHT1:TP53:0.83',
 'K7PPA8:TP53:0.79',
 'Q9H3D4:TP63:0.77',
 'P07948:LYN:0.77',
 'Q8WUF5:PPP1R13L:0.85',
 'P62993:GRB2:0.75',
 'Q8WUF5:PPP1R13L:0.73',
 'Q06187:BTK:0.8',
 'H2EHT1:TP53:0.8',
 'K7PPA8:TP53:0.8',
 'Q8WUF5:PPP1R13L:0.86',
 'Q96KQ4:PPP1R13B:0.85',
 'Q06187:BTK:0.73',
 'Q13625:TP53BP2:0.65',
 'Q96KQ4:PPP1R13B:0.61',
 'P07948:LYN:0.51',
 'P62993:GRB2:0.79',
 'Q06187:BTK:0.81',
 'Q13625:TP53BP2:0.88',
 'Q06187:BTK:0.77',
 'K7PPA8:TP53:0.83',
 'P07948:LYN:0.85',
 'P62993:GRB2:0.64',
 'P62993:GRB2:0.78',
 'Q96KQ4:PPP1R13B:0.79',
 'Q13625:TP53BP2:0.66',


In [35]:
brca_mutex.get_disrupted_interactors("P04637", identifier_type="protein", return_counter=True)

Counter({'O15350:TP73:0.77': 6,
         'Q96KQ7:EHMT2:0.66': 7,
         'Q13625:TP53BP2:0.83': 6,
         'Q9H9B1:EHMT1:0.69': 6,
         'Q9H3D4:TP63:0.76': 6,
         'O15350:TP73:0.83': 6,
         'Q96KQ4:PPP1R13B:0.78': 8,
         'Q99728:BARD1:0.76': 6,
         'O75832:PSMD10:0.76': 6,
         'Q9ULT8:HECTD1:0.73': 7,
         'Q96KQ7:EHMT2:0.68': 6,
         'H2EHT1:TP53:0.83': 6,
         'K7PPA8:TP53:0.79': 6,
         'P62993:GRB2:0.64': 1,
         'Q13625:TP53BP2:0.65': 3,
         'Q96KQ4:PPP1R13B:0.61': 3,
         'P07948:LYN:0.51': 3,
         'Q13625:TP53BP2:0.88': 2,
         'P62993:GRB2:0.78': 1,
         'Q96KQ4:PPP1R13B:0.79': 1,
         'Q8WUF5:PPP1R13L:0.86': 4,
         'Q06187:BTK:0.79': 13,
         'Q13625:TP53BP2:0.89': 17,
         'P62993:GRB2:0.71': 12,
         'Q96KQ4:PPP1R13B:0.87': 12,
         'Q8WUF5:PPP1R13L:0.59': 12,
         'Q13625:TP53BP2:0.94': 7,
         'P62993:GRB2:0.79': 3,
         'Q96KQ4:PPP1R13B:0.81': 8,
         'Q06187:B

In [21]:
brca_mutex.patient_interaction_data["DISRUPTIVE_INTERACTORS"]

0       Q9BY11:PACSIN1:0.5,Q9UNF0:PACSIN2:0.59,Q6FIA3:...
1                                      Q96DB2:HDAC11:0.55
2                                                     NaN
3                    Q14654:KCNJ11:0.52,P63252:KCNJ2:0.53
4                                                     NaN
                              ...                        
1707    P61088:UBE2N:0.5,P51668:UBE2D1:0.51,P62837:UBE...
1708                                                  NaN
1709                                                  NaN
1710                                                  NaN
1711                                                  NaN
Name: DISRUPTIVE_INTERACTORS, Length: 1712, dtype: object

In [22]:
# NOTE(review): `brca_patient_data` is not defined in any cell visible in this
# notebook; presumably it is the patient interaction table
# (brca_mutex.patient_interaction_data) -- confirm before a fresh run.
# INTERACTORS cells of the rows whose PROTEIN_GENE is exactly "P04626:ERBB2".
_erbb2_rows = brca_patient_data["PROTEIN_GENE"].eq("P04626:ERBB2")
P04626_ERBB2_interactors_series = brca_patient_data.loc[_erbb2_rows, "INTERACTORS"]

In [28]:
# Flatten every comma-separated INTERACTORS cell into one flat list of
# interactor labels; duplicates are kept so they can be counted later.
P04626_ERBB2_interactors = [
    interactor
    for interactors_cell in P04626_ERBB2_interactors_series
    for interactor in interactors_cell.split(',')
]

In [39]:
P04626_ERBB2_interactors_counter = Counter(P04626_ERBB2_interactors)
P04626_ERBB2_interactors_counter.most_common()

[('Q96SB4:SRPK1', 8),
 ('Q13153:PAK1', 3),
 ('P21860:ERBB3', 2),
 ('P00533:EGFR', 2),
 ('P51813:BMX', 2),
 ('Q14289:PTK2B', 2),
 ('P16591:FER', 2),
 ('P06241:FYN', 2),
 ('P08069:IGF1R', 2),
 ('P42680:TEC', 2),
 ('P36897:TGFBR1', 2),
 ('O60674:JAK2', 1),
 ('P15153:RAC2', 1),
 ('Q9UJM3:ERRFI1', 1),
 ('Q92625:ANKS1A', 1),
 ('P23458:JAK1', 1)]

In [45]:
# Part of each label before the first ':' (the UniProt accession), one per
# distinct interactor, in most_common() order (most frequent first).
P04626_ERBB2_interactors_unique = []
for label, _count in P04626_ERBB2_interactors_counter.most_common():
    accession, _sep, _gene = label.partition(':')
    P04626_ERBB2_interactors_unique.append(accession)
P04626_ERBB2_interactors_unique[:5]

['Q96SB4', 'Q13153', 'P21860', 'P00533', 'P51813']

In [46]:
for interactor in P04626_ERBB2_interactors_unique:
    print(brca_mutex.calculate_mutual_exclusivity("P04626", interactor))

1.0
0.967741935483871
0.9565217391304348
1.0
0.9696969696969697
1.0
0.9696969696969697
1.0
0.9722222222222222
1.0
1.0
1.0
1.0
1.0
1.0
0.9705882352941176


In [None]:
P04626_ERBB2_interactors_unique = set(P04626_ERBB2_interactors)

In [38]:
sorted(P04626_ERBB2_interactors_counter.keys())

['O60674:JAK2',
 'P00533:EGFR',
 'P06241:FYN',
 'P08069:IGF1R',
 'P15153:RAC2',
 'P16591:FER',
 'P21860:ERBB3',
 'P23458:JAK1',
 'P36897:TGFBR1',
 'P42680:TEC',
 'P51813:BMX',
 'Q13153:PAK1',
 'Q14289:PTK2B',
 'Q92625:ANKS1A',
 'Q96SB4:SRPK1',
 'Q9UJM3:ERRFI1']

In [29]:
P04626_ERBB2_interactors

['P21860:ERBB3',
 'O60674:JAK2',
 'P15153:RAC2',
 'Q96SB4:SRPK1',
 'Q9UJM3:ERRFI1',
 'Q92625:ANKS1A',
 'P23458:JAK1',
 'Q96SB4:SRPK1',
 'Q96SB4:SRPK1',
 'Q13153:PAK1',
 'P21860:ERBB3',
 'Q96SB4:SRPK1',
 'Q96SB4:SRPK1',
 'Q96SB4:SRPK1',
 'Q96SB4:SRPK1',
 'P00533:EGFR',
 'Q96SB4:SRPK1',
 'P51813:BMX',
 'Q14289:PTK2B',
 'P16591:FER',
 'P06241:FYN',
 'P08069:IGF1R',
 'Q13153:PAK1',
 'P42680:TEC',
 'P36897:TGFBR1',
 'P00533:EGFR',
 'P51813:BMX',
 'Q14289:PTK2B',
 'P16591:FER',
 'P06241:FYN',
 'P08069:IGF1R',
 'Q13153:PAK1',
 'P42680:TEC',
 'P36897:TGFBR1']

In [18]:
brca_patient_data[
    (brca_patient_data["PROTEIN_GENE"] == "P04626:ERBB2")
]["PATIENT"].nunique()

14

In [42]:
brca_mutex.snv_data[
    (brca_mutex.snv_data["Tumor_Sample_Barcode"] == 'TCGA-EW-A1PD') &
    (brca_mutex.snv_data["SWISSPROT"] == "P04626")
]

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
34918,ERBB2,P04626,H470Q,TCGA-EW-A1PD


In [None]:
'TCGA-EW-A1PD'

In [31]:
brca_patient_data["PATIENT"].nunique()

612

In [45]:
brca_prediction = pd.read_csv("../data/predictions_datasets/brca_prediction_2021-11-11/8859d2cb/predictions_soft_2021-11-11.csv")
brca_prediction[
    (brca_prediction["UniProt_ID"] == "P04626") &
    (brca_prediction["Mutation"] == "H470Q")
]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction,Median_Probability
