# Disruptive Interactions For Each Patient

For each patient, disruptive predicted interactions will be displayed.

**Example**

* `TCGA-XXXX-XXXX-XX`:


In [1]:
import os

os.chdir('../')

# os.listdir()

In [2]:
from helpers.helpers_analysis.interactions_per_patient import InteractionsPerPatient
from helpers.helpers_analysis.get_patient_protein_to_mutations_dict import get_patient_protein_to_mutations_dict

# PREDICTION_COMMON_PATH = r"../../data/predictions_datasets"
# PREDICTION_DATA_PATH = os.path.join(
#     PREDICTION_COMMON_PATH, r"brca_prediction_2021-09-28/acf35ed1/predictions_soft_2021-09-28.csv"
# )

BRCA_PREDICTION_ID = "acf35ed1/"
BRCA_PREDICTIONS_COMMON_PATH = "../data/predictions_datasets/brca_prediction_2021-09-28/" + BRCA_PREDICTION_ID
PREDICTION_BRCA_REDUCED_PATH = BRCA_PREDICTIONS_COMMON_PATH + "predictions_soft_2021-09-28.csv"

OV_PREDICTION_ID = "d872749a/"
OV_PREDICTIONS_COMMON_PATH = "../data/predictions_datasets/ov_prediction_2021-09-28/" + OV_PREDICTION_ID
PREDICTION_OV_REDUCED_PATH = OV_PREDICTIONS_COMMON_PATH + "predictions_soft_2021-09-28.csv"

SNV_COMMON_PATH = "C:/Users/ibrah/Desktop/TUSEB_Study/Data_Collection_and_Filtering/SNV/"
BRCA_SNV_PATH = os.path.join(SNV_COMMON_PATH, "SNV_BRCA_hg38.csv")
OV_SNV_PATH = os.path.join(SNV_COMMON_PATH, "SNV_OV_hg38.csv")

# Reflect changes in the modules immediately.
%load_ext autoreload
%autoreload 2

## BRCA

### Instantiate the Class `InteractionsPerPatient`

In [3]:
brca_interactions_per_patient = InteractionsPerPatient(
    tcga="BRCA",
    prediction_data_path=PREDICTION_BRCA_REDUCED_PATH,
    tcga_snv_path=BRCA_SNV_PATH,
    identifier="uniprot",
    verbose=False,
)

2021-10-06 12:58:54 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Loading materials ..
2021-10-06 12:58:54 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading SNV data simplified ..
2021-10-06 12:58:54 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading BRCA SNV datasets ..
2021-10-06 12:58:59 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data size: (120988, 121)
2021-10-06 12:58:59 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data processed size: (60278, 121)
2021-10-06 12:58:59 |[32m INFO     [0m| helpers.helpers_analysis.loaders | BRCA SNV datasets are loaded.
2021-10-06 12:58:59 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading patient ids ..
2021-10-06 12:58:59 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading patient to snv_data ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-06 12:59:04 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading the prediction data ..
2021-10-06 12:59:04 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading the disruptive prediction data ..
2021-10-06 12:59:04 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Loading UniProt ID to Gene ID ..


Retrieving Gene IDs from UniProt API .. :   0%|          | 0/2512 [00:00<?, ?it/s]

2021-10-06 12:59:05 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | `uniprot_to_gene_id` loaded. 
2021-10-06 12:59:05 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Materials loaded.
2021-10-06 12:59:05 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Number of BRCA patients: None.
2021-10-06 12:59:05 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Finding all interactions (disruptive and non-disruptive) for each patient ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-06 13:00:31 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Finding disruptive interactions for each patient ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-06 13:01:49 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Disruptive interactions per patient completed.


In [4]:
brca_interactions_per_patient.print_disruptive_interactions_per_patient()

TCGA-D8-A1XY -> [('Q9UKS6', 'R24H', 'Q9BY11'), ('Q9UKS6', 'R24H', 'Q9UNF0'), ('Q9UKS6', 'R24H', 'Q5TZC3'), ('Q9UKS6', 'R24H', 'Q6FIA3')]
TCGA-AO-A0J5 -> [('Q9UBN7', 'D567Y', 'Q96DB2')]
TCGA-A8-A093 -> [('P28062', 'R216W', 'P40306'), ('Q15842', 'E237K', 'Q14654'), ('Q15842', 'E237K', 'P63252')]
TCGA-C8-A133 -> []
TCGA-AQ-A54O -> []
TCGA-A8-A07G -> []
TCGA-LD-A74U -> [('O43684', 'D117G', 'Q12834')]
TCGA-OL-A5RV -> [('Q01196', 'G95R', 'Q13951')]
TCGA-AR-A24V -> []
TCGA-PL-A8LX -> []
TCGA-BH-A0W5 -> []
TCGA-BH-A18F -> [('Q12840', 'G36R', 'O60282')]
TCGA-HN-A2OB -> []
TCGA-BH-A2L8 -> [('Q71DI3', 'E98Q', 'P62805'), ('Q14469', 'R46Q', 'Q9UBP5'), ('P20810', 'D466H', 'P17655'), ('P20810', 'D466H', 'P07384'), ('P20810', 'D466H', 'B2RDI5'), ('P20810', 'D466H', 'B4DWH5'), ('P67775', 'E297K', 'P62136'), ('O15111', 'M65I', 'O43353'), ('Q9H4L4', 'Q399H', 'P55854'), ('Q7RTN6', 'R226G', 'Q15831'), ('Q7RTN6', 'R226G', 'Q9Y376'), ('Q9HAP6', 'S115L', 'P78352'), ('Q9UHP3', 'S507L', 'P54725'), ('O14936', 'M

In [5]:
brca_interactions_per_patient.construct_analysis_table()

2021-10-06 13:01:49 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Constructing the analysis table ..


  0%|          | 0/985 [00:00<?, ?it/s]

TCGA-D8-A1XY	Q9UKS6	R24H	All interactors: ['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']
----------------------------------------------------------------------------------------------------
TCGA-AO-A0J5	Q9UBN7	D567Y	All interactors: ['Q96DB2']
----------------------------------------------------------------------------------------------------
TCGA-A8-A093	P28062	R216W	All interactors: ['P40306']
TCGA-A8-A093	Q15842	E237K	All interactors: ['Q14654', 'P63252']
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
TCGA-LD-A74U	O43684	D117G	All interactors: ['Q12834']
-------------------------------------------------------------------------------------

In [6]:
brca_interactions_per_patient.analysis_table.head()

Unnamed: 0,PATIENT,PROTEIN_GENE,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
0,TCGA-D8-A1XY,Q9UKS6:PACSIN3,R24H,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,,0
1,TCGA-AO-A0J5,Q9UBN7:HDAC6,D567Y,Q96DB2:HDAC11,1,Q96DB2:HDAC11,1,,0
2,TCGA-A8-A093,P28062:PSMB8,R216W,P40306:PSMB10,1,P40306:PSMB10,1,,0
3,TCGA-A8-A093,Q15842:KCNJ8,E237K,"Q14654:KCNJ11,P63252:KCNJ2",2,"Q14654:KCNJ11,P63252:KCNJ2",2,,0
4,TCGA-LD-A74U,O43684:BUB3,D117G,Q12834:CDC20,1,Q12834:CDC20,1,,0


In [7]:
brca_interactions_per_patient.extract(folder="dev")

2021-10-06 13:01:54 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | dev\BRCA_patient_interactions_analysis_table_2021-10-06.xlsx is exported.


## OV

### Instantiate the Class `InteractionsPerPatient`

In [8]:
ov_interactions_per_patient = InteractionsPerPatient(
    tcga="OV",
    prediction_data_path=PREDICTION_OV_REDUCED_PATH,
    tcga_snv_path=OV_SNV_PATH,
    identifier="uniprot",
    verbose=False,
)

2021-10-06 13:01:54 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Loading materials ..
2021-10-06 13:01:54 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading SNV data simplified ..
2021-10-06 13:01:54 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading OV SNV datasets ..
2021-10-06 13:01:57 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | OV SNV data size: (75168, 121)
2021-10-06 13:01:57 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | OV SNV data processed size: (38508, 121)
2021-10-06 13:01:58 |[32m INFO     [0m| helpers.helpers_analysis.loaders | OV SNV datasets are loaded.
2021-10-06 13:01:58 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading patient ids ..
2021-10-06 13:01:58 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading patient to snv_data ..


  0%|          | 0/436 [00:00<?, ?it/s]

2021-10-06 13:01:59 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading the prediction data ..
2021-10-06 13:01:59 |[36m DEBUG    [0m| helpers.helpers_analysis.interactions_per_patient | Loading the disruptive prediction data ..
2021-10-06 13:01:59 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Loading UniProt ID to Gene ID ..


Retrieving Gene IDs from UniProt API .. :   0%|          | 0/2012 [00:00<?, ?it/s]

2021-10-06 13:06:19 |[31m[47m[1m CRITICAL [0m| helpers.helpers_analysis.gene_id_retrieval | COULD NOT RETRIEVE GENE FOR PROTEIN: Q13748
2021-10-06 13:09:28 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | `uniprot_to_gene_id` loaded. 
2021-10-06 13:09:28 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Materials loaded.
2021-10-06 13:09:28 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Number of OV patients: None.
2021-10-06 13:09:28 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Finding all interactions (disruptive and non-disruptive) for each patient ..


  0%|          | 0/436 [00:00<?, ?it/s]

2021-10-06 13:10:29 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Finding disruptive interactions for each patient ..


  0%|          | 0/436 [00:00<?, ?it/s]

2021-10-06 13:11:30 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Disruptive interactions per patient completed.


In [9]:
ov_interactions_per_patient.print_disruptive_interactions_per_patient()

TCGA-61-2092 -> [('Q9NRM7', 'K672I', 'O96017'), ('Q9NRM7', 'K672I', 'Q96GD4'), ('Q13619', 'D750N', 'Q8IWE4'), ('Q13619', 'D750N', 'Q96GG9')]
TCGA-36-2540 -> []
TCGA-24-1467 -> [('P34897', 'F501V', 'P34896')]
TCGA-13-0923 -> [('P08134', 'D59E', 'P52565'), ('P62993', 'A3V', 'Q9H8V3'), ('O75175', 'Q684H', 'Q9NZN8'), ('O75175', 'Q684H', 'B2RDX7'), ('O75175', 'Q684H', 'B3KTL6'), ('O75175', 'Q684H', 'F8VV52')]
TCGA-25-1313 -> [('P00742', 'I451M', 'P00740'), ('P00742', 'I451M', 'P35237'), ('P00742', 'I451M', 'P10646'), ('P00742', 'I451M', 'P05154'), ('Q9BVG8', 'R451H', 'F5H4I9'), ('P04156', 'G131R', 'Q53YK7'), ('Q9NQ66', 'E539K', 'Q9BRC7')]
TCGA-29-2432 -> []
TCGA-13-0724 -> []
TCGA-61-1995 -> []
TCGA-61-1998 -> [('Q14814', 'K31T', 'Q06413'), ('P07948', 'F490S', 'Q9UBS0'), ('O43306', 'S989C', 'Q08462'), ('P06737', 'N254I', 'P11216')]
TCGA-20-1686 -> [('P04637', 'R248W', 'Q13625'), ('P04637', 'R248W', 'P62993'), ('P04637', 'R248W', 'Q9H3D4'), ('P04637', 'R248W', 'Q96KQ4'), ('P04637', 'R248W', 

In [10]:
ov_interactions_per_patient.construct_analysis_table()

2021-10-06 13:11:31 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | Constructing the analysis table ..


  0%|          | 0/436 [00:00<?, ?it/s]

TCGA-61-2092	Q9NRM7	K672I	All interactors: ['O96017', 'Q96GD4']
TCGA-61-2092	Q13619	D750N	All interactors: ['Q6PH85', 'Q92564', 'Q8IWE4', 'Q96GG9', 'Q9BTE7']
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
TCGA-24-1467	P34897	F501V	All interactors: ['P34896']
----------------------------------------------------------------------------------------------------
TCGA-13-0923	P08134	D59E	All interactors: ['P52565']
TCGA-13-0923	P62993	A3V	All interactors: ['Q9H8V3', 'Q14155']
TCGA-13-0923	O75175	Q684H	All interactors: ['Q9NZN8', 'B2RDX7', 'B3KTL6', 'F8VV52']
----------------------------------------------------------------------------------------------------
TCGA-25-1313	P00742	I451M	All interactors: ['P00740', 'P35237', 'P10646', 'P05154']
TCGA-25-1313	Q9BVG8	R451H	All interactors: ['F5H4I9']
TCGA-25-1313	P04156	G131R	All interactors: ['Q5

In [11]:
ov_interactions_per_patient.analysis_table.head()

Unnamed: 0,PATIENT,PROTEIN_GENE,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
0,TCGA-61-2092,Q9NRM7:LATS2,K672I,"O96017:CHEK2,Q96GD4:AURKB",2,"O96017:CHEK2,Q96GD4:AURKB",2,,0
1,TCGA-61-2092,Q13619:CUL4A,D750N,"Q6PH85:DCUN1D2,Q92564:DCUN1D4,Q8IWE4:DCUN1D3,Q...",5,"Q8IWE4:DCUN1D3,Q96GG9:DCUN1D1",2,"Q6PH85:DCUN1D2,Q92564:DCUN1D4,Q9BTE7:DCUN1D5",3
2,TCGA-24-1467,P34897:SHMT2,F501V,P34896:SHMT1,1,P34896:SHMT1,1,,0
3,TCGA-13-0923,P08134:RHOC,D59E,P52565:ARHGDIA,1,P52565:ARHGDIA,1,,0
4,TCGA-13-0923,P62993:GRB2,A3V,"Q9H8V3:ECT2,Q14155:ARHGEF7",2,Q9H8V3:ECT2,1,Q14155:ARHGEF7,1


In [12]:
ov_interactions_per_patient.extract(folder="dev")

2021-10-06 13:11:35 |[32m INFO     [0m| helpers.helpers_analysis.interactions_per_patient | dev\OV_patient_interactions_analysis_table_2021-10-06.xlsx is exported.


In [13]:
ov_interactions_per_patient.get_disruptive_interactors("Q13619", "D750N")

['Q8IWE4', 'Q96GG9']

In [16]:
ov_prediction_data = ov_interactions_per_patient.prediction_data
ov_prediction_data[
    (ov_prediction_data["UniProt_ID"] == "Q13619") &
    (ov_prediction_data["Mutation"] == "D750N")
]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
32,Q13619,D750N,Q6PH85,1
33,Q13619,D750N,Q92564,1
34,Q13619,D750N,Q8IWE4,0
35,Q13619,D750N,Q96GG9,0
36,Q13619,D750N,Q9BTE7,1


In [14]:
brca_interactions_per_patient.get_disruptive_interactors("Q13619", "D750N")

[]

In [None]:
# TODO.
# run unittest with OV, too.

In [46]:
analysis_table  # <- last remained here
# next on: export to excel
# also export to text file (simpler version of this.)

Unnamed: 0_level_0,PROTEIN_GENE,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
PATIENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TCGA-D8-A1XY,Q9UKS6:PACSIN3,R24H,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,"Q9BY11:PACSIN1,Q9UNF0:PACSIN2,Q5TZC3:PACSIN1,Q...",4,,0
TCGA-AO-A0J5,Q9UBN7:HDAC6,D567Y,Q96DB2:HDAC11,1,Q96DB2:HDAC11,1,,0
TCGA-A8-A093,Q15842:KCNJ8,E237K,"Q14654:KCNJ11,P63252:KCNJ2",2,"Q14654:KCNJ11,P63252:KCNJ2",2,,0
TCGA-LD-A74U,O43684:BUB3,D117G,Q12834:CDC20,1,Q12834:CDC20,1,,0
TCGA-OL-A5RV,Q01196:RUNX1,G95R,Q13951:CBFB,1,Q13951:CBFB,1,,0
...,...,...,...,...,...,...,...,...
TCGA-B6-A1KI,Q86UL8:MAGI2,R925C,Q9Y4G8:RAPGEF2,1,Q9Y4G8:RAPGEF2,1,,0
TCGA-AN-A0AK,P53675:CLTCL1,R1226C,P09496:CLTA,1,P09496:CLTA,1,,0
TCGA-EW-A1PB,P00748:F12,D557Y,P05067:APP,1,P05067:APP,1,,0
TCGA-PE-A5DE,Q9UNE7:STUB1,E238Q,"P68036:UBE2L3,Q13404:UBE2V1,P61088:UBE2N,P5166...",11,P51668:UBE2D1,1,"P68036:UBE2L3,Q13404:UBE2V1,P61088:UBE2N,P6107...",10


In [5]:
disruptive_interactions_per_patient.patient_to_disruptive_interactions["TCGA-BH-A0BR"]

[('Q16828', 'E25K', 'P27361')]

In [None]:
for patient in disruptive_interactions_per_patient.patients:
    patient_snv_data = disruptive_interactions_per_patient.patient_to_snv_data[patient]
    

In [41]:
disruptive_interactions_per_patient.prediction_data[
    (disruptive_interactions_per_patient.prediction_data["UniProt_ID"] == 'Q16828') &
    (disruptive_interactions_per_patient.prediction_data["Mutation"] == 'E25K')
]["Interactor_UniProt_ID"]

1582    P45984
1583    P27361
Name: Interactor_UniProt_ID, dtype: object

In [42]:
disruptive_interactions_per_patient.get_all_interactions("Q16828", "E25K")

['P45984', 'P27361']

In [21]:
disruptive_interactions_per_patient.patient_to_interactions["TCGA-BH-A0BR"]

[('Q8WZ74', 'N820S', 'P62714'),
 ('Q8WZ74', 'N820S', 'Q9Y6E0'),
 ('Q8WZ74', 'N820S', 'Q9P289'),
 ('Q8WZ74', 'N820S', 'P67775'),
 ('Q8WZ74', 'N820S', 'O00506'),
 ('Q16828', 'E25K', 'P45984'),
 ('Q16828', 'E25K', 'P27361')]

In [22]:
disruptive_interactions_per_patient.disruptive_prediction_data

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0
...,...,...,...,...
3868,P49356,G244E,P49354,0
3873,P00352,S501L,P05091,0
3876,P50616,S100Y,Q9UIV1,0
3878,P68431,F79L,P62805,0


In [14]:
from tqdm.notebook import tqdm

In [42]:
def construct_table():
    entries = []
    patient_to_table_values = {}
    for patient in tqdm(disruptive_interactions_per_patient.patients):
        patient_disruptive_interactions = disruptive_interactions_per_patient.patient_to_disruptive_interactions[patient]
        
        for protein, mutation, _ in patient_disruptive_interactions:
            # unnecessary duplication: but works fine.
            # TCGA-A8-A093   Q15842   E237K   ['Q14654', 'P63252']
            # TCGA-A8-A093   Q15842   E237K   ['Q14654', 'P63252']
            interactors = disruptive_interactions_per_patient.get_all_interactors(protein, mutation)
            disruptive_interactors = disruptive_interactions_per_patient.get_disruptive_interactors(protein, mutation)
            non_disruptive_interactors = disruptive_interactions_per_patient.get_non_disruptive_interactors(protein, mutation)
            
            patient_to_table_values[patient] = {
                "PROTEIN": f"{protein}:{#TODO}", 
                "MUTATION": mutation,
                "INTERACTORS": ','.join(interactors),
                "NUM_INTERACTORS": len(interactors),
                "DISRUPTIVE_INTERACTORS": ','.join(disruptive_interactors),
                "NUM_DISRUPTIVE_INTERACTORS": len(disruptive_interactors),     
                "NON_DISRUPTIVE_INTERACTORS": ','.join(non_disruptive_interactors),
                "NUM_NON_DISRUPTIVE_INTERACTORS": len(non_disruptive_interactors),   
            }
            
            print(
                f"{patient}\t{protein}\t{mutation}\t{interactors}"
            )
        
        print('-' * 100)
        

        
    analysis_table = pd.DataFrame(
        columns=[
            "PATIENT",
            "PROTEIN",
            "MUTATION",
            "INTERACTORS",
            "NUM_INTERACTORS",
            "DISRUPTIVE_INTERACTORS",
            "NUM_DISRUPTIVE_INTERACTORS",
            "NON_DISRUPTIVE_INTERACTORS",
            "NUM_NON_DISRUPTIVE_INTERACTORS"
        ]
    )
    analysis_table.set_index("PATIENT", inplace=True)
    
    for patient, table_values in patient_to_table_values.items():
        df.loc[patient] = table_values
    
    return patient_to_table_values
#             entry = (
#                 patient, protein, mutation, interactor,
#             )

#         num_disruptive_interactions = len(patient_disruptive_interactions)

In [43]:
mydict = construct_table()

  0%|          | 0/985 [00:00<?, ?it/s]

TCGA-D8-A1XY	Q9UKS6	R24H	['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']
TCGA-D8-A1XY	Q9UKS6	R24H	['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']
TCGA-D8-A1XY	Q9UKS6	R24H	['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']
TCGA-D8-A1XY	Q9UKS6	R24H	['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']
----------------------------------------------------------------------------------------------------
TCGA-AO-A0J5	Q9UBN7	D567Y	['Q96DB2']
----------------------------------------------------------------------------------------------------
TCGA-A8-A093	P28062	R216W	['P40306']
TCGA-A8-A093	Q15842	E237K	['Q14654', 'P63252']
TCGA-A8-A093	Q15842	E237K	['Q14654', 'P63252']
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
---------------------------------------------------------------

In [44]:
import pandas as pd

In [54]:
df = pd.DataFrame(
    columns=[
        "PATIENT",
        "PROTEIN",
        "MUTATION",
        "INTERACTORS",
        "NUM_INTERACTORS",
        "DISRUPTIVE_INTERACTORS",
        "NUM_DISRUPTIVE_INTERACTORS",
        "NON_DISRUPTIVE_INTERACTORS",
        "NUM_NON_DISRUPTIVE_INTERACTORS"
    ]
)
df.set_index("PATIENT", inplace=True)
df

Unnamed: 0_level_0,PROTEIN,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
PATIENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [55]:
for patient, values in mydict.items():
    df.loc[patient] = values

In [57]:
df.head()

Unnamed: 0_level_0,PROTEIN,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
PATIENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TCGA-D8-A1XY,Q9UKS6,R24H,"Q9BY11,Q9UNF0,Q5TZC3,Q6FIA3",4,"Q9BY11,Q9UNF0,Q5TZC3,Q6FIA3",4,,0
TCGA-AO-A0J5,Q9UBN7,D567Y,Q96DB2,1,Q96DB2,1,,0
TCGA-A8-A093,Q15842,E237K,"Q14654,P63252",2,"Q14654,P63252",2,,0
TCGA-LD-A74U,O43684,D117G,Q12834,1,Q12834,1,,0
TCGA-OL-A5RV,Q01196,G95R,Q13951,1,Q13951,1,,0


In [58]:
df.sort_values(by="NUM_NON_DISRUPTIVE_INTERACTORS", ascending=False)

Unnamed: 0_level_0,PROTEIN,MUTATION,INTERACTORS,NUM_INTERACTORS,DISRUPTIVE_INTERACTORS,NUM_DISRUPTIVE_INTERACTORS,NON_DISRUPTIVE_INTERACTORS,NUM_NON_DISRUPTIVE_INTERACTORS
PATIENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TCGA-AR-A5QQ,P01112,G13R,"Q12967,O95267,Q8TDF6,Q8IV61,O15211,Q9Y4G8,Q8IZ...",18,"Q13905,Q13972,Q07890,Q07889,G5E9C8",5,"Q12967,O95267,Q8TDF6,Q8IV61,O15211,Q9Y4G8,Q8IZ...",13
TCGA-PE-A5DE,Q9UNE7,E238Q,"P68036,Q13404,P61088,P51668,P61077,P51965,Q96L...",11,P51668,1,"P68036,Q13404,P61088,P61077,P51965,Q96LR5,Q969...",10
TCGA-A2-A0CS,Q13131,V35G,"Q8TDC3,Q8IWQ3,P41240,P57059,Q9H0K1,P54646,Q8N5...",15,"Q8TDC3,Q8IWQ3,P57059,P54646,Q8N5S9,Q9H093,Q9Y2...",8,"P41240,Q9H0K1,O43318,P78362,Q9UM73,O60285,Q15831",7
TCGA-E2-A574,P04637,R248Q,"Q13625,P62993,Q9H3D4,Q96KQ4,Q8WUF5,P07948,Q061...",9,"Q13625,Q96KQ4,P07948",3,"P62993,Q9H3D4,Q8WUF5,Q06187,H2EHT1,K7PPA8",6
TCGA-B6-A0I1,P04637,R248Q,"Q13625,P62993,Q9H3D4,Q96KQ4,Q8WUF5,P07948,Q061...",9,"Q13625,Q96KQ4,P07948",3,"P62993,Q9H3D4,Q8WUF5,Q06187,H2EHT1,K7PPA8",6
...,...,...,...,...,...,...,...,...
TCGA-OL-A5RY,P04637,H179Q,"Q13625,Q9H9B1,Q9H3D4,O15350,Q96KQ4,Q99728,O758...",11,"Q13625,Q9H9B1,Q9H3D4,O15350,Q96KQ4,Q99728,O758...",11,,0
TCGA-GM-A3XL,Q9Y297,R460T,"Q9UM11,O15169,Q9UKB1",3,"Q9UM11,O15169,Q9UKB1",3,,0
TCGA-AR-A0TW,Q12879,G322W,Q05586,1,Q05586,1,,0
TCGA-BH-A0C1,Q01196,G108D,Q13951,1,Q13951,1,,0


In [None]:
df[df[""]]

In [26]:
mydict["TCGA-BH-A0BR"]

{'PROTEIN': 'Q16828',
 'MUTATION': 'E25K',
 'INTERACTORS': ['P45984', 'P27361'],
 'NUM_INTERACTORS': 2,
 'DISRUPTIVE_INTERACTORS': ['P27361'],
 'NUM_DISRUPTIVE_INTERACTORS': 1,
 'NON_DISRUPTIVE_INTERACTORS': ['P45984'],
 'NUM_NON_DISRUPTIVE_INTERACTORS': 1}

In [None]:
for item in patient_to_table_values[patient]

In [17]:
patient_to_table_values

NameError: name 'patient_to_table_values' is not defined

In [3]:
disruptive_interactions_per_patient = DisruptiveInteractionsPerPatient(
    tcga="BRCA",
    prediction_data_path=PREDICTION_BRCA_REDUCED_PATH,
    tcga_snv_path=SNV_PATH,
    identifier="uniprot",
    verbose=False,
)

2021-10-05 21:17:47 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading materials ..
2021-10-05 21:17:47 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading SNV data simplified ..
2021-10-05 21:17:47 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading BRCA SNV datasets ..
2021-10-05 21:17:52 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data size: (120988, 121)
2021-10-05 21:17:52 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data processed size: (60278, 121)
2021-10-05 21:17:52 |[32m INFO     [0m| helpers.helpers_analysis.loaders | BRCA SNV datasets are loaded.
2021-10-05 21:17:52 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading patient ids ..
2021-10-05 21:17:52 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading patient to snv_data ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-05 21:17:57 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading the prediction data ..
2021-10-05 21:17:57 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading the disruptive prediction data ..
2021-10-05 21:17:57 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading UniProt ID to Gene ID ..


Retrieving Gene IDs from UniProt API .. :   0%|          | 0/2512 [00:00<?, ?it/s]

2021-10-05 21:17:57 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | `uniprot_to_gene_id` loaded. 
2021-10-05 21:17:57 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Materials loaded.
2021-10-05 21:17:57 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Number of BRCA patients: None.
2021-10-05 21:17:57 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Finding disruptive interactions for each patient ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-05 21:19:18 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Disruptive interactions per patient completed.


In [4]:
disruptive_interactions_per_patient.print()

TCGA-D8-A1XY -> [('Q9UKS6', 'R24H', 'Q9BY11'), ('Q9UKS6', 'R24H', 'Q9UNF0'), ('Q9UKS6', 'R24H', 'Q5TZC3'), ('Q9UKS6', 'R24H', 'Q6FIA3')]
TCGA-AO-A0J5 -> [('Q9UBN7', 'D567Y', 'Q96DB2')]
TCGA-A8-A093 -> [('P28062', 'R216W', 'P40306'), ('Q15842', 'E237K', 'Q14654'), ('Q15842', 'E237K', 'P63252')]
TCGA-C8-A133 -> []
TCGA-AQ-A54O -> []
TCGA-A8-A07G -> []
TCGA-LD-A74U -> [('O43684', 'D117G', 'Q12834')]
TCGA-OL-A5RV -> [('Q01196', 'G95R', 'Q13951')]
TCGA-AR-A24V -> []
TCGA-PL-A8LX -> []
TCGA-BH-A0W5 -> []
TCGA-BH-A18F -> [('Q12840', 'G36R', 'O60282')]
TCGA-HN-A2OB -> []
TCGA-BH-A2L8 -> [('Q71DI3', 'E98Q', 'P62805'), ('Q14469', 'R46Q', 'Q9UBP5'), ('P20810', 'D466H', 'P17655'), ('P20810', 'D466H', 'P07384'), ('P20810', 'D466H', 'B2RDI5'), ('P20810', 'D466H', 'B4DWH5'), ('P67775', 'E297K', 'P62136'), ('O15111', 'M65I', 'O43353'), ('Q9H4L4', 'Q399H', 'P55854'), ('Q7RTN6', 'R226G', 'Q15831'), ('Q7RTN6', 'R226G', 'Q9Y376'), ('Q9HAP6', 'S115L', 'P78352'), ('Q9UHP3', 'S507L', 'P54725'), ('O14936', 'M

In [None]:
disruptive_interactions_per_patient.

In [7]:
disruptive_interactions_per_patient = DisruptiveInteractionsPerPatient(
    tcga="BRCA",
    prediction_data_path=PREDICTION_BRCA_REDUCED_PATH,
    tcga_snv_path=SNV_PATH,
    identifier="hugo",
    verbose=False,
)

2021-10-05 14:39:43 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading materials ..
2021-10-05 14:39:43 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading SNV data simplified ..
2021-10-05 14:39:43 |[32m INFO     [0m| helpers.helpers_analysis.loaders | Loading BRCA SNV datasets ..
2021-10-05 14:39:49 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data size: (120988, 121)
2021-10-05 14:39:49 |[36m DEBUG    [0m| helpers.helpers_analysis.loaders | BRCA SNV data processed size: (60278, 121)
2021-10-05 14:39:49 |[32m INFO     [0m| helpers.helpers_analysis.loaders | BRCA SNV datasets are loaded.
2021-10-05 14:39:49 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading patient ids ..
2021-10-05 14:39:49 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading patient to snv_data ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-05 14:39:55 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading the prediction data ..
2021-10-05 14:39:55 |[36m DEBUG    [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading the disruptive prediction data ..
2021-10-05 14:39:55 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Loading UniProt ID to Gene ID ..


Retrieving Gene IDs from UniProt API .. :   0%|          | 0/2512 [00:00<?, ?it/s]

2021-10-05 14:39:56 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | `uniprot_to_gene_id` loaded. 
2021-10-05 14:39:56 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Materials loaded.
2021-10-05 14:39:56 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Number of BRCA patients: None.
2021-10-05 14:39:56 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Finding disruptive interactions for each patient ..


  0%|          | 0/985 [00:00<?, ?it/s]

2021-10-05 14:41:46 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Disruptive interactions per patient completed.


In [8]:
disruptive_interactions_per_patient.print()

TCGA-D8-A1XY -> [('Q9UKS6:PACSIN3', 'R24H', 'Q9BY11:PACSIN1'), ('Q9UKS6:PACSIN3', 'R24H', 'Q9UNF0:PACSIN2'), ('Q9UKS6:PACSIN3', 'R24H', 'Q5TZC3:PACSIN1'), ('Q9UKS6:PACSIN3', 'R24H', 'Q6FIA3:PACSIN2')]
TCGA-AO-A0J5 -> [('Q9UBN7:HDAC6', 'D567Y', 'Q96DB2:HDAC11')]
TCGA-A8-A093 -> [('P28062:PSMB8', 'R216W', 'P40306:PSMB10'), ('Q15842:KCNJ8', 'E237K', 'Q14654:KCNJ11'), ('Q15842:KCNJ8', 'E237K', 'P63252:KCNJ2')]
TCGA-C8-A133 -> []
TCGA-AQ-A54O -> []
TCGA-A8-A07G -> []
TCGA-LD-A74U -> [('O43684:BUB3', 'D117G', 'Q12834:CDC20')]
TCGA-OL-A5RV -> [('Q01196:RUNX1', 'G95R', 'Q13951:CBFB')]
TCGA-AR-A24V -> []
TCGA-PL-A8LX -> []
TCGA-BH-A0W5 -> []
TCGA-BH-A18F -> [('Q12840:KIF5A', 'G36R', 'O60282:KIF5C')]
TCGA-HN-A2OB -> []
TCGA-BH-A2L8 -> [('Q71DI3:H3C15', 'E98Q', 'P62805:H4C1'), ('Q14469:HES1', 'R46Q', 'Q9UBP5:HEY2'), ('P20810:CAST', 'D466H', 'P17655:CAPN2'), ('P20810:CAST', 'D466H', 'P07384:CAPN1'), ('P20810:CAST', 'D466H', 'B2RDI5:nan'), ('P20810:CAST', 'D466H', 'B4DWH5:nan'), ('P67775:PPP2CA', '

In [19]:
import numpy as np

In [20]:
np.nan

nan

In [24]:
pd.isna(df.loc["A8K1F6"]["GENE"])

True

In [22]:
df.loc["A8K1F6"]["GENE"]  np.nan

True

In [1]:
import pandas as pd

In [13]:
df = pd.read_csv("helpers/helpers_analysis/gene_retrieval/UNIPROT_GENE_MAPPING.csv", index_col="UNIPROT")
df[:20] 

Unnamed: 0_level_0,GENE
UNIPROT,Unnamed: 1_level_1
A0AV96,RBM47
A0AVT1,UBA6
A0FGR8,ESYT2
A0FGR9,ESYT3
A0M8W4,UBE2V2
A0PJK8,SCRIB
A1X283,SH3PXD2B
A2RTX5,TARS3
A4D0W0,LSM8
A4D105,RPA3


In [7]:
df.fillna("nan",inplace=True)[:20]

Unnamed: 0,UNIPROT,GENE
0,A0AV96,RBM47
1,A0AVT1,UBA6
2,A0FGR8,ESYT2
3,A0FGR9,ESYT3
4,A0M8W4,UBE2V2
5,A0PJK8,SCRIB
6,A1X283,SH3PXD2B
7,A2RTX5,TARS3
8,A4D0W0,LSM8
9,A4D105,RPA3


Index([], dtype='object')

In [21]:
from pandas import DataFrame
import pandas as pd

In [70]:
mapping_data = DataFrame(columns=["UNIPROT", "GENE"])
mapping_data.set_index('UNIPROT', inplace=True)
mapping_data

Unnamed: 0_level_0,GENE
UNIPROT,Unnamed: 1_level_1


In [71]:
pairs = [("protein_1", "gene_1"), ("protein_2", "gene_2"), ("protein_3", "gene_3")]

In [72]:
mapping_data.loc["protein_1"] = "gene_1"
mapping_data

Unnamed: 0_level_0,GENE
UNIPROT,Unnamed: 1_level_1
protein_1,gene_1


In [67]:
for pair in pairs:
    mapping_data = mapping_data.append({"UNIPROT": pair[0], "GENE": pair[1]}, ignore_index=True)
mapping_data

Unnamed: 0,GENE,UNIPROT
0,gene_1,protein_1
1,gene_2,protein_2
2,gene_3,protein_3


In [55]:
mapping_data[mapping_data["UNIPROT"] == "protein_1"]

Unnamed: 0,UNIPROT,GENE
0,protein_1,gene_1


In [None]:
mapping_data

In [40]:
mapping_data.loc[0] = ["val1", "val2"]

In [41]:
mapping_data

Unnamed: 0,UNIPROT,GENE
0,val1,val2


In [42]:
mapping_data.append(["val1", "val2"])

Unnamed: 0,0,GENE,UNIPROT
0,,val2,val1
0,val1,,
1,val2,,


In [34]:
new_entry = DataFrame({"UNIPROT": "", "val2"})
new_entry

Unnamed: 0,0
0,val1
1,val2


In [23]:
new_entry = DataFrame({["val1", "val2"]})
mapping_data = pd.concat([mapping_data, ["val1", "val2"]])
mapping_data

TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [8]:
disruptive_interactions_per_patient.prediction_data.head()

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0


In [87]:
big_snv_data = pd.read_csv(SNV_PATH, low_memory=False)

In [88]:
big_snv_data.head(2)

Unnamed: 0.1,Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_Position,End_Position,Strand,Variant_Classification,...,FILTER,CONTEXT,src_vcf_id,tumor_bam_uuid,normal_bam_uuid,case_id,GDC_FILTER,COSMIC,MC3_Overlap,GDC_Validation_Status
0,1,USP24,23358,WUGSC,GRCh38,chr1,55159655,55159655,+,Missense_Mutation,...,panel_of_normals,CTGGATTGTAG,d083d669-6646-463b-853e-c58da8d06439,4374e19d-c5e7-49cf-8707-05ae5aeb7369,aadee87c-6a68-4580-bd10-64ac273b1e3d,0130d616-885e-4a6c-9d03-2f17dd692a05,common_in_exac;gdc_pon,,True,Unknown
1,2,ERICH3,127254,WUGSC,GRCh38,chr1,74571494,74571494,+,Missense_Mutation,...,PASS,TTCCTCTACCA,d083d669-6646-463b-853e-c58da8d06439,4374e19d-c5e7-49cf-8707-05ae5aeb7369,aadee87c-6a68-4580-bd10-64ac273b1e3d,0130d616-885e-4a6c-9d03-2f17dd692a05,,COSM1474194,True,Unknown


In [91]:
big_snv_stripped = big_snv_data[["Hugo_Symbol", "SWISSPROT"]].copy()
big_snv_stripped.sample(10)

Unnamed: 0,Hugo_Symbol,SWISSPROT
38316,FLNC,Q14315
11881,CDH11,P55287
60102,RELN,P78509
7543,POC1B,Q8TC44
115775,FANCI,Q9NVI1
115986,PALLD,Q8WX93
105790,ADAMTS9,Q9P2N4
11648,ALCAM,Q13740
65905,PKD1,P98161
12280,PRELID1,Q9Y255


In [90]:
big_snv_stripped[big_snv_stripped['Hugo_Symbol'] == 'NACA2']

Unnamed: 0,Hugo_Symbol,SWISSPROT
43649,NACA2,Q9H009
70505,NACA2,Q9H009
74268,NACA2,Q9H009
82309,NACA2,Q9H009
85598,NACA2,Q9H009
96146,NACA2,Q9H009
115285,NACA2,Q9H009


In [61]:
"B4DRS2" in big_snv_stripped["SWISSPROT"]

False

In [93]:
import os.path as op
import os

print(os.listdir())

['.ipynb_checkpoints', 'Basic.ipynb', 'brca_exceptions.txt', 'dadada_2021-09-07.csv', 'data_set_entries.ipynb', 'dev', 'gene_retrievalUNIPROT_GENE_MAPPING.csv', 'helpers', 'jupyter-goes-crazy', 'poster.py', 'Predator.py', 'PredatorAnalysis.py', 'PredatorAnalysis_BRCA_CancerMine.ipynb', 'PredatorAnalysis_BRCA_CGC.ipynb', 'PredatorAnalysis_BRCA_CGC_old.ipynb', 'PredatorAnalysis_OV_CancerMine.ipynb', 'PredatorAnalysis_OV_CGC.ipynb', 'PredatorStudy_BRCA.ipynb', 'PredatorStudy_OV.ipynb', 'processed_data_740.csv', 'rankings_10.csv', 'temp.csv', 'train_data_sampled.csv', 'trash', '__pycache__']


In [101]:
df = pd.read_csv("gene_retrievalUNIPROT_GENE_MAPPING.csv", index_col="UNIPROT")[:20]
df

Unnamed: 0_level_0,GENE
UNIPROT,Unnamed: 1_level_1
A0AV96,RBM47
A0AVT1,UBA6
A0FGR8,ESYT2
A0FGR9,ESYT3
A0M8W4,UBE2V2
A0PJK8,SCRIB
A1X283,SH3PXD2B
A2RTX5,TARS3
A4D0W0,LSM8
A4D105,RPA3


In [103]:
df.loc["A8K3R3"]["GENE"]

nan

In [64]:
sorted(disruptive_interactions_per_patient.disruptive_prediction_data["Interactor_UniProt_ID"].unique())

['A0AV96',
 'A0AVT1',
 'A0FGR8',
 'A2RTX5',
 'A4D0W0',
 'A4D1F7',
 'A4D1W7',
 'A7KAX9',
 'A8ASI8',
 'A8K0D4',
 'A8K1F6',
 'A8K287',
 'A8K3R3',
 'A8K3Y2',
 'A8K632',
 'A8K6P0',
 'A8MVU1',
 'A8YXX4',
 'B0AZM1',
 'B0AZS3',
 'B0LPE5',
 'B1APZ0',
 'B2R5T5',
 'B2RA10',
 'B2RDI5',
 'B3KM80',
 'B3KMF1',
 'B3KNL6',
 'B3KP06',
 'B3KT07',
 'B3KT21',
 'B3KUK7',
 'B3KV96',
 'B3KVA4',
 'B3KVF5',
 'B3KVH4',
 'B3KXY7',
 'B4DG32',
 'B4DGU4',
 'B4DLQ9',
 'B4DUQ1',
 'B4DV51',
 'B4DWA2',
 'B4DWH5',
 'B4E0K5',
 'B4E0X6',
 'B4E1C1',
 'B6VEX3',
 'B6VEX4',
 'B6VEX5',
 'B7Z1F9',
 'B7Z1I4',
 'B7Z1Z5',
 'B7Z3M9',
 'B7Z425',
 'B7Z4B8',
 'B7Z596',
 'B7Z5N5',
 'B7Z722',
 'B7Z8R1',
 'B7ZAB3',
 'B7ZBM3',
 'C8YZ26',
 'D0VY79',
 'D2CGD1',
 'D2KUA6',
 'D3DRD5',
 'D3W0D1',
 'D6RAH7',
 'D9YZV4',
 'D9YZV5',
 'D9YZV7',
 'E7ESI2',
 'E7ETZ0',
 'E9PER6',
 'E9PHA6',
 'F5H6I0',
 'F5H7S3',
 'F8VXU5',
 'F8WAP6',
 'G3V150',
 'G5E9C8',
 'H0UIA5',
 'H2EHT1',
 'H3BTP3',
 'H7BYY1',
 'I3L4C2',
 'J3KNL2',
 'J3KRN4',
 'K7PML8',
 'K7PPA8',

In [55]:
for protein in disruptive_interactions_per_patient.disruptive_prediction_data["UniProt_ID"].unique():
    search_data = big_snv_stripped[big_snv_stripped["SWISSPROT"] == protein]
    print(f"Current protein: {protein}.")
    if search_data.empty:
        raise ValueError("Oh no!")

Current protein: P28062.
Current protein: Q15842.
Current protein: Q9UKS6.
Current protein: O43684.
Current protein: Q01196.
Current protein: Q12840.
Current protein: Q71DI3.
Current protein: Q14469.
Current protein: P20810.
Current protein: P67775.
Current protein: O15111.
Current protein: Q9H4L4.
Current protein: Q9UBN7.
Current protein: Q7RTN6.
Current protein: Q9HAP6.
Current protein: Q9UHP3.
Current protein: O14936.
Current protein: Q9NZM3.
Current protein: P11215.
Current protein: P05231.
Current protein: P63096.
Current protein: Q9Y2T1.
Current protein: Q96KN2.
Current protein: P36896.
Current protein: P84022.
Current protein: Q01094.
Current protein: P60709.
Current protein: O60506.
Current protein: Q9UQQ2.
Current protein: P04637.
Current protein: Q13619.
Current protein: P12757.
Current protein: P62805.
Current protein: P15056.
Current protein: P01112.
Current protein: P05981.
Current protein: Q06141.
Current protein: Q9Y2Z0.
Current protein: P68431.
Current protein: P11802.


In [56]:
pd.DataFrame({
    "Val_1", "Val_2"
})

Unnamed: 0,0
0,Val_1
1,Val_2


In [54]:
disruptive_interactions_per_patient.disruptive_prediction_data

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0
...,...,...,...,...
3868,P49356,G244E,P49354,0
3873,P00352,S501L,P05091,0
3876,P50616,S100Y,Q9UIV1,0
3878,P68431,F79L,P62805,0


In [39]:
disruptive_interactions_per_patient.prediction_data.sample()

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
124,P15056,A569D,P10398,0


In [7]:
print(disruptive_interactions_per_patient.prediction_data["UniProt_ID"].nunique())
print(disruptive_interactions_per_patient.prediction_data["Interactor_UniProt_ID"].nunique())

1168
1928


In [14]:
snv_simplified[snv_simplified["SWISSPROT"] == "P40306"]

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode


In [11]:
snv_simplified = disruptive_interactions_per_patient.snv_data_simplified.copy()
snv_simplified.head()

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
0,USP24,Q9UPU5,I342V,TCGA-D8-A1XY
1,ERICH3,Q5RHP9,E1406K,TCGA-D8-A1XY
2,SPHKAP,Q2M3C7,R1372S,TCGA-D8-A1XY
3,HRH1,P35367,M206V,TCGA-D8-A1XY
4,LRRC2,Q9BYS8,P359L,TCGA-D8-A1XY


In [4]:
len(disruptive_interactions_per_patient.patient_to_disruptive_interactions)

985

In [16]:
import pandas as pd

In [19]:
data = pd.read_csv('./dev/Q9UPU5_tab.txt', sep='\t')
data

Unnamed: 0,Entry,Entry name,Status,Protein names,Gene names,Organism,Length
0,Q9UPU5,UBP24_HUMAN,reviewed,Ubiquitin carboxyl-terminal hydrolase 24 (EC 3...,USP24 KIAA1057,Homo sapiens (Human),2620


In [22]:
data["Gene names"].values[0]

'USP24 KIAA1057'

In [None]:
TCGA-EW-A1PB

In [134]:
def checker(patient):
    snv_data = disruptive_interactions_per_patient.snv_data_simplified
    disruptive_prediction_data = disruptive_interactions_per_patient.disruptive_prediction_data
    patient_snv_data = snv_data[snv_data["Tumor_Sample_Barcode"] == patient]
    
    assert patient_snv_data.equals(
        disruptive_interactions_per_patient.patient_to_snv_data[patient]
    )
    
    print(f"PATIENT SNV DATA: {patient}")
    # display(patient_snv_data.sort_values(["HGVSp_Short"]))
#     print("DISRUPTIVE_INTERACTIONS:")
#     display(disruptive_interactions_per_patient.patient_to_disruptive_interactions[patient])
    
#     patient_disruptive_interactions = disruptive_interactions_per_patient.patient_to_disruptive_interactions[patient]
#     for disruptive_interactions in patient_disruptive_interactions:
#         protein, mutation, interactor = disruptive_interactions
        
#         assert NOT patient_snv_data[
#             (patient_snv_data["UniProt_ID"] == protein) &
#             (patient_snv_data["Mutation"] == mutation)
#         ].empty
    
    for index, row in patient_snv_data.iterrows():
        snv_protein, snv_mutation = row["SWISSPROT"], row["HGVSp_Short"]
        search_data = disruptive_prediction_data[
            (disruptive_prediction_data["UniProt_ID"] == snv_protein) &
            (disruptive_prediction_data["Mutation"] == snv_mutation)
        ]
        if not search_data.empty:
            display(search_data)

In [135]:
checker("TCGA-EW-A1PB")

PATIENT SNV DATA: TCGA-EW-A1PB


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3714,P00748,D557Y,P05067,0


In [120]:
checker("TCGA-D8-A1XY")

PATIENT SNV DATA: TCGA-D8-A1XY


Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
18,SERPING1,P05155,A145T,TCGA-D8-A1XY
11,AKAP9,Q99996,A15V,TCGA-D8-A1XY
25,CDH3,P22223,A799T,TCGA-D8-A1XY
23,MAPK6,Q16659,D387Y,TCGA-D8-A1XY
1,ERICH3,Q5RHP9,E1406K,TCGA-D8-A1XY
5,PIK3CA,P42336,E545K,TCGA-D8-A1XY
31,SULF2,Q8IWU5,G319S,TCGA-D8-A1XY
24,ADAMTS17,Q8TE56,G562E,TCGA-D8-A1XY
20,DIP2B,Q9P265,G973V,TCGA-D8-A1XY
21,ZNF10,P21506,H391Q,TCGA-D8-A1XY


DISRUPTIVE_INTERACTIONS:


[('Q9UKS6', 'R24H', 'Q9BY11'),
 ('Q9UKS6', 'R24H', 'Q9UNF0'),
 ('Q9UKS6', 'R24H', 'Q5TZC3'),
 ('Q9UKS6', 'R24H', 'Q6FIA3')]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0
5,Q9UKS6,R24H,Q5TZC3,0
6,Q9UKS6,R24H,Q6FIA3,0


In [121]:
checker("TCGA-A8-A093")

PATIENT SNV DATA: TCGA-A8-A093


Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
139,NLGN3,Q9NZ94,A99T,TCGA-A8-A093
131,ZNF441,Q8N8Z8,C592S,TCGA-A8-A093
90,CASR,P41180,D1005N,TCGA-A8-A093
82,TARBP1,Q13395,D1149N,TCGA-A8-A093
107,SAMD9L,Q8IVG5,D120H,TCGA-A8-A093
137,KCNJ4,P48050,D435N,TCGA-A8-A093
136,SLC24A3,Q9HC58,D525N,TCGA-A8-A093
141,PLS3,P13797,E16K,TCGA-A8-A093
84,TTC7A,Q9ULT0,E191K,TCGA-A8-A093
88,WNT7A,O00755,E234K,TCGA-A8-A093


DISRUPTIVE_INTERACTIONS:


[('P28062', 'R216W', 'P40306'),
 ('Q15842', 'E237K', 'Q14654'),
 ('Q15842', 'E237K', 'P63252')]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0


In [86]:
disruptive_prediction_data

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0
...,...,...,...,...
3868,P49356,G244E,P49354,0
3873,P00352,S501L,P05091,0
3876,P50616,S100Y,Q9UIV1,0
3878,P68431,F79L,P62805,0


In [82]:
disruptive_interactions_per_patient.find_disruptive_interactions_single_patient("TCGA-EW-A1PB")

2021-10-01 13:44:29 |[32m INFO     [0m| helpers.helpers_analysis.disruptive_interactions_per_patient | Finding disruptive interactions for patient: TCGA-EW-A1PB ..
P00748, D557Y, P05067


In [73]:
disruptive_interactions_per_patient.get_disruptive_predicted_interactions(
    "P35367", "M206V"
)

[]

In [83]:
checker("TCGA-D8-A1XY")

PATIENT SNV DATA: TCGA-D8-A1XY


Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
0,USP24,Q9UPU5,I342V,TCGA-D8-A1XY
1,ERICH3,Q5RHP9,E1406K,TCGA-D8-A1XY
2,SPHKAP,Q2M3C7,R1372S,TCGA-D8-A1XY
3,HRH1,P35367,M206V,TCGA-D8-A1XY
4,LRRC2,Q9BYS8,P359L,TCGA-D8-A1XY
5,PIK3CA,P42336,E545K,TCGA-D8-A1XY
6,MCCC1,Q96RQ3,R66H,TCGA-D8-A1XY
7,AP3B1,O00203,R227H,TCGA-D8-A1XY
8,CAMK4,Q16566,K79T,TCGA-D8-A1XY
9,SDK1,Q7Z5N4,V1540I,TCGA-D8-A1XY


DISRUPTIVE_INTERACTIONS:


[('Q9UKS6', 'R24H', 'Q9BY11'),
 ('Q9UKS6', 'R24H', 'Q9UNF0'),
 ('Q9UKS6', 'R24H', 'Q5TZC3'),
 ('Q9UKS6', 'R24H', 'Q6FIA3')]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0
5,Q9UKS6,R24H,Q5TZC3,0
6,Q9UKS6,R24H,Q6FIA3,0


In [114]:
checker("TCGA-A2-A0D0")

PATIENT SNV DATA: TCGA-A2-A0D0


Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
57663,CASQ2,O14958,D163H,TCGA-A2-A0D0
57664,TCHH,Q07283,E1129K,TCGA-A2-A0D0
57665,TPR,P12270,S371C,TCGA-A2-A0D0
57666,GREB1,Q4ZG55,V1656I,TCGA-A2-A0D0
57667,SLC5A6,Q9Y289,V234L,TCGA-A2-A0D0
57668,BIRC6,Q9NR09,S3504N,TCGA-A2-A0D0
57669,AUP1,Q9Y679,E335Q,TCGA-A2-A0D0
57670,ASTL,Q6HA08,S244R,TCGA-A2-A0D0
57671,ACVR1,Q04771,V65I,TCGA-A2-A0D0
57672,CCDC141,Q6ZP82,F1294V,TCGA-A2-A0D0


DISRUPTIVE_INTERACTIONS:


[('Q969H0', 'R479Q', 'P42345'), ('Q969H0', 'R479Q', 'P63244')]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3541,Q969H0,R479Q,P42345,0
3542,Q969H0,R479Q,P63244,0


In [65]:
checker("TCGA-PE-A5DE")

PATIENT SNV DATA: TCGA-PE-A5DE


Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
59789,ESPN,B1AK53,R403T,TCGA-PE-A5DE
59790,RERE,Q9P2R6,E561K,TCGA-PE-A5DE
59791,VPS13D,Q5THJ4,P3515S,TCGA-PE-A5DE
59792,PRAMEF2,O60811,Q101E,TCGA-PE-A5DE
59793,PLEKHM2,Q8IWE5,E856K,TCGA-PE-A5DE


DISRUPTIVE_INTERACTIONS:


[('Q96I99', 'R269Q', 'P53597'),
 ('Q9BYE7', 'I132M', 'Q9H4P4'),
 ('Q9BYE7', 'I132M', 'Q9BUZ4'),
 ('P11142', 'Q58E', 'Q99933'),
 ('P11142', 'Q58E', 'Q92598'),
 ('Q00722', 'W361C', 'P63096'),
 ('Q00722', 'W361C', 'P51178'),
 ('Q9UNE7', 'E238Q', 'P51668')]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3719,Q96I99,R269Q,P53597,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3728,Q9BYE7,I132M,Q9H4P4,0
3729,Q9BYE7,I132M,Q9BUZ4,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3731,P11142,Q58E,Q99933,0
3735,P11142,Q58E,Q92598,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3741,Q00722,W361C,P63096,0
3742,Q00722,W361C,P51178,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3678,Q9UNE7,E238Q,P51668,0


In [49]:
from IPython.display import display

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
0,USP24,Q9UPU5,I342V,TCGA-D8-A1XY
1,ERICH3,Q5RHP9,E1406K,TCGA-D8-A1XY
2,SPHKAP,Q2M3C7,R1372S,TCGA-D8-A1XY
3,HRH1,P35367,M206V,TCGA-D8-A1XY
4,LRRC2,Q9BYS8,P359L,TCGA-D8-A1XY


In [45]:
disruptive_prediction_data = disruptive_interactions_per_patient.disruptive_prediction_data
disruptive_prediction_data.head()

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0


In [56]:
sample_patient_id = "TCGA-PE-A5DE"

In [57]:
snv_TCGA_PE_A5DE = snv_data[snv_data["Tumor_Sample_Barcode"] == sample_patient_id]
snv_TCGA_PE_A5DE.head()

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
59789,ESPN,B1AK53,R403T,TCGA-PE-A5DE
59790,RERE,Q9P2R6,E561K,TCGA-PE-A5DE
59791,VPS13D,Q5THJ4,P3515S,TCGA-PE-A5DE
59792,PRAMEF2,O60811,Q101E,TCGA-PE-A5DE
59793,PLEKHM2,Q8IWE5,E856K,TCGA-PE-A5DE


In [58]:
disruptive_interactions_per_patient.patient_to_disruptive_interactions[sample_patient_id]

[('Q96I99', 'R269Q', 'P53597'),
 ('Q9BYE7', 'I132M', 'Q9H4P4'),
 ('Q9BYE7', 'I132M', 'Q9BUZ4'),
 ('P11142', 'Q58E', 'Q99933'),
 ('P11142', 'Q58E', 'Q92598'),
 ('Q00722', 'W361C', 'P63096'),
 ('Q00722', 'W361C', 'P51178'),
 ('Q9UNE7', 'E238Q', 'P51668')]

In [53]:
for index, row in snv_TCGA_PE_A5DE.iterrows():
    snv_protein, snv_mutation = row["SWISSPROT"], row["HGVSp_Short"]
    search_data = disruptive_prediction_data[
        (disruptive_prediction_data["UniProt_ID"] == snv_protein) &
        (disruptive_prediction_data["Mutation"] == snv_mutation)
    ]
    if not search_data.empty:
        display(search_data)

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3719,Q96I99,R269Q,P53597,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3728,Q9BYE7,I132M,Q9H4P4,0
3729,Q9BYE7,I132M,Q9BUZ4,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3731,P11142,Q58E,Q99933,0
3735,P11142,Q58E,Q92598,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3741,Q00722,W361C,P63096,0
3742,Q00722,W361C,P51178,0


Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
3678,Q9UNE7,E238Q,P51668,0


In [4]:
disruptive_interactions_per_patient.prediction_data["Prediction"].value_counts().index

Int64Index([1, 0], dtype='int64')

In [5]:
disruptive_interactions_per_patient.disruptive_prediction_data["Prediction"].value_counts()

0    1934
Name: Prediction, dtype: int64

In [6]:
disruptive_interactions_per_patient.is_found_in_disruptive_prediction_data(
    "Q9UPU5",
    "I342V"
)

False

In [5]:
disruptive_interactions_per_patient.patient_to_snv_data["TCGA-D8-A1XY"].head()

Unnamed: 0,Hugo_Symbol,SWISSPROT,HGVSp_Short,Tumor_Sample_Barcode
0,USP24,Q9UPU5,I342V,TCGA-D8-A1XY
1,ERICH3,Q5RHP9,E1406K,TCGA-D8-A1XY
2,SPHKAP,Q2M3C7,R1372S,TCGA-D8-A1XY
3,HRH1,P35367,M206V,TCGA-D8-A1XY
4,LRRC2,Q9BYS8,P359L,TCGA-D8-A1XY


In [17]:
disruptive_prediction_data = disruptive_interactions_per_patient.disruptive_prediction_data
disruptive_prediction_data.head()

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction
0,P28062,R216W,P40306,0
1,Q15842,E237K,Q14654,0
2,Q15842,E237K,P63252,0
3,Q9UKS6,R24H,Q9BY11,0
4,Q9UKS6,R24H,Q9UNF0,0


In [19]:
disruptive_prediction_data[
    (disruptive_prediction_data["UniProt_ID"] == "Q9UKS6") &
    (disruptive_prediction_data["Mutation"] == "R24H")
]["Interactor_UniProt_ID"].to_list()

['Q9BY11', 'Q9UNF0', 'Q5TZC3', 'Q6FIA3']

In [None]:
'', 'R24H'

In [11]:
for index, row in disruptive_interactions_per_patient.patient_to_snv_data["TCGA-D8-A1XY"].iterrows():
    protein = row["SWISSPROT"]
    mutation = row["HGVSp_Short"]
    
    print(f"'{protein}', '{mutation}'")
    
    print(
        disruptive_interactions_per_patient.is_found_in_disruptive_prediction_data(
            protein,
            mutation
        )
    )
    
    print()
    
#     is_found = is_found_in_disruptive_prediction_data(
#         disruptive_interactions_per_patient.disruptive_prediction_data,
#         protein,
#         mutation
#     )

'Q9UPU5', 'I342V'
False

'Q5RHP9', 'E1406K'
False

'Q2M3C7', 'R1372S'
False

'P35367', 'M206V'
False

'Q9BYS8', 'P359L'
False

'P42336', 'E545K'
False

'Q96RQ3', 'R66H'
False

'O00203', 'R227H'
False

'Q16566', 'K79T'
False

'Q7Z5N4', 'V1540I'
False

'P08236', 'R357Q'
False

'Q99996', 'A15V'
False

'O43451', 'R1140M'
False

'Q08357', 'R381H'
False

'Q9Y6Q3', 'I183F'
False

'Q9H347', 'L505M'
False

'Q8IX04', 'Y301C'
False

'Q9UKS6', 'R24H'
True

'P05155', 'A145T'
False

'P49286', 'T168N'
False

'Q9P265', 'G973V'
False

'P21506', 'H391Q'
False

'Q8TB24', 'R89W'
False

'Q16659', 'D387Y'
False

'Q8TE56', 'G562E'
False

'P22223', 'A799T'
False

'Q70EL4', 'R383H'
False

'P45985', 'P306R'
False

'Q96QE3', 'L1662F'
False

'Q14596', 'S551F'
False

'Q86VR7', 'R804C'
False

'Q8IWU5', 'G319S'
False

'Q14689', 'S145C'
False

'Q92834', 'S421P'
False



In [5]:
disruptive_interactions_per_patient.data_materials.keys()

AttributeError: 'DisruptiveInteractionsPerPatient' object has no attribute 'data_materials'

In [6]:
brca_snv = disruptive_interactions_per_patient.data_materials["BRCA_snv_data_simplified"]
brca_snv.head()

AttributeError: 'DisruptiveInteractionsPerPatient' object has no attribute 'data_materials'

In [16]:
brca_patients = list(brca_snv["Tumor_Sample_Barcode"].unique())
brca_patients[:5]

['TCGA-D8-A1XY',
 'TCGA-AO-A0J5',
 'TCGA-A8-A093',
 'TCGA-C8-A133',
 'TCGA-AQ-A54O']

In [4]:
snv_TCGA_D8_A1XY = brca_snv[brca_snv["Tumor_Sample_Barcode"] == "TCGA-D8-A1XY"]
snv_TCGA_D8_A1XY.head()

NameError: name 'brca_snv' is not defined

In [None]:
P28062	R216W	

In [91]:
from tqdm.notebook import tqdm

In [109]:
p = "P04637"

for patient in tqdm(disruptive_interactions_per_patient.patients):
    patient_protein_to_mutation = get_patient_protein_to_mutations_dict(disruptive_interactions_per_patient.get_patient_snv_data(patient))
    if p in patient_protein_to_mutation:
        if "R280K" in patient_protein_to_mutation[p]:
            print(f"PATIENT: {patient}")

  0%|          | 0/985 [00:00<?, ?it/s]

PATIENT: TCGA-LD-A74U
PATIENT: TCGA-LL-A5YP


In [106]:
get_patient_protein_to_mutations_dict(disruptive_interactions_per_patient.get_patient_snv_data("TCGA-LD-A74U"))["P04637"]

['R280K']

In [110]:
get_patient_protein_to_mutations_dict(disruptive_interactions_per_patient.get_patient_snv_data("TCGA-LL-A5YP"))["P04637"]

['R280K']

In [113]:
disruptive_prediction_data[
    (disruptive_prediction_data["UniProt_ID"] == "P04637") &
    (disruptive_prediction_data["Mutation"] == "R280K")
]

Unnamed: 0,UniProt_ID,Mutation,Interactor_UniProt_ID,Prediction


In [94]:
disruptive_interactions_per_patient.patients[:3]

['TCGA-D8-A1XY', 'TCGA-AO-A0J5', 'TCGA-A8-A093']

defaultdict(list,
            {'Q9UPU5': ['I342V'],
             'Q5RHP9': ['E1406K'],
             'Q2M3C7': ['R1372S'],
             'P35367': ['M206V'],
             'Q9BYS8': ['P359L'],
             'P42336': ['E545K'],
             'Q96RQ3': ['R66H'],
             'O00203': ['R227H'],
             'Q16566': ['K79T'],
             'Q7Z5N4': ['V1540I'],
             'P08236': ['R357Q'],
             'Q99996': ['A15V'],
             'O43451': ['R1140M'],
             'Q08357': ['R381H'],
             'Q9Y6Q3': ['I183F'],
             'Q9H347': ['L505M'],
             'Q8IX04': ['Y301C'],
             'Q9UKS6': ['R24H'],
             'P05155': ['A145T'],
             'P49286': ['T168N'],
             'Q9P265': ['G973V'],
             'P21506': ['H391Q'],
             'Q8TB24': ['R89W'],
             'Q16659': ['D387Y'],
             'Q8TE56': ['G562E'],
             'P22223': ['A799T'],
             'Q70EL4': ['R383H'],
             'P45985': ['P306R'],
             'Q96QE3': ['L1662F