# Simple Parsing of Supplementary Table S8

See Table S8: the sheet "BRCA1_log2FC(Mut-WT)_P0.65" gives the log2 fold change (mutant/WT) and the p-value for each BRCA1-interacting prey.

In [4]:
# Location of the Supplementary Table S8 workbook (a relative path, so it is
# resolved against the directory the notebook is run from).
DATA_PATH = (
    "science.abf3066_tables_s2_to_s12"
    "/science.abf3066_Table_S8.xlsx"
)

In [5]:
import pandas as pd

In [7]:
# Read a single sheet of the workbook: the one holding, per prey, the log2 fold
# change and p-value columns for each BRCA1 variant versus wild type.
data = pd.read_excel(
    DATA_PATH,
    sheet_name="BRCA1_log2FC(Mut-WT)_P0.65",
)

In [8]:
# Display the loaded sheet for a quick visual check of its columns and values.
data

Unnamed: 0,Uniprot ID,Prey,i26a-wt_log2FC,i26a-wt_pvalue,c61g-wt_log2FC,c61g-wt_pvalue,r71g-wt_log2FC,r71g-wt_pvalue,iso5-wt_log2FC,iso5-wt_pvalue,s1655f-wt_log2FC,s1655f-wt_pvalue,5382insC-wt_log2FC,5382insC-wt_pvalue,m1755r-wt_log2FC,m1755r-wt_pvalue
0,P33527,ABCC1,0.374635,6.977761e-01,0.615273,5.246638e-01,2.044543,4.165402e-02,-2.381219,0.074203,0.658841,0.495981,1.164338,0.275467,2.170889,0.047816
1,P21399,ACO1,-0.792530,4.208485e-01,-0.355842,7.167747e-01,1.293796,1.922145e-01,1.004295,0.309070,0.534004,0.586607,0.612379,0.533188,0.156814,0.872888
2,O00154,ACOT7,1.369135,2.590590e-02,,,1.738867,4.383400e-04,-0.966862,0.043427,,,2.543763,0.000029,,
3,O14734,ACOT8,1.481461,4.420244e-02,-0.069220,9.222773e-01,0.176436,8.036951e-01,0.111009,0.875683,0.383792,0.629194,1.446210,0.049123,1.547306,0.036174
4,O60488,ACSL4,1.522991,4.133813e-02,2.665689,7.212310e-04,1.729246,2.159676e-02,-0.625082,0.390581,1.062681,0.148366,2.964251,0.000219,1.021482,0.164267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,O95071,UBR5,-1.056411,1.149461e-01,1.869710,7.023826e-03,-0.672379,3.107194e-01,-0.579372,0.381516,-2.179488,0.001999,0.494694,0.454285,-1.399912,0.039141
124,Q96RL1,UIMC1,-2.923227,6.510000e-11,-1.662660,1.450000e-07,-1.660817,1.470000e-07,,,,,,,,
125,P47985,UQCRFS1,0.736707,3.122358e-01,-0.354639,5.596683e-01,1.636576,5.476190e-02,,,1.529521,0.066989,2.311679,0.016820,,
126,Q96RU2,USP28,,,2.481066,5.467410e-04,,,0.566949,0.340059,,,1.871856,0.005055,,


In [22]:
# Columns that identify a prey protein; carried through unchanged.
identifier_colnames = ["Uniprot ID", "Prey"]

# log2FC columns deliberately left out of the parsed table.
# NOTE(review): presumably excluded because they are not single-residue
# substitutions -- confirm.
exception_colnames = ["5382insC-wt_log2FC", "iso5-wt_log2FC"]

# Keep the remaining "<variant>-wt_log2FC" columns; the "*_pvalue" columns do
# not contain "log2FC" and are therefore dropped as well.
fold_change_colnames = [
    name
    for name in data.columns
    if "log2FC" in name and name not in exception_colnames
]

# Turn e.g. "i26a-wt_log2FC" into the mutation label "I26A".
mutation_colnames = [name.partition("-")[0].upper() for name in fold_change_colnames]

selected_colnames = [*identifier_colnames, *fold_change_colnames]

# Work on an independent copy so relabelling the columns leaves `data` untouched.
data_parsed = data.loc[:, selected_colnames].copy()
data_parsed.columns = [*identifier_colnames, *mutation_colnames]
data_parsed

Unnamed: 0,Uniprot ID,Prey,I26A,C61G,R71G,S1655F,M1755R
0,P33527,ABCC1,0.374635,0.615273,2.044543,0.658841,2.170889
1,P21399,ACO1,-0.792530,-0.355842,1.293796,0.534004,0.156814
2,O00154,ACOT7,1.369135,,1.738867,,
3,O14734,ACOT8,1.481461,-0.069220,0.176436,0.383792,1.547306
4,O60488,ACSL4,1.522991,2.665689,1.729246,1.062681,1.021482
...,...,...,...,...,...,...,...
123,O95071,UBR5,-1.056411,1.869710,-0.672379,-2.179488,-1.399912
124,Q96RL1,UIMC1,-2.923227,-1.662660,-1.660817,,
125,P47985,UQCRFS1,0.736707,-0.354639,1.636576,1.529521,
126,Q96RU2,USP28,,2.481066,,,


## Preparing the input for ELASPIC

In [35]:
# Reshape from wide (one column per mutation) to long: one row per
# (prey, mutation) pair, with the mutation label in "Mutation" and the value in
# "FoldChange".
# `var_name` must be a scalar for single-level columns: a list is only
# meaningful for MultiIndex columns, and recent pandas releases reject it here
# with a ValueError.
data_parsed_melted = data_parsed.melt(
    id_vars=["Uniprot ID", "Prey"],
    var_name="Mutation",
    value_name="FoldChange",
)
data_parsed_melted

Unnamed: 0,Uniprot ID,Prey,Mutation,FoldChange
0,P33527,ABCC1,I26A,0.374635
1,P21399,ACO1,I26A,-0.792530
2,O00154,ACOT7,I26A,1.369135
3,O14734,ACOT8,I26A,1.481461
4,O60488,ACSL4,I26A,1.522991
...,...,...,...,...
635,O95071,UBR5,M1755R,-1.399912
636,Q96RL1,UIMC1,M1755R,
637,P47985,UQCRFS1,M1755R,
638,Q96RU2,USP28,M1755R,


In [42]:
# Build one "<accession>.<mutation>" string per melted row by prefixing every
# mutation label with the accession P38398 (the protein treated as BRCA1 here).
# NOTE(review): ELASPIC rejected P38398.M1755R with "unrecognized protein
# residue" (see the results table at the end) -- check whether the sheet's
# "m1755r" column label should read M1775R.
elaspic_input_brca1 = data_parsed_melted["Mutation"].radd("P38398" + ".")
elaspic_input_brca1

0        P38398.I26A
1        P38398.I26A
2        P38398.I26A
3        P38398.I26A
4        P38398.I26A
           ...      
635    P38398.M1755R
636    P38398.M1755R
637    P38398.M1755R
638    P38398.M1755R
639    P38398.M1755R
Name: Mutation, Length: 640, dtype: object

In [48]:
# Print each distinct ELASPIC query once, one per line.
# `Series.unique()` returns values in order of first appearance, so the listing
# is identical on every run; iterating a `set` of strings is not, because
# string hashing is randomized per interpreter session.
print("\n".join(elaspic_input_brca1.unique()))
print("---")

P38398.S1655F
P38398.C61G
P38398.M1755R
P38398.R71G
P38398.I26A
---


#### Each mutation was submitted separately and has its own result link

| Mutation      | ELASPIC result                             | Status    |
| ---           | ---                                        | ---       |
| P38398.S1655F | http://elaspic.kimlab.org/result/128cdea8/ | Completed |
| P38398.C61G   | http://elaspic.kimlab.org/result/80cd13c2/ | Ongoing   |
| P38398.M1755R | There is an unrecognized protein residue.  | -         |
| P38398.R71G   | http://elaspic.kimlab.org/result/6e7d709e/ | Completed |
| P38398.I26A   | http://elaspic.kimlab.org/result/d7f544b3/ | Completed |