In [1]:
import pandas as pd
import os
import re
import plotly.express as px
import plotly.graph_objects as go
import plotlyshare
import seaborn as sns
sns.set_style('darkgrid')

# number of structures analysis

In [2]:
cgc_data = pd.read_csv('Census_all_with_pdb.csv')

In [3]:
fig = px.histogram(cgc_data, x="n_structures")
fig.show()

In [4]:
# horizontal boxplot
fig = px.box(cgc_data, x="n_structures", orientation='h')
fig.show()

### read clean residue data

In [5]:
residue_data = pd.read_csv('final_data.csv')
residue_data.head()

Unnamed: 0,gene_symbol,pdb_id,chain,uniprot_id,assoc_gene,resolution,res_num,pdb_res,uniprot_res,network_score,outside_range,residue_match
0,EGFR,3POZ,A,P00533,EGFR,1.5,701,Q,Q,-3.791355,False,True
1,EGFR,3POZ,A,P00533,EGFR,1.5,702,A,A,-2.634055,False,True
2,EGFR,3POZ,A,P00533,EGFR,1.5,703,L,L,-1.231055,False,True
3,EGFR,3POZ,A,P00533,EGFR,1.5,704,L,L,0.452193,False,True
4,EGFR,3POZ,A,P00533,EGFR,1.5,705,R,R,-1.086163,False,True


In [6]:
residue_data[residue_data['gene_symbol']=="EGFR"]['pdb_id'].unique()

array(['3POZ', '3VRP', '3W32', '3W33', '4I22', '4I24', '5CNO', '5GNK',
       '5HG5', '5HG8', '5U8L', '5UG8', '5UG9', '5UGA', '5UGC', '6TFV',
       '6TFY', '6TFZ', '6TG0', '6TG1', '6V66', '6WXN', '7JXQ', '7SI1',
       '8A27', '8A2A', '8A2B', '8A2D'], dtype=object)

In [7]:
# count number of unique pdb_id for each gene
residue_data.groupby('gene_symbol')['pdb_id'].nunique().reset_index()

Unnamed: 0,gene_symbol,pdb_id
0,EGFR,28
1,HRAS,30
2,KRAS,31
3,TP53,109


# TCGA data

In [8]:
residue_data = pd.read_csv('final_data.csv')
residue_data = residue_data.sort_values("assoc_gene", ascending=True)
residue_data 
residue_data.head()

Unnamed: 0,gene_symbol,pdb_id,chain,uniprot_id,assoc_gene,resolution,res_num,pdb_res,uniprot_res,network_score,outside_range,residue_match
0,EGFR,3POZ,A,P00533,EGFR,1.5,701,Q,Q,-3.791355,False,True
8569,EGFR,6V66,C,P00533,EGFR,1.79,909,T,T,0.551312,False,True
8570,EGFR,6V66,C,P00533,EGFR,1.79,910,F,F,1.368711,False,True
8571,EGFR,6V66,C,P00533,EGFR,1.79,911,G,G,3.902907,False,True
8572,EGFR,6V66,C,P00533,EGFR,1.79,912,S,S,1.386478,False,True


In [9]:
all_merged_data = pd.DataFrame()

# map assoc_gene + residues to their mutational frequency 
for file in os.listdir('TCGA_data'):
    
    # extract gene name from file name
    gene = re.match(r'^([A-Z0-9]+)', file).group(1)
    
    # filter residue data for gene
    filtered_data = residue_data[residue_data['gene_symbol']==gene]
    
    # read tp53 TCGA data
    missense_data = pd.read_csv(f"TCGA_data/{file}", sep='\t')

    # filter for missense mutations
    missense_data = missense_data[missense_data['consequence']=="Missense"].reset_index(drop=True)
    missense_data['res_num'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1]).str.extract('(\d+)').astype(int) # get residue number
    missense_data['orig_aa'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1][0]) # get original amino acid   

    # sum the number of mutations for each residue
    missense_data = missense_data.groupby(['res_num', 'orig_aa']).agg({'num_ssm_affected_cases': 'sum', 'sift_score': 'mean'}).reset_index()

    # merge with residue data
    merged_data = pd.merge(filtered_data, missense_data, left_on='res_num', right_on='res_num', how='left')
    merged_data['num_ssm_affected_cases'] = merged_data['num_ssm_affected_cases'].fillna(0)
    all_merged_data = pd.concat([all_merged_data, merged_data])
    tmp = merged_data[merged_data['num_ssm_affected_cases'] > 0]
    # plot scatter plot
 
    fig = px.scatter(tmp, x='network_score', y='num_ssm_affected_cases', hover_data=['res_num', 'pdb_id', 'chain'], 
                            labels={'num_ssm_affected_cases': 'Number of affected cases',
                                    'score': 'Network Score',
                                    'num': 'Residue Number',
                                    'pdb_id': 'PDB ID',
                                    'chain': 'Chain'},
                            color="residue_match")
            
    # fig.update_layout(title=f"Gene {gene} | PDB {pdb_id} | Chain {chain}")
    fig.update_layout(title=f"Gene {gene} with {tmp['pdb_id'].nunique()} PDB structures")
    fig.update_xaxes(title_text='Network Score')
    fig.update_yaxes(title_text='Number of affected cases')

    # add label residue number to each point by hovering
    fig.update_traces(text=tmp['res_num'], hoverinfo='text+y')
    
	# make markers more transparent
    fig.update_traces(marker=dict(opacity=0.5))

    # fig.write_image(f"plots/TP53_{pdb_id}_{chain}.png")
    fig.show(renderer="plotlyshare") #



invalid escape sequence '\d'


invalid escape sequence '\d'


invalid escape sequence '\d'



PlotlyShare: New plot quizzical solidity 24th Apr created at https://plotlyshare-1-r8742502.deta.app/plot/b3453d1bba1a757be445046a8f7c2bf0 of size 138.21kB
PlotlyShare: New plot grieving nurture 24th Apr created at https://plotlyshare-1-r8742502.deta.app/plot/c39bebe0f946da128048a51956fdbe22 of size 52.01kB
PlotlyShare: New plot deeply crocodile 24th Apr created at https://plotlyshare-1-r8742502.deta.app/plot/5f3ba6ed440d53ad13ad113d066dc214 of size 86.72kB
PlotlyShare: New plot stingy shrine 24th Apr created at https://plotlyshare-1-r8742502.deta.app/plot/d4e57ae9e7ceba0720966ee6e1466804 of size 811.98kB


In [10]:
all_merged_data[(all_merged_data['pdb_id']=="5WDQ") & (all_merged_data['num_ssm_affected_cases']>0)]

Unnamed: 0,gene_symbol,pdb_id,chain,uniprot_id,assoc_gene,resolution,res_num,pdb_res,uniprot_res,network_score,outside_range,residue_match,orig_aa,num_ssm_affected_cases,sift_score
249,HRAS,5WDQ,A,P01112,HRAS,1.25,123,R,R,0.577568,False,True,R,1.0,0.02
252,HRAS,5WDQ,A,P01112,HRAS,1.25,120,A,L,0.703925,False,False,L,1.0,0.0
254,HRAS,5WDQ,A,P01112,HRAS,1.25,118,C,C,0.721531,False,True,C,1.0,0.05
255,HRAS,5WDQ,A,P01112,HRAS,1.25,117,K,K,1.963923,False,True,K,7.0,0.006667
270,HRAS,5WDQ,A,P01112,HRAS,1.25,127,S,S,-1.939055,False,True,S,1.0,0.86
275,HRAS,5WDQ,A,P01112,HRAS,1.25,147,K,K,-0.006125,False,True,K,1.0,0.0
278,HRAS,5WDQ,A,P01112,HRAS,1.25,144,T,T,2.479485,False,True,T,1.0,0.06
279,HRAS,5WDQ,A,P01112,HRAS,1.25,143,E,E,-0.107645,False,True,E,1.0,0.02
287,HRAS,5WDQ,A,P01112,HRAS,1.25,135,R,R,-3.439047,False,True,R,1.0,0.17
288,HRAS,5WDQ,A,P01112,HRAS,1.25,134,A,A,0.170913,False,True,A,1.0,0.0


In [11]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import plotly.express as px
import pandas as pd


In [12]:
# Sample data (replace this with your actual data)
df = all_merged_data
df['pdb_chain'] = df['pdb_id'] + '_' + df['chain']

# Get unique genes
unique_genes = df['gene_symbol'].unique()

# Initialize Dash app
app = dash.Dash(__name__)

# Define layout
app.layout = html.Div([
    dcc.Dropdown(
        id='gene-dropdown',
        options=[{'label': gene, 'value': gene} for gene in unique_genes],
        value='TP53',  # Default value set to 'TP53'
        clearable=False
    ),
    html.Br(),
    dcc.Dropdown(
        id='pdb-dropdown',
        options=[{'label': 'All PDBs', 'value': 'all'}] + [{'label': pdb_id, 'value': pdb_id} for pdb_id in df['pdb_chain'].unique()],
        multi=True
    ),
    dcc.Graph(id='scatter-plot')
])

# Define callback to update PDB dropdown based on selected gene
@app.callback(
    Output('pdb-dropdown', 'value'),
    [Input('gene-dropdown', 'value')]
)
def update_pdb_value(selected_gene):
    return ['all'] + list(df[df['gene_symbol'] == selected_gene]['pdb_chain'].unique())

# Define callback to update scatter plot
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('gene-dropdown', 'value'),
     Input('pdb-dropdown', 'value')]
)
def update_plot(selected_gene, selected_pdb_ids):

    if 'all' in selected_pdb_ids:
        filtered_df = df[df['gene_symbol'] == selected_gene]
    else:
        filtered_df = df[df['pdb_chain'].isin(selected_pdb_ids)]
    title = "Network score distribution against TCGA mutational data (missense only)"
    fig = px.scatter(filtered_df, x='network_score', y='num_ssm_affected_cases',
                     color='residue_match', color_discrete_map={True: 'blue', False: 'red'},
                     hover_data=['pdb_id', 'chain', 'res_num', 'pdb_res', 'uniprot_res'],
                     labels={'num_ssm_affected_cases': 'Number of affected cases',
                             'network_score': 'Network Score',
                             'residue_match': 'Residue Match',
                                    'pdb_id': 'PDB ID',
                                    'chain': 'Chain',
                                    'res_num': 'Residue Number',
                                    'pdb_res': 'PDB Residue',
                                    'uniprot_res': 'UniProt Residue'},
                     title=f'{title} ({len(selected_pdb_ids)} PDB chains selected)' if 'all' not in selected_pdb_ids else f'{title} (All PDB chains selected)')
    
    fig.update_xaxes(title_text='Network Score')
    fig.update_yaxes(title_text='Number of affected cases')
    fig.update_traces(opacity=.4)
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


**VEP impact**: Variant impact A subjective classification of the severity of the variant consequence, based on agreement with SNPEff.
https://www.biostars.org/p/468502/
* Modifier impact variant consequence Usually non-coding variants or variants affecting non-coding genes, where predictions are difficult or there is no evidence of impact.
* Low impact variant consequence A variant that is assumed to be mostly harmless or unlikely to change protein behaviour.
* Moderate impact variant consequence A non-disruptive variant that might change protein effectiveness.
* High impact variant consequence The variant is assumed to have high (disruptive) impact in the protein, probably causing protein truncation, loss of function or triggering nonsense mediated dec

In [40]:
all_merged_data = all_merged_data.sort_values(['gene_symbol', 'pdb_id', 'chain', 'res_num'])
all_merged_data.to_csv('all_merged_data.csv', index=False)

In [36]:
all_merged_data

Unnamed: 0,gene_symbol,pdb_id,chain,uniprot_id,assoc_gene,resolution,res_num,pdb_res,uniprot_res,network_score,outside_range,residue_match,orig_aa,num_ssm_affected_cases,sift_score
0,EGFR,8A2D,A,P00533,EGFR,1.11,1021,Q,Q,-3.622340,False,True,,0.0,
1,EGFR,5UG9,A,P00533,EGFR,1.33,870,H,H,-0.676394,False,True,,0.0,
2,EGFR,5UG9,A,P00533,EGFR,1.33,869,Y,Y,1.176390,False,True,,0.0,
3,EGFR,5UG9,A,P00533,EGFR,1.33,868,E,E,-0.904982,False,True,,0.0,
4,EGFR,5UG9,A,P00533,EGFR,1.33,867,K,K,-2.274415,False,True,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22205,TP53,4AGL,A,P04637,TP53,1.70,145,L,L,3.172981,False,True,L,6.0,0.016667
22206,TP53,4AGL,A,P04637,TP53,1.70,144,Q,Q,1.379384,False,True,Q,3.0,0.025000
22207,TP53,4AGL,A,P04637,TP53,1.70,143,V,V,1.078742,False,True,V,13.0,0.000000
22208,TP53,4AGL,A,P04637,TP53,1.70,113,F,F,1.423380,False,True,F,10.0,0.000000


### Sift impact and sift score

In [37]:
# plot network score against sift score
fig = px.scatter(all_merged_data, x='network_score', y='sift_score', hover_data=['res_num', 'pdb_id', 'chain'], 
							labels={'sift_score': 'SIFT Score',
									'network_score': 'Network Score',
									'num': 'Residue Number',
									'pdb_id': 'PDB ID',
									'chain': 'Chain'},) #color="residue_match"
fig.update_layout(title=f"Network Score vs SIFT Score")
fig.update_xaxes(title_text='Network Score')
fig.update_yaxes(title_text='SIFT Score')

fig.update_traces(marker=dict(opacity=0.5))
fig.show(renderer="plotlyshare")

PlotlyShare: New plot mysterious lender 20th Apr created at https://plotlyshare-1-r8742502.deta.app/plot/5f9160c4ed0d1580a9b878abb422e573 of size 1006.42kB


In [31]:
# plot network score against sift score
fig = px.scatter(all_merged_data, x='num_ssm_affected_cases', y='sift_score', hover_data=['res_num', 'pdb_id', 'chain'], 
							labels={'sift_score': 'SIFT Score',
									'network_score': 'Network Score',
									'num': 'Residue Number',
									'pdb_id': 'PDB ID',
									'chain': 'Chain'},) # color="residue_match"
fig.update_layout(title=f"Number of affected cases vs SIFT Score")
fig.update_xaxes(title_text='Number of affected cases')
fig.update_yaxes(title_text='SIFT Score')

fig.update_traces(marker=dict(opacity=0.5))
fig.show()

In [33]:
# plot mutational frequency vs network coloured by strength of sift_score
fig = px.scatter(all_merged_data, x='network_score', y= 'num_ssm_affected_cases', hover_data=['res_num', 'pdb_id', 'chain'], 
							labels={'num_ssm_affected_cases': 'Number of affected cases',
									'network_score': 'Network Score',
									'num': 'Residue Number',
									'pdb_id': 'PDB ID',
									'chain': 'Chain'},
							color="sift_score", color_continuous_scale='Viridis')
fig.update_layout(title=f"Number of affected cases vs Network Score")
fig.update_xaxes(title_text='Network Score')
fig.update_yaxes(title_text='Number of affected cases')

fig.update_traces(marker=dict(opacity=0.3))

fig.show()

# ============================================

# TP53

In [None]:
gene = "TP53"

# read tp53 TCGA data
missense_data = pd.read_csv(f'{gene}_frequent-mutations.2024-04-15.tsv', sep='\t')

missense_data = missense_data[missense_data['consequence']=="Missense"].reset_index(drop=True)
missense_data['res_num'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1]).str.extract('(\d+)').astype(int) # get residue number

# sum the number of mutations for each residue
missense_data = missense_data.groupby('res_num').agg({'num_ssm_affected_cases': 'sum'}).reset_index()

# plot frequency vs score and save all plots to file
for pdb_id in top_structures[gene]:
    chains = get_chains_pdb_id(pdb_id)
    for chain in chains:
        # print(f"Processing {pdb_id} with chain {chain}...")
        # read FinalSum file
        try:
            with open(f"sbna_results/{pdb_id}/{chain}/{pdb_id}_monomer/FinalSum", "r") as f:
                final_sum = f.readlines()
        except Exception as e:
            print(f"Failed to read FinalSum for {pdb_id} with error {e}")
            continue
        # each line is like "MET1	-2.29160741999415", convert to table
        final_sum = [i.split() for i in final_sum]
        final_sum = [(i[0][:3], i[0][3:], i[1]) for i in final_sum] 
        final_sum = pd.DataFrame(final_sum, columns=['res', 'num', 'score'])
        final_sum['num'] = final_sum['num'].astype(int)
        final_sum['score'] = final_sum['score'].astype(float)
        
        # join with missense_data
        # merge and if no values then 0
        final_sum = final_sum.merge(missense_data, how='left', left_on='num', right_on='res_num').fillna(0) 

        # plot frequency vs score and save all plots to file

        fig = px.scatter(final_sum, x='score', y='num_ssm_affected_cases', hover_data=['num'], 
                         labels={'num_ssm_affected_cases': 'Number of affected cases',
                                 'score': 'Network Score',
                                 'num': 'Residue Number'})
        
        fig.update_layout(title=f"TP53 Gene | PDB {pdb_id} | Chain {chain}")
        fig.update_xaxes(title_text='Network Score')
        fig.update_yaxes(title_text='Number of affected cases')

        # add label residue number to each point by hovering
        fig.update_traces(text=final_sum['num'], hoverinfo='text+y')



        # fig.write_image(f"plots/TP53_{pdb_id}_{chain}.png")
        fig.show()

Failed to read FinalSum for 3LW1 with error [Errno 2] No such file or directory: 'sbna_results/3LW1/P/3LW1_monomer/FinalSum'


# HRAS

In [None]:
gene = "HRAS"

# read tp53 TCGA data
missense_data = pd.read_csv(f'{gene}_frequent-mutations.2024-04-16.tsv', sep='\t')

missense_data = missense_data[missense_data['consequence']=="Missense"].reset_index(drop=True)
missense_data['res_num'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1]).str.extract('(\d+)').astype(int) # get residue number

# sum the number of mutations for each residue
missense_data = missense_data.groupby('res_num').agg({'num_ssm_affected_cases': 'sum'}).reset_index()

# plot frequency vs score and save all plots to file
for pdb_id in top_structures[gene]:
    chains = get_chains_pdb_id(pdb_id)
    for chain in chains:
        # print(f"Processing {pdb_id} with chain {chain}...")
        # read FinalSum file
        try:
            with open(f"sbna_results/{pdb_id}/{chain}/{pdb_id}_monomer/FinalSum", "r") as f:
                final_sum = f.readlines()
        except Exception as e:
            print(f"Failed to read FinalSum for {pdb_id} with error {e}")
            continue
        # each line is like "MET1	-2.29160741999415", convert to table
        final_sum = [i.split() for i in final_sum]
        final_sum = [(i[0][:3], i[0][3:], i[1]) for i in final_sum]
        final_sum = pd.DataFrame(final_sum, columns=['res', 'num', 'score'])
        final_sum['num'] = final_sum['num'].astype(int)
        final_sum['score'] = final_sum['score'].astype(float)
        
        # join with missense_data
        # merge and if no values then 0
        final_sum = final_sum.merge(missense_data, how='left', left_on='num', right_on='res_num').fillna(0) 

        # plot frequency vs score and save all plots to file

        fig = px.scatter(final_sum, x='score', y='num_ssm_affected_cases', hover_data=['num'], 
                         labels={'num_ssm_affected_cases': 'Number of affected cases',
                                 'score': 'Network Score',
                                 'num': 'Residue Number'})
        
        fig.update_layout(title=f"{gene} Gene | PDB {pdb_id} | Chain {chain}")
        fig.update_xaxes(title_text='Network Score')
        fig.update_yaxes(title_text='Number of affected cases')

        # add label residue number to each point by hovering
        fig.update_traces(text=final_sum['num'], hoverinfo='text+y')
        # fig.write_image(f"plots/{gene}/{gene}_{pdb_id}_{chain}.png")
        fig.show()

# EGFR

In [None]:
gene = "EGFR"

# read tp53 TCGA data
missense_data = pd.read_csv(f'{gene}_frequent-mutations.2024-04-16.tsv', sep='\t')

missense_data = missense_data[missense_data['consequence']=="Missense"].reset_index(drop=True)
missense_data['res_num'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1]).str.extract('(\d+)').astype(int) # get residue number

# sum the number of mutations for each residue
missense_data = missense_data.groupby('res_num').agg({'num_ssm_affected_cases': 'sum'}).reset_index()

# plot frequency vs score and save all plots to file
for pdb_id in top_structures[gene]:
    chains = get_chains_pdb_id(pdb_id)
    for chain in chains:
        # print(f"Processing {pdb_id} with chain {chain}...")
        # read FinalSum file
        try:
            with open(f"sbna_results/{pdb_id}/{chain}/{pdb_id}_monomer/FinalSum", "r") as f:
                final_sum = f.readlines()
        except Exception as e:
            print(f"Failed to read FinalSum for {pdb_id} with error {e}")
            continue
        # each line is like "MET1	-2.29160741999415", convert to table
        final_sum = [i.split() for i in final_sum]
        final_sum = [(i[0][:3], i[0][3:], i[1]) for i in final_sum]
        final_sum = pd.DataFrame(final_sum, columns=['res', 'num', 'score'])
        final_sum['num'] = final_sum['num'].astype(int)
        final_sum['score'] = final_sum['score'].astype(float)
        
        # join with missense_data
        # merge and if no values then 0
        final_sum = final_sum.merge(missense_data, how='left', left_on='num', right_on='res_num').fillna(0) 

        # plot frequency vs score and save all plots to file

        fig = px.scatter(final_sum, x='score', y='num_ssm_affected_cases', hover_data=['num'], 
                         labels={'num_ssm_affected_cases': 'Number of affected cases',
                                 'score': 'Network Score',
                                 'num': 'Residue Number'})
        
        fig.update_layout(title=f"{gene} Gene | PDB {pdb_id} | Chain {chain}")
        fig.update_xaxes(title_text='Network Score')
        fig.update_yaxes(title_text='Number of affected cases')

        # add label residue number to each point by hovering
        fig.update_traces(text=final_sum['num'], hoverinfo='text+y')

        # save image
        # fig.write_image(f"plots/{gene}/{gene}_{pdb_id}_{chain}.png", engine="kaleido")
        fig.show()

ValueError: could not convert string to float: 'NA'

# KRAS

In [None]:
gene = "KRAS"

# read tp53 TCGA data
missense_data = pd.read_csv(f'{gene}_frequent-mutations.2024-04-16.tsv', sep='\t')

missense_data = missense_data[missense_data['consequence']=="Missense"].reset_index(drop=True)
missense_data['res_num'] = missense_data['protein_change'].apply(lambda x: x.split(' ')[1]).str.extract('(\d+)').astype(int) # get residue number

# sum the number of mutations for each residue
missense_data = missense_data.groupby('res_num').agg({'num_ssm_affected_cases': 'sum'}).reset_index()

# plot frequency vs score and save all plots to file
for pdb_id in top_structures[gene]:
    chains = get_chains_pdb_id(pdb_id)
    for chain in chains:
        # print(f"Processing {pdb_id} with chain {chain}...")
        # read FinalSum file
        try:
            with open(f"sbna_results/{pdb_id}/{chain}/{pdb_id}_monomer/FinalSum", "r") as f:
                final_sum = f.readlines()
        except Exception as e:
            print(f"Failed to read FinalSum for {pdb_id} with error {e}")
            continue
        # each line is like "MET1	-2.29160741999415", convert to table
        final_sum = [i.split() for i in final_sum]
        final_sum = [(i[0][:3], i[0][3:], i[1]) for i in final_sum]
        final_sum = pd.DataFrame(final_sum, columns=['res', 'num', 'score'])
        final_sum['num'] = final_sum['num'].astype(int)
        final_sum['score'] = final_sum['score'].astype(float)
        
        # join with missense_data
        # merge and if no values then 0
        final_sum = final_sum.merge(missense_data, how='left', left_on='num', right_on='res_num').fillna(0) 

        # plot frequency vs score and save all plots to file
        fig = px.scatter(final_sum, x='score', y='num_ssm_affected_cases', hover_data=['num'], 
                         labels={'num_ssm_affected_cases': 'Number of affected cases',
                                 'score': 'Network Score',
                                 'num': 'Residue Number'})
        
        fig.update_layout(title=f"{gene} Gene | PDB {pdb_id} | Chain {chain}")
        fig.update_xaxes(title_text='Network Score')
        fig.update_yaxes(title_text='Number of affected cases')

        # add label residue number to each point by hovering
        fig.update_traces(text=final_sum['num'], hoverinfo='text+y')
        # fig.write_image(f"plots/{gene}/{gene}_{pdb_id}_{chain}.png")
        fig.show()

Failed to read FinalSum for 6P0Z with error [Errno 2] No such file or directory: 'sbna_results/6P0Z/B/6P0Z_monomer/FinalSum'
