## BASE

In [1]:
# Import
from dash import Dash, html, dash_table, dcc, callback, Output, Input, State
import pandas as pd
import plotly.express as px
import os
import dash_cytoscape as cyto
import json
import igraph as ig
import dash_bootstrap_components as dbc
import venn
from io import BytesIO
import base64
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.tools as tls  
import pickle
import tap
from plotly.subplots import make_subplots
from lifelines import KaplanMeierFitter
from lifelines.statistics import pairwise_logrank_test

cyto.load_extra_layouts()

APP_NAME = "Network Tool"
APP_LOGO = "/assets/logo.png"
GRAPH_ELEMENTS = None
NAME_STUDY="lung"
#TODO Get from config.josn
OUTPUT_ROOT_PATH = f"./{NAME_STUDY}/output"

In [2]:
def filter_graph(cluster):
    list_vertices = GRAPH.vs.select(lambda x:x["cluster"]==cluster)
    graph_filtered = GRAPH.induced_subgraph(list_vertices)
    graph_elements = []
    #convert and add vertex
    for e in graph_filtered.vs():
        _map = e.attributes()
        _map["id"] = e.index
        _map["variants"]=None
        graph_elements.append({"data": _map, "classes": e["vertex_type"], "id": e.index, "grabbable": False, })
    #convert and add edges
    for e in graph_filtered.es():
        graph_elements.append({"data": {"source": e.source, "target": e.target}})

    return graph_elements

def pyplot(fig, ci=True, legend=True):
    # Convert mpl fig obj to plotly fig obj, resize to plotly's default
    py_fig = tls.mpl_to_plotly(fig, resize=True)
    
    # Add fill property to lower limit line
    if ci == True:
        style1 = dict(fill='tonexty')
        # apply style
        py_fig['data'][2].update(style1)
        
        # Change color scheme to black
        py_fig['data'].update(dict(line=Line(color='black')))
    
    # change the default line type to 'step'
    py_fig['data'].update(dict(line=Line(shape='hv')))
    # Delete misplaced legend annotations 
    py_fig['layout'].pop('annotations', None)
    
    if legend == True:
        # Add legend, place it at the top right corner of the plot
        py_fig['layout'].update(
            showlegend=True,
            legend=Legend(
                x=1.05,
                y=1
            )
        )
        
    # Send updated figure object to Plotly, show result in notebook
    return py.iplot(py_fig)

In [3]:
GRAPH=None
with open (os.path.join(OUTPUT_ROOT_PATH,"graph.pickle"),"rb") as f:
    GRAPH=pickle.load(f)

CLUSTERS_INDEX=[int(c) for c in set(GRAPH.vs["cluster"])]
GENES=[c for c in set(GRAPH.vs["gene"])]
#Find all numerical column
DF_CLINICAL_DATA = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "cluster_clinical_data.csv"),sep="\t")
NUMERIC_COLUMNS_CLINICAL=list(DF_CLINICAL_DATA.select_dtypes(include=np.number).columns)
ALL_COLUMNS_CLINICAL=list(DF_CLINICAL_DATA.columns)
ALL_COLUMNS_CLINICAL.remove("cluster")

## LAYOUT DEFINITION

In [4]:
app = Dash(title=APP_NAME, external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME])

#ICON -> https://fontawesome.com/search

#SIDEBAR
sidebar = html.Div([
    html.Div(
        [
            html.Img(src=APP_LOGO, className="navbar_logo"),
            html.Span(APP_NAME, className="navbar_title"),
        ],
        className="sidebar-header",
    ),
    html.Hr(),
    dbc.Nav([
            #HOME
            dbc.NavLink(
                [
                    html.I(className="fas fa-home"),
                    html.Span("Home", className="navbar_span")
                ],
                href="/",
                active="exact",
                className="navbar_entity"
            ),
            #PATWAY
            dbc.NavLink(
                [
                    html.I(className="fas fa-diagram-project"),
                    html.Span("Pathways Analysis", className="navbar_span"),
                ],
                href="/pathway_analysis",
                active="exact",
                className="navbar_entity"
            ),
            #CLUSTER COMP
            dbc.NavLink(
                [
                    html.I(className="fas fa-code-compare"),
                    html.Span("Cluster Comparision",className="navbar_span"),
                ],
                href="/cluster_comparision",
                active="exact",
                className="navbar_entity"
            ),
            #CLINICAL
            dbc.NavLink(
                [
                    html.I(className="fas fa-table"),
                    html.Span("Clinical Data",className="navbar_span"),
                ],
                href="/clinical_data",
                active="exact",
                className="navbar_entity"
            ),
            #SURVIVAL ANALYSIS
             dbc.NavLink(
                [
                    html.I(className="fa-solid fa-chart-line"),
                    html.Span("Survival Analysis",className="navbar_span"),
                ],
                href="/survival_analysis",
                active="exact",
                className="navbar_entity"
            ),

        ], vertical=True, pills=True
    )],className="sidebar"
)

content = html.Div(id="page-content", className="content")
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])

#******************************************************
PAGE_HOME = [
    #DIALOG INFO NODE
    dbc.Modal(
        [],
        id="modal-lg",
        size="lg",
        is_open=False,
    ),
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                CLUSTERS_INDEX,
                0,
                id='dropdown-cluster',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    #1 ROW
    dbc.Row([
        dbc.Col([
           html.Span("Number of patients"),
           html.Hr(),
           html.Span(0, id="span_n_patient")
        ], width=2, className="info_block"),
        dbc.Col([
           html.Span("Number of variants"),
           html.Hr(),
           html.Span(0, id="span_n_variants")
        ], width=2, className="info_block"),
        dbc.Col([
           html.Span("Number of genes"),
           html.Hr(),
           html.Span(0, id="span_n_genes")
        ], width=2, className="info_block"),
        dbc.Col([
           html.Span("Variant centroid"),
           html.Hr(),
           html.Span("None", id="span_variant_centroids")
        ], width=2, className="info_block")
    ], justify="evenly"),

    #2 ROW
    dbc.Row([
        #CYTOSCAPE BLOCK
        dbc.Col([
            #CYTOSCAPE GRAPH
            cyto.Cytoscape(
                id='cytoscape-graph',
                layout={'name': 'cola'},
                style={'width': '100%', 'height': '25rem'},
                stylesheet=[
                    #NOME SOPRA
                    {
                        'selector': 'node',
                        'style': {
                            'content': 'data(name)',
                            "font-size": "5px"
                        }
                    },
                    #PAZIENTI TRIANGOLI ROSSI
                    {
                        'selector': '.PATIENT',
                        'style': {
                            'background-color': 'coral',
                            'shape': 'triangle'
                        }
                    },
                    #VARIANTI CERCHI BLUE
                    {
                        'selector': '.VARIANT',
                        'style': {
                            'background-color': 'royalblue',
                            'shape': 'circle'
                        }
                    },
                    #SELECTEDƒco
                    {
                        'selector': ':selected',
                        'style': {
                            'background-color': '#02cd79',
                        },
                    }
                ],
                elements=filter_graph(0),
                minZoom=0.1,
                maxZoom=2,
                responsive=True
            ),                  
            #LAYOUT SELECTOR
            dcc.RadioItems(
                options=["cola", "concentric", "breadthfirst", "klay"],
                value="cola",
                inline=True,
                id='radio-layouts'
            )
        ], lg=6),
        #FIGURE PIE
        dbc.Col([
            dcc.Graph(id="fig_pie")
        ], lg=6)
    ], justify="evenly"),
    #3 ROW
    dbc.Row([
        #FIGURE DEGREE
        dbc.Col([
            dcc.Graph(id="fig_degree")
        ], lg=11),
    ], justify="evenly")
]

#******************************************************
PAGE_PATHWAY_ANALYSIS = [
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                CLUSTERS_INDEX,
                0,
                id='dropdown-cluster',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),
        #PVALUE SELECTOR
        dbc.Col([
            html.Span("PValue threshold", className="span_selector"),
            dcc.Dropdown(
                [0.01, 0.05],
                0.05,
                id='dropdown-pvalue',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),
        #ADJUSTED PVALUE SELECTOR
        dbc.Col([
            html.Span("Use adjusted PValue", className="span_selector"),
            dcc.Dropdown(
                ["True", "False"],
                "False",
                id='dropdown-adjusted-pvalue',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    #FIRST ROW
    dbc.Row([
        #FIGURE GO
        dbc.Col([
            dcc.Graph(id="fig_go"),
            dcc.RadioItems(
            options=[
                {'label': 'Biological Function', 'value': 'biological'},
                {'label': 'Molecular Function', 'value': 'molecular'},
                {'label': 'Cellular Component', 'value': 'cellular'}
            ],
            # Valore predefinito
            value='biological' ,
            labelStyle={'display': 'inline-block'},
            id="radio_fig_go"
        )], lg=6),
        #FIGURE KEGG
        dbc.Col([
            dcc.Graph(id="fig_kegg")
        ], lg=6),
        #FIGURE REACTOME
        dbc.Col([
            dcc.Graph(id="fig_reactome")
        ], lg=6),
        #FIGURE WIKI
        dbc.Col([
            dcc.Graph(id="fig_wiki")
        ], lg=6)
    ])
]

#******************************************************
PAGE_CLUSTER_COMPARISION = [
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR MULTI
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                ["ALL"]+CLUSTERS_INDEX,
                [],
                id='dropdown-cluster-multi',
                multi=True,
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    # ROW VENN
    dbc.Row([
        dbc.Col([
            html.Img(id='plot-venn')
        ], lg=6),
        dbc.Col([
            dcc.Graph(
                id="table_gene_common"
            )
        ], lg=6),        
    ]),
    # ROW BOXS
    dbc.Row([
        dcc.Dropdown(
            ALL_COLUMNS_CLINICAL,
            ALL_COLUMNS_CLINICAL[0],
            id='dropdown-multi-fig1',
            persistence=True,
            persistence_type = 'memory'
        ),
        dcc.Graph(id="fig_multi_fig1")
    ]),
    # ROW PIES
    dbc.Row([
        dcc.Dropdown(
            ALL_COLUMNS_CLINICAL,
            ALL_COLUMNS_CLINICAL[-1],
            id='dropdown-multi-fig2',
            persistence=True,
            persistence_type = 'memory'
        ),
        dcc.Graph(id="fig_multi_fig2")
    ]),
]

#*******************************************************
PAGE_CLINICAL_DATA=[
    #DIALOG INFO NODE
    dbc.Modal(
        [],
        id="modal-lg",
        size="lg",
        is_open=False,
    ),
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                CLUSTERS_INDEX,
                0,
                id='dropdown-cluster',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    dbc.Row([
        #FIGURE BOX_PLOT_1
        dbc.Col([
            dcc.Dropdown(
                ALL_COLUMNS_CLINICAL,
                ALL_COLUMNS_CLINICAL[0],
                id='dropdown-box-1',
                persistence=True,
                persistence_type = 'memory'
            ),
            dcc.Graph(id="fig_box_plot_1")
        ], lg=6),
        #FIGURE BOX_PLOT_2
        dbc.Col([
            dcc.Dropdown(
                ALL_COLUMNS_CLINICAL,
                ALL_COLUMNS_CLINICAL[-1],
                id='dropdown-box-2',
                persistence=True,
                persistence_type = 'memory'
            ),
            dcc.Graph(id="fig_box_plot_2")
        ], lg=6)
    ]),
    dbc.Row([
        dbc.Col([
            dash_table.DataTable(id="table_clinical_data")
        ])
    ]),
    dbc.Row([
        #GENES SELECTOR MULTI
        dbc.Col([
            html.Span("Genes selected", className="span_selector"),
            dcc.Dropdown(
                GENES,
                [],
                id='dropdown-genes-multi',
                multi=True,
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    dbc.Row([
            dbc.Col([
                dash_table.DataTable(id="table_clinical_data_gene")
        ])

    ]),
    dbc.Row([
            dbc.Col([
                dash_table.DataTable(id="table_mutational_gene")
        ])

    ]),
]
#*******************************************************
PAGE_SURVIVAL_ANALYSIS=[
    #DIALOG INFO NODE
    dbc.Modal(
        [],
        id="modal-lg",
        size="lg",
        is_open=False,
    ),
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                CLUSTERS_INDEX,
                0,
                id='dropdown-cluster',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    dbc.Row([
        dbc.Col([
            dcc.Graph(id='survival_figure')
        ], lg=8)
    ]),

    dbc.Row([
        #CLUSTER SELECTOR MULTI
        dbc.Col([
            html.Span("Multi Cluster Selection", className="span_selector"),
            dcc.Dropdown(
                ["ALL"]+CLUSTERS_INDEX,
                [],
                id='dropdown-cluster-multi_survival',
                multi=True,
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    dbc.Row([
        dbc.Col([
            dcc.Graph(id='survival_figure_comparison')
        ], lg=8),
        dbc.Col([
            dcc.Graph(
                id="table_test_survival"
            )
        ], lg=4), 
    ])

]

## HOME PAGE

In [5]:
#PAGING
@app.callback(
    Output("page-content", "children"),
    Input("url", "pathname")
)
def render_page_content(pathname):
    if pathname == "/":
        return PAGE_HOME
    elif pathname == "/pathway_analysis":
        return PAGE_PATHWAY_ANALYSIS
    elif pathname == "/cluster_comparision":
        return PAGE_CLUSTER_COMPARISION
    elif pathname == "/clinical_data":
        return PAGE_CLINICAL_DATA
    elif pathname == "/survival_analysis":
        return PAGE_SURVIVAL_ANALYSIS
    # If the user tries to reach a different page, return a 404 message
    return html.Div(
        [
            html.H1("404: Not found", className="text-danger"),
            html.Hr(),
            html.P(f"The pathname {pathname} was not recognised..."),
        ],
        className="p-3 bg-light rounded-3",
    )

#SELECT SINGLE NODE GRAPH
@callback(
    Output('modal-lg', 'children'),
    Output('modal-lg', 'is_open'),
    Input('cytoscape-graph', 'tapNodeData'),
    prevent_initial_call=True,
)
def displaySelectedNodeData(data_dict):
    temp=""
    if data_dict["vertex_type"] == "VARIANT":
        term_included=["name","gene","sost_amm","consequence","gene","sost_amm","variant_type"]
        temp=""
        for k, v in data_dict.items():
            if k in term_included:
                temp+=f"**{k}**:{v}\n"
    else:
        term_excluded=["vertex_type","variants","color_vertex","shape_vertex","gene","sost_amm","variant_type","consequence","color","cluster","id","timeStamp"]
        temp=""
        for k, v in data_dict.items():
            if k not in term_excluded:
                temp+=f"**{k}**:{v}\n"

    return [
        dbc.ModalHeader(dbc.ModalTitle(data_dict['name'])),
        dcc.Markdown(temp, className="markdown"),
    ], True

#UPDATE CLUSTER LAYOUT
@callback(
    Output(component_id='cytoscape-graph', component_property='layout'),
    Input(component_id='radio-layouts', component_property='value')
)
def update_graph(layout):
    return {'name': layout}

#SELECT CLUSTER INDEX
@callback(
    Output(component_id='cytoscape-graph', component_property='elements'),
    Output(component_id='fig_pie', component_property='figure'),
    Output(component_id='fig_degree', component_property='figure'),
    Output(component_id='span_n_patient', component_property='children'),
    Output(component_id='span_n_variants', component_property='children'),
    Output(component_id='span_n_genes', component_property='children'),
    Output(component_id='span_variant_centroids', component_property='children'),
    Input(component_id='dropdown-cluster', component_property='value')
)
def update_cluster(cluster):
    #CLUSTER ELEMENTS
    cluster_elements = filter_graph(cluster)
    #FIGURE PIE
    df_gene = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Gene_Count", f"genes_cluster_{cluster}.csv"),sep="\t")
    fig_pie = px.pie(df_gene, values='COUNT', names='GENE', title='Number Mutation for Gene')
    fig_pie.update_traces(textposition="inside",textinfo='label')
    #VARIANT NUMBERS
    df_variant = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Variants_Degree",f"variants_degree_cluster{cluster}.csv"),sep="\t")
    n_variants = len(df_variant)
    #FIGURE DEGREE
    df_variant=df_variant.sort_values(by=['Degree'],ascending=False)[:15]
    fig_degree = px.bar(df_variant,x="Variants",y="Degree",title="Mutation Degree")
    #PATIENTS NUMBER
    n_patients = len([1 for e in cluster_elements if e["data"].get("vertex_type", "") == "PATIENT"])
    #GENE NUMBERS
    n_genes = len(df_gene)
    #VARIANT CENTROID
    if df_variant.iloc[0]["Degree"] == df_variant.iloc[1]["Degree"]:
        variant_centroids = "More than one"
    else:
        variant_centroids = df_variant.iloc[0]["Variants"]
    #RETURN
    return cluster_elements, fig_pie, fig_degree, n_patients, n_variants, n_genes, variant_centroids

## PATHWAY ANALYSIS PAGE

In [6]:
#UPDATE GO FIGURE
@callback(
    Output(component_id='fig_go', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value'),
        Input(component_id='radio_fig_go', component_property='value')
    ]
)
def update_go(cluster,pvalue,adjusted_pvalue,process_type):
    df = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "GO", f"{process_type}_{cluster}.csv"))
    if adjusted_pvalue == "True":
        df=df[df["Adjusted.P.value"] < pvalue]
        df = df.sort_values(by=['Adjusted.P.value'])[:25]
        fig=px.bar(df, x='Adjusted.P.value', y='Term',
            hover_data=['Overlap'], color='Adjusted.P.value', title='GO',labels={'Adjusted.P.value': 'Adjusted Pvalue'})
        fig.update_layout(xaxis_title="Adjusted Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms",legend_title="Adjusted Pvalue")
        return fig
    else:
        df=df[df["P.value"] < pvalue]
        df = df.sort_values(by=['P.value'])[:25]
        fig=px.bar(df, x='P.value', y='Term',
            hover_data=['Overlap'], color='P.value', title='GO',labels={'P.value': 'Pvalue'})
        fig.update_layout(xaxis_title="Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms",legend_title="Pvalue")
        return fig

#UPDATE KEGG FIGURE
@callback(
    Output(component_id='fig_kegg', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value')
    ]
)
def update_kegg(cluster,pvalue,adjusted_pvalue):
    df = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "KEGG", f"kegg_{cluster}.csv"))
    if adjusted_pvalue == "True":
        df=df[df["KEGG_2021_Human.Adjusted.P.value"] < pvalue]
        df = df.sort_values(by=['KEGG_2021_Human.Adjusted.P.value'])[:25]
        fig= px.bar(df, x='KEGG_2021_Human.Adjusted.P.value', y='KEGG_2021_Human.Term',
            hover_data=['KEGG_2021_Human.Overlap'], color='KEGG_2021_Human.Adjusted.P.value',title='KEGG',color_continuous_scale=px.colors.sequential.Viridis,labels={'KEGG_2021_Human.P.value': 'Adjusted Pvalue'})
        fig.update_layout(xaxis_title="Adjusted Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms",legend_title="Adjusted Pvalue")
        return fig
    else:
        df=df[df["KEGG_2021_Human.P.value"] < pvalue]
        df = df.sort_values(by=['KEGG_2021_Human.P.value'])[:25]
        fig= px.bar(df, x='KEGG_2021_Human.P.value', y='KEGG_2021_Human.Term',
            hover_data=['KEGG_2021_Human.Overlap'], color='KEGG_2021_Human.P.value',title='KEGG',color_continuous_scale=px.colors.sequential.Viridis,
            labels={'KEGG_2021_Human.P.value': 'Pvalue'})
        fig.update_layout(xaxis_title="Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms", legend_title="Pvalue")
        return fig
    
#UPDATE REACTOME FIGURE
@callback(
    Output(component_id='fig_reactome', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value')
    ]
)
def update_kegg(cluster,pvalue,adjusted_pvalue):
    df = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "REACTOME", f"reactome_{cluster}.csv"))
    if adjusted_pvalue == "True":
        df=df[df["Reactome_2022.Adjusted.P.value"] < pvalue]
        df = df.sort_values(by=['Reactome_2022.Adjusted.P.value'])[:25]
        fig= px.bar(df, x='Reactome_2022.P.value', y='Reactome_2022.Term',
            hover_data=['Reactome_2022.Overlap'], color='Reactome_2022.Adjusted.P.value',title='REACTOME',color_continuous_scale=px.colors.sequential.Viridis,labels={'Reactome_2022.P.value': 'Adjusted Pvalue'})
        fig.update_layout(xaxis_title="Adjusted Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms",legend_title="Adjusted Pvalue")
        return fig
    else:
        df=df[df["Reactome_2022.P.value"] < pvalue]
        df = df.sort_values(by=['Reactome_2022.P.value'])[:25]
        fig= px.bar(df, x='Reactome_2022.P.value', y='Reactome_2022.Term',
            hover_data=['Reactome_2022.Overlap'], color='Reactome_2022.P.value',title='REACTOME',color_continuous_scale=px.colors.sequential.Viridis,
            labels={'Reactome_2022.P.value': 'Pvalue'})
        fig.update_layout(xaxis_title="Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms", legend_title="Pvalue")
        return fig
    
#UPDATE WIKI FIGURE
@callback(
    Output(component_id='fig_wiki', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value')
    ]
)
def update_wiki(cluster,pvalue,adjusted_pvalue):
    df = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "WIKI", f"wiki_{cluster}.csv"))
    if adjusted_pvalue == "True":
        df=df[df["WikiPathway_2023_Human.Adjusted.P.value"] < pvalue]
        df = df.sort_values(by=['WikiPathway_2023_Human.Adjusted.P.value'])[:25]
        fig= px.bar(df, x='WikiPathway_2023_Human.Adjusted.P.value', y='WikiPathway_2023_Human.Term',
            hover_data=['WikiPathway_2023_Human.Overlap'], color='WikiPathway_2023_Human.Adjusted.P.value',title='WikiPathway',color_continuous_scale=px.colors.sequential.Viridis,labels={'WikiPathway_2023_Human.Adjusted.P.value': 'Adjusted Pvalue','WikiPathway_2023_Human.Overlap':'Overlap_Genes'})
        fig.update_layout(xaxis_title="Adjusted Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms",legend_title="Adjusted Pvalue")
        return fig
    else:
        df=df[df["WikiPathway_2023_Human.P.value"] < pvalue]
        df = df.sort_values(by=['WikiPathway_2023_Human.P.value'])[:25]
        fig= px.bar(df, x='WikiPathway_2023_Human.P.value', y='WikiPathway_2023_Human.Term',
            hover_data=['WikiPathway_2023_Human.Overlap'], color='WikiPathway_2023_Human.P.value',title='WikiPathway',color_continuous_scale=px.colors.sequential.Viridis,
            labels={'WikiPathway_2023_Human.P.value': 'Pvalue','WikiPathway_2023_Human.Overlap':'Overlap_Genes'})
        fig.update_layout(xaxis_title="Pvalue",  # Nome dell'asse delle x
        yaxis_title="Terms", legend_title="Pvalue")
        return fig


## CLUSTER COMPARISION PAGE

In [7]:
#UPDATE COMPARISION
@callback(
    Output(component_id='plot-venn', component_property='src'),
    Input(component_id='dropdown-cluster-multi', component_property='value')
)
def update_venn(list_clusters):
    cluster_gene_list=[]
    for index in list_clusters:
        gene_values = []
        if index == "ALL":
            gene_values = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "distribution_gene_cluster.csv"), sep="\t")["Gene"].unique()
        else:
            gene_values = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Gene_Count", f"genes_cluster_{index}.csv"),sep="\t")["GENE"].values
        cluster_gene_list.append(gene_values)
    labels = venn.get_labels(cluster_gene_list, fill=['number'])

    fig, ax= None, None
    match len(list_clusters):
        case 2:
            fig, ax = venn.venn2(labels, names=list_clusters)
        case 3:
            fig, ax = venn.venn3(labels, names=list_clusters)
        case 4:
            fig, ax = venn.venn4(labels, names=list_clusters)
        case 5:
            fig, ax = venn.venn5(labels, names=list_clusters)
        case 6:
            fig, ax = venn.venn6(labels, names=list_clusters)
        case _:
            return None

    ax.set_title('Gene Comparision')
    #SAVE TO BUFFER
    buf = BytesIO()
    fig.savefig(buf, format="png")
    fig_data = base64.b64encode(buf.getbuffer()).decode("ascii")
    fig_bar_matplotlib = f'data:image/png;base64,{fig_data}'
    plt.figure().clear()
    plt.close()
    plt.cla()
    plt.clf()
    return fig_bar_matplotlib

#TABLE GENES_COMMON:
@callback(
    Output(component_id="table_gene_common",component_property="figure"),
    Input(component_id='dropdown-cluster-multi', component_property='value')
)
def update_genes_common(list_clusters):
    cluster_gene_list={}
    all_gene_set=set()
    for index in list_clusters:
        gene_values=[]
        if index == "ALL":
            gene_values = list(pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "distribution_gene_cluster.csv"), sep="\t")["Gene"].unique())
            cluster_gene_list["ALL"]=gene_values
        else:
            gene_values = list(pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Gene_Count", f"genes_cluster_{index}.csv"),sep="\t")["GENE"].unique())
            cluster_gene_list[index]=gene_values
        all_gene_set.update(gene_values)
    
    df = pd.DataFrame({'gene': list(all_gene_set)})
    for cluster, genes in cluster_gene_list.items():
        df[cluster] = df['gene'].apply(lambda x: '🟢' if x in genes else '🔴')

    df = df.sort_values(by=list_clusters,ascending=False)
    values=[df["gene"].values]
    columns_name=["Gene"]
    for i in list_clusters:
        columns_name.append(f"Cluster {i}")
        values.append(df[i])

    fig = go.Figure(data=[go.Table(
        header=dict(values=columns_name,fill_color='paleturquoise',align='left'),
        cells=dict(values=values,fill_color='lavender',align='left'))
    ])
    return fig

#MULTI IMAGE BOX/PIE
def func_multi_plot(list_clusters, column_name):
    fig = None
    if column_name in NUMERIC_COLUMNS_CLINICAL:
        if "ALL" in list_clusters:
            DF_ALL = DF_CLINICAL_DATA.copy()
            DF_ALL['cluster_plot'] = "ALL"
            DF = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"].isin(list_clusters)]
            DF_ALL.dropna(subset=[column_name],inplace=True)
            DF.dropna(subset=[column_name],inplace=True)
            DF['cluster_plot'] = DF['cluster'].apply(lambda x: f'cluster_{x}')
            fig = tap.plot_stats(pd.concat([DF,DF_ALL]), "cluster_plot", column_name)
        else:
            DF=DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"].isin(list_clusters)]
            DF.dropna(subset=[column_name],inplace=True)
            DF['cluster_plot'] = DF['cluster'].apply(lambda x: f'cluster_{x}')
            fig = tap.plot_stats(DF, "cluster_plot", column_name)
    else:
        fig = make_subplots(1,len(list_clusters),subplot_titles=[f"cluster {e}" for e in list_clusters], specs=[[{'type':'domain'} for e in list_clusters]])
        for i,index in enumerate(list_clusters):
            _cluster_values=None
            if index=="ALL":
                _cluster_values = DF_CLINICAL_DATA[column_name]
            else:
                _cluster_values = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"] == index][column_name]
            _temp_dict = dict(_cluster_values.value_counts())
            fig.add_trace(go.Pie(labels=list(_temp_dict.keys()),values=list(_temp_dict.values()),scalegroup="one"),1,i+1)
    return fig

#UPDATE MULTI FIG1
@callback(
    Output(component_id='fig_multi_fig1', component_property='figure'),
    [
        Input(component_id='dropdown-cluster-multi', component_property='value'),
        Input(component_id='dropdown-multi-fig1', component_property='value')
    ]
)
def update_multi_fig1(list_clusters, column_name):
    return func_multi_plot(list_clusters, column_name)

#UPDATE MULTI FIG2
@callback(   
    Output(component_id='fig_multi_fig2', component_property='figure'),
    [
        Input(component_id='dropdown-cluster-multi', component_property='value'),
        Input(component_id='dropdown-multi-fig2', component_property='value')
    ])

def update_multi_fig2(list_clusters,column_name):
    return func_multi_plot(list_clusters, column_name)


## CLINICAL DATA PAGE

In [8]:
#SINGLE IMAGE BOX/PIE
def func_single_plot(cluster, column_name):
    cluster_values = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"] == cluster][column_name]

    if column_name in NUMERIC_COLUMNS_CLINICAL:
        fig=px.box(cluster_values, y=column_name)
        return fig
    else:
        _temp_dict = dict(cluster_values.value_counts())
        _temp_df = pd.DataFrame({column_name: _temp_dict.keys(), "count": _temp_dict.values()})
        fig=px.pie(_temp_df,values="count", names=column_name)
        return fig    

#UPDATE BOX_PLOT_1
@callback(
    Output(component_id='fig_box_plot_1', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-box-1', component_property='value')
    ]
)
def update_box_1(cluster, column_name):
    return func_single_plot(cluster, column_name)

#UPDATE BOX_PLOT_2
@callback(
    Output(component_id='fig_box_plot_2', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-box-2', component_property='value')
    ]
)
def update_box_2(cluster, column_name):
    return func_single_plot(cluster, column_name)

#TABLE CLINICAL_DATA:
@callback(
    Output(component_id="table_clinical_data",component_property="data"),
    Input(component_id='dropdown-cluster', component_property='value')
)
def update_table_clinical_data(cluster):
    cluster_values = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"] == cluster]
    return cluster_values.to_dict('records')

#TABLE GENE CLINICAL DATA
@callback(
    Output(component_id="table_clinical_data_gene",component_property="data"),
    Output(component_id="table_mutational_gene",component_property="data"),
    [Input(component_id='dropdown-cluster', component_property='value'),
     Input(component_id='dropdown-genes-multi', component_property='value')]
)
def select_gene_data(cluster,genes):
    #gene_data=pd.read_csv(os.path.join(OUTPUT_ROOT_PATH,"Gene",f"genes_cluster_{cluster}.csv"),names=["Gene"])
    data_mutational=pd.read_csv(f"./{NAME_STUDY}/data_mutational_filtered.txt",sep="\t")
    cluster_values = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"] == cluster]
    patient_cluster=cluster_values["SAMPLE_ID"].unique()
    patient_for_gene=[]
    for gene in genes:
        data_mutational_cluster=data_mutational[data_mutational["Tumor_Sample_Barcode"].isin(patient_cluster)]
        paz_mut=data_mutational_cluster[data_mutational_cluster["Hugo_Symbol"]==gene]["Tumor_Sample_Barcode"].unique()
        patient_for_gene.append(set(paz_mut))
    paz_mut_all= set.intersection(*patient_for_gene)

    mutational_cluster=data_mutational[(data_mutational["Tumor_Sample_Barcode"].isin(paz_mut_all)) & (data_mutational["Hugo_Symbol"].isin(genes))]
    cluster_values=cluster_values[cluster_values["SAMPLE_ID"].isin(paz_mut_all)]
    return cluster_values.to_dict('records'),mutational_cluster.to_dict('records')
    


In [9]:
GENE=["KEAP1","KRAS"]
gene_data=pd.read_csv(os.path.join(OUTPUT_ROOT_PATH,"Gene","genes_cluster_5.csv"),names=["Gene"])
genes=gene_data["Gene"].unique()
data_mutational=pd.read_csv(f"./{NAME_STUDY}/data_mutational_filtered.txt",sep="\t")
cluster_values = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"] == 5]
patient_cluster=cluster_values["SAMPLE_ID"].unique()
sample_genes=[]
temp=[]
for gene in GENE:
    if gene in genes:
        data_mutational_gene=data_mutational[data_mutational["Hugo_Symbol"].isin(GENE)]
        sample_mutational=data_mutational_gene["Tumor_Sample_Barcode"].isin(patient_cluster)
        for sample in sample_mutational:
            if sample in patient_cluster:
                sample_genes.append(sample)
cluster_values=cluster_values[cluster_values["SAMPLE_ID"].isin(sample_genes)]
cluster_values


Columns (47,94) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,CANCER_TYPE,CANCER_TYPE_DETAILED,ETHNICITY,GENE_PANEL,HISTOLOGY,MOLECULAR_SUBTYPE,ONCOTREE_CODE,PATIENT_ID,RACE,SAMPLE_ID,SAMPLE_TYPE,SOMATIC_STATUS,TMB_NONSYNONYMOUS,cluster


## SURVIVAL PAGE

In [10]:
#SURVIVAL_PLOT
@callback(
    Output(component_id='survival_figure', component_property='figure'),
    Input(component_id='dropdown-cluster', component_property='value')
)
def update_overall_survival(cluster):
    column_name_status="VITAL_STATUS"
    column_name_month ="OS_INT"
    data=DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"]==cluster]
    data.dropna(subset=[column_name_month,column_name_status],inplace=True)
    data[column_name_status] = data[column_name_status].replace({'Yes': 1, 'No': 0})
    #print(data[column_name_month], data[column_name_status])
    kmf = KaplanMeierFitter()
    kmf.fit(data[column_name_month].values, event_observed=data[column_name_status].values)
    # Crea il grafico della curva di sopravvivenza con Plotly
    fig = go.Figure()
    # Aggiungi la curva di sopravvivenza
    fig.add_trace(go.Scatter(
        x=kmf.confidence_interval_.index, 
        y=kmf.confidence_interval_['KM_estimate_upper_0.95'],
        mode="lines",
        line=dict(shape='hv', width=0),
        showlegend=False,
    ))

    fig.add_trace(go.Scatter(
        x=kmf.confidence_interval_.index,
        y=kmf.confidence_interval_['KM_estimate_lower_0.95'],
        mode="lines",
        line=dict(shape='hv', width=0),
        fill='tonexty',
        fillcolor='rgb(153,204,255)',
        showlegend=False
    ))
    fig.update_layout(
        title="Survival Curve",
        xaxis_title="Duration",
        yaxis_title="Survival probability",
        #margin=dict(r=0, t=10, l=0),
        font_size=14,
        xaxis_title_font_size=18,
        yaxis_title_font_size=18
    )
    fig.add_trace(go.Scatter(
        x=kmf.survival_function_.index, y=kmf.survival_function_['KM_estimate'],
        line=dict(shape='hv', width=3, color='rgb(0,0,128)'),
        mode="lines",
        showlegend=False
    ))
    return fig

In [11]:
@callback(
     Output(component_id='survival_figure_comparison', component_property='figure'),
     Output(component_id='table_test_survival',component_property='figure' ),
     Input(component_id='dropdown-cluster-multi_survival', component_property='value')
)
def update_survival_comparison(list_clusters):
    fig = go.Figure()
    fig_stats = None
    kmf = KaplanMeierFitter()
    column_name_status="VITAL_STATUS"
    column_name_month ="OS_INT"
    if "ALL" in list_clusters:
        DF_ALL = DF_CLINICAL_DATA.copy()
        DF_ALL['cluster_plot'] = "ALL"
        DF = DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"].isin(list_clusters)]
        DF['cluster_plot'] = DF['cluster']
        DF_ALL.dropna(subset=[column_name_month,column_name_status],inplace=True)
        DF.dropna(subset=[column_name_month,column_name_status],inplace=True)
        DF[column_name_status] = DF[column_name_status].replace({'Yes': 1, 'No': 0})
        DF_ALL[column_name_status] = DF_ALL[column_name_status].replace({'Yes': 1, 'No': 0})
        DF_FINAL=pd.concat([DF,DF_ALL])
        DF_FINAL=DF_FINAL[DF_FINAL["cluster_plot"].isin(list_clusters)]

        for cluster in list(DF_FINAL["cluster_plot"].unique()):
            cluster_data = DF_FINAL[DF_FINAL['cluster_plot'] == cluster]
            #cluster_data.dropna(subset=[column_name_month,column_name_status],inplace=True)
            if len(cluster_data)==0:
                print ("ciao")
                list_clusters.remove(cluster)
            else:
                # Fit e plottaggio della curva di sopravvivenza per il cluster corrente
                kmf.fit(cluster_data[column_name_month], event_observed=cluster_data[column_name_status], label=f"Cluster {cluster}")
                #kmf.plot_survival_function(ci_show=True)
                fig.update_layout(
                title="Survival Curve",
                xaxis_title="Duration",
                yaxis_title="Survival probability",
                font_size=14,
                xaxis_title_font_size=18,
                yaxis_title_font_size=18
                )
                fig.add_trace(go.Scatter(
                x=kmf.survival_function_.index, y=kmf.survival_function_[f"Cluster {cluster}"],
                line=dict(shape='hv', width=3),
                mode="lines",
                name=f"Cluster {cluster}",
                showlegend=True
                ))
                            
        DF_FINAL['cluster_plot'] = DF_FINAL['cluster_plot'].astype(str)
        # Test log-rank per confrontare le curve tra tutti i cluster
        results = pairwise_logrank_test(DF_FINAL[column_name_month], DF_FINAL['cluster_plot'], DF_FINAL[column_name_status])
        data_results=results.summary
        data_results= data_results.rename(columns={'-log2(p)': 'log2_p'})
        data_results = data_results.reset_index()
        data_results['comparison'] = data_results['level_0'].astype(str) + " " + data_results['level_1'].astype(str)
        # Rimuovi le colonne level_0 e level_1, se non necessarie
        data_results = data_results.drop(columns=['level_0', 'level_1'])

        fig_stats = go.Figure(data=[go.Table(
            header=dict(values=list(data_results.columns),fill_color='paleturquoise',align='left'),
            cells=dict(values=[round(data_results.test_statistic,4), round(data_results.p,4),round(data_results.log2_p,4),data_results.comparison],fill_color='lavender',align='left'))
        ])
    else:
        data=DF_CLINICAL_DATA[DF_CLINICAL_DATA["cluster"].isin(list_clusters)]
        data.dropna(subset=[column_name_month,column_name_status],inplace=True)
        data[column_name_status] = data[column_name_status].replace({'Yes': 1, 'No': 0})
        for cluster in list(data["cluster"].unique()):
            cluster_data = data[data['cluster'] == cluster]
            cluster_data.dropna(subset=[column_name_month,column_name_status],inplace=True)
            if len(cluster_data)==0:
                print ("ciao")
                list_clusters.remove(cluster)
            else:
                cluster_data[column_name_status] = cluster_data[column_name_status].replace({'Yes': 1, 'No': 0})
                # Fit e plottaggio della curva di sopravvivenza per il cluster corrente
                kmf.fit(cluster_data[column_name_month], event_observed=cluster_data[column_name_status], label=f"Cluster {cluster}")
                #kmf.plot_survival_function(ci_show=True)
                # Aggiungi la curva di sopravvivenza
                fig.update_layout(
                title="Survival Curve",
                xaxis_title="Duration",
                yaxis_title="Survival probability",
                font_size=14,
                xaxis_title_font_size=18,
                yaxis_title_font_size=18
            )
            fig.add_trace(go.Scatter(
                x=kmf.survival_function_.index, y=kmf.survival_function_[f"Cluster {cluster}"],
                line=dict(shape='hv', width=3),
                mode="lines",
                name=f"Cluster {cluster}",
                showlegend=True
            ))

        # Test log-rank per confrontare le curve tra tutti i cluster
        results = pairwise_logrank_test(data[column_name_month], data['cluster'], data[column_name_status])
        # Mostra i risultati del test log-rank per tutti i confronti tra cluster
        data_results=results.summary
        data_results= data_results.rename(columns={'-log2(p)': 'log2_p'})
        data_results = data_results.reset_index()
        data_results['comparison'] = data_results['level_0'].astype(str) + " " + data_results['level_1'].astype(str)
        # Rimuovi le colonne level_0 e level_1, se non necessarie
        data_results = data_results.drop(columns=['level_0', 'level_1'])

        fig_stats = go.Figure(data=[go.Table(
            header=dict(values=list(data_results.columns),fill_color='paleturquoise',align='left'),
            cells=dict(values=[round(data_results.test_statistic,4), round(data_results.p,4),round(data_results.log2_p,4),data_results.comparison],fill_color='lavender',align='left'))
        ])

    return fig, fig_stats
            
                

## START

In [12]:
#START
if __name__ == '__main__':
    print("Deploy on: http://127.0.0.1:8050/")
    #GLOBAL
    app.run(debug=False, host='0.0.0.0')
    #LOCAL
    #app.run(debug=False)

Deploy on: http://127.0.0.1:8050/




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

[2024-10-04 09:43:33,858] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/home/fede/anaconda3/envs/Tool_Network/lib/python3.11/site-packages/flask/app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/fede/anaconda3/envs/Tool_Network/lib/python3.11/site-packages/flask/app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/fede/anaconda3/envs/Tool_Network/lib/python3.11/site-packages/flask/app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/fede/anaconda3/envs/Tool_Network/lib/python3.11/site-package