In [19]:
# Import
from dash import Dash, html, dash_table, dcc, callback, Output, Input, State
import pandas as pd
import plotly.express as px
import os
import dash_cytoscape as cyto
import json
import igraph as ig
import dash_bootstrap_components as dbc
import venn
from io import BytesIO
import base64
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

# Register dash-cytoscape's additional layout engines.
# NOTE(review): presumably required for the "cola" and "klay" layouts offered
# by the layout selector further down — confirm against the dash-cytoscape docs.
cyto.load_extra_layouts()

# Application title (browser tab and sidebar) and the logo shown in the sidebar.
APP_NAME = "Network Tool"
APP_LOGO = "/assets/logo.png"
# Placeholder; not referenced anywhere else in the visible cells.
GRAPH_ELEMENTS = None

#TODO Get from config.json
# Root folder containing the graph file and the per-cluster CSV outputs.
OUTPUT_ROOT_PATH = "./endometrium/output"

In [20]:
def filter_graph(cluster):
    """Return the Cytoscape elements (nodes, then edges) for one cluster.

    The vertices of the module-level ``GRAPH`` whose ``cluster`` attribute
    equals ``cluster`` are extracted as an induced subgraph. Node ids and
    edge endpoints are the vertex indices *of that subgraph*.

    Parameters
    ----------
    cluster
        Value compared (``==``) against each vertex's ``cluster`` attribute.

    Returns
    -------
    list[dict]
        One dict per vertex (all vertex attributes plus ``id`` under
        ``data``, the ``vertex_type`` as CSS class, not grabbable), followed
        by one dict per edge with ``source``/``target`` under ``data``.
    """
    members = GRAPH.vs.select(lambda vertex: vertex["cluster"] == cluster)
    subgraph = GRAPH.induced_subgraph(members)

    def _as_node(vertex):
        # Copy the vertex attributes and expose the subgraph index as the node id.
        payload = vertex.attributes()
        payload["id"] = vertex.index
        return {
            "data": payload,
            "classes": vertex["vertex_type"],
            "id": vertex.index,
            "grabbable": False,
        }

    nodes = [_as_node(vertex) for vertex in subgraph.vs()]
    links = [
        {"data": {"source": edge.source, "target": edge.target}}
        for edge in subgraph.es()
    ]
    return nodes + links

In [21]:
# Load the patient/variant graph exported as GraphML.
GRAPH = ig.Graph.Read_GraphML(os.path.join(OUTPUT_ROOT_PATH, "grafo_cytoscape.graphml"))
# Distinct cluster ids as ints, sorted so the dropdown options have a stable
# ascending order instead of depending on set iteration order.
CLUSTERS_INDEX = sorted({int(c) for c in GRAPH.vs["cluster"]})

# Clinical data table (tab-separated), one "cluster" column plus clinical variables.
DF_CLINICAL_DATA = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "cluster_clinical_data.csv"), sep="\t")
# Numeric columns that can be box-plotted; "cluster" is the grouping key, not a variable.
NUMERIC_COLUMNS_CLINICAL = [
    column
    for column in DF_CLINICAL_DATA.select_dtypes(include=np.number).columns
    if column != "cluster"
]
# Remaining (non-numeric) columns. The original statement was left unfinished
# (`=list`) and bound the builtin type itself.
# NOTE(review): assumed intent is "every non-numeric column" — confirm.
STRING_COLUMNS_CLINICAL = list(DF_CLINICAL_DATA.select_dtypes(exclude=np.number).columns)


In [22]:
# The page bodies are injected into "page-content" by a callback, so almost all
# component ids referenced by the callbacks are absent from the initial layout.
# suppress_callback_exceptions=True tells Dash this is intentional instead of
# reporting every such id as nonexistent.
app = Dash(
    title=APP_NAME,
    external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME],
    suppress_callback_exceptions=True,
)

#ICON -> https://fontawesome.com/search

#SIDEBAR
def _sidebar_link(icon_class, label, href):
    """Return one sidebar entry: a Font Awesome icon followed by its text label."""
    return dbc.NavLink(
        [
            html.I(className=icon_class),
            html.Span(label, className="navbar_span"),
        ],
        href=href,
        active="exact",
        className="navbar_entity",
    )


# Logo and application title shown at the top of the sidebar.
_sidebar_header = html.Div(
    [
        html.Img(src=APP_LOGO, className="navbar_logo"),
        html.Span(APP_NAME, className="navbar_title"),
    ],
    className="sidebar-header",
)

# One link per page handled by the routing callback.
_sidebar_nav = dbc.Nav(
    [
        #HOME
        _sidebar_link("fas fa-home", "Home", "/"),
        #PATHWAY
        _sidebar_link("fas fa-diagram-project", "Pathways Analysis", "/pathway_analysis"),
        #CLUSTER COMP
        _sidebar_link("fas fa-code-compare", "Cluster Comparision", "/cluster_comparision"),
        #CLINICAL
        _sidebar_link("fas fa-table", "Clinical Data", "/clinical_data"),
    ],
    vertical=True,
    pills=True,
)

sidebar = html.Div([_sidebar_header, html.Hr(), _sidebar_nav], className="sidebar")

# Empty container whose children are replaced on every URL change.
content = html.Div(id="page-content", className="content")
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])

#******************************************************
def _info_block(title, initial_value, span_id):
    """Return one summary tile: a caption, a rule and the span a callback fills in."""
    return dbc.Col([
        html.Span(title),
        html.Hr(),
        html.Span(initial_value, id=span_id)
    ], width=2, className="info_block")


# Home page: cluster selector, four summary tiles, the cluster graph with its
# layout selector, a gene pie chart and a variant-degree bar chart.
PAGE_HOME = [
    #DIALOG INFO NODE (filled and opened when a graph node is tapped)
    dbc.Modal(
        [],
        id="modal-lg",
        size="lg",
        is_open=False,
    ),
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col([
            html.Span("Cluster selected", className="span_selector"),
            dcc.Dropdown(
                CLUSTERS_INDEX,
                0,
                id='dropdown-cluster',
                persistence=True,
                persistence_type = 'memory'
            ),html.Br()
        ], lg=4),html.Hr()
    ]),
    #1 ROW: summary tiles (captions previously misspelled as "Numer of ...")
    dbc.Row([
        _info_block("Number of patients", 0, "span_n_patient"),
        _info_block("Number of variants", 0, "span_n_variants"),
        _info_block("Number of genes", 0, "span_n_genes"),
        _info_block("Variant centroid", "None", "span_variant_centroids"),
    ], justify="evenly"),

    #2 ROW
    dbc.Row([
        #CYTOSCAPE BLOCK
        dbc.Col([
            #CYTOSCAPE GRAPH
            cyto.Cytoscape(
                id='cytoscape-graph',
                layout={'name': 'cola'},
                style={'width': '100%', 'height': '25rem'},
                stylesheet=[
                    #NODE LABEL: show the "name" attribute in a small font
                    {
                        'selector': 'node',
                        'style': {
                            'content': 'data(name)',
                            "font-size": "5px"
                        }
                    },
                    #PATIENTS: coral triangles
                    {
                        'selector': '.PATIENT',
                        'style': {
                            'background-color': 'coral',
                            'shape': 'triangle'
                        }
                    },
                    #VARIANTS: royal-blue circles
                    # NOTE(review): verify that 'circle' is an accepted Cytoscape.js
                    # node shape; the documented round shape is 'ellipse'.
                    {
                        'selector': '.VARIANT',
                        'style': {
                            'background-color': 'royalblue',
                            'shape': 'circle'
                        }
                    },
                    #SELECTED ELEMENTS: green highlight
                    {
                        'selector': ':selected',
                        'style': {
                            'background-color': '#02cd79',
                        },
                    }
                ],
                # Initial content; replaced by the cluster callback afterwards.
                elements=filter_graph(0),
                minZoom=0.25,
                maxZoom=2,
                responsive=True
            ),
            #LAYOUT SELECTOR
            dcc.RadioItems(
                options=["cola", "concentric", "breadthfirst", "klay"],
                value="cola",
                inline=True,
                id='radio-layouts'
            )
        ], lg=6),
        #FIGURE PIE
        dbc.Col([
            dcc.Graph(id="fig_pie")
        ], lg=6)
    ], justify="evenly"),
    #3 ROW
    dbc.Row([
        #FIGURE DEGREE
        dbc.Col([
            dcc.Graph(id="fig_degree")
        ], lg=11),
    ], justify="evenly")
]

#******************************************************
def _pathway_selector(label, options, default, component_id):
    """Return a 4-unit column holding a captioned dropdown with in-memory persistence."""
    return dbc.Col(
        [
            html.Span(label, className="span_selector"),
            dcc.Dropdown(
                options,
                default,
                id=component_id,
                persistence=True,
                persistence_type='memory',
            ),
            html.Br(),
        ],
        lg=4,
    )


# Radio buttons choosing which GO file prefix the GO figure is built from.
_go_category_radio = dcc.RadioItems(
    options=[
        {'label': 'Biological Function', 'value': 'biological'},
        {'label': 'Molecular Function', 'value': 'molecular'},
        {'label': 'Cellular Component', 'value': 'cellular'}
    ],
    # Default value
    value='biological',
    labelStyle={'display': 'inline-block'},
    id="radio_fig_go",
)

# Pathway analysis page: three parameter selectors followed by the GO, KEGG
# and WikiPathway enrichment figures.
PAGE_PATHWAY_ANALYSIS = [
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        _pathway_selector("Cluster selected", CLUSTERS_INDEX, 0, 'dropdown-cluster'),
        #PVALUE SELECTOR
        _pathway_selector("PValue threshold", [0.01, 0.05], 0.05, 'dropdown-pvalue'),
        #ADJUSTED PVALUE SELECTOR
        _pathway_selector("Use adjusted PValue", ["True", "False"], "False", 'dropdown-adjusted-pvalue'),
        html.Hr(),
    ]),
    #FIRST ROW
    dbc.Row([
        #FIGURE GO
        dbc.Col([dcc.Graph(id="fig_go"), _go_category_radio], lg=6),
        #FIGURE KEGG
        dbc.Col([dcc.Graph(id="fig_kegg")], lg=6),
        #FIGURE WIKI
        dbc.Col([dcc.Graph(id="fig5")], lg=6),
    ]),
]

#******************************************************
# Multi-select dropdown listing every cluster; starts with nothing selected.
_comparison_cluster_dropdown = dcc.Dropdown(
    CLUSTERS_INDEX,
    [],
    id='dropdown-cluster-multi',
    multi=True,
    persistence=True,
    persistence_type='memory',
)

# Cluster comparison page: the multi-cluster selector and the image element
# that receives the rendered Venn diagram.
PAGE_CLUSTER_COMPARISION = [
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR MULTI
        dbc.Col(
            [
                html.Span("Cluster selected", className="span_selector"),
                _comparison_cluster_dropdown,
                html.Br(),
            ],
            lg=4,
        ),
        html.Hr(),
    ]),
    html.Img(id='plot-venn'),
]

#*******************************************************
def _box_plot_column(default_column, dropdown_id, graph_id):
    """Return a half-width column: a numeric-column dropdown above its box-plot graph."""
    return dbc.Col(
        [
            dcc.Dropdown(
                NUMERIC_COLUMNS_CLINICAL,
                default_column,
                id=dropdown_id,
                persistence=True,
                persistence_type='memory',
            ),
            dcc.Graph(id=graph_id),
        ],
        lg=6,
    )


# Clinical data page: cluster selector, two configurable box plots and the
# molecular-subtype pie chart.
PAGE_CLINICAL_DATA = [
    #DIALOG INFO NODE
    dbc.Modal(
        [],
        id="modal-lg",
        size="lg",
        is_open=False,
    ),
    #ROW LINE PARAMS
    dbc.Row([
        #CLUSTER SELECTOR
        dbc.Col(
            [
                html.Span("Cluster selected", className="span_selector"),
                dcc.Dropdown(
                    CLUSTERS_INDEX,
                    0,
                    id='dropdown-cluster',
                    persistence=True,
                    persistence_type='memory',
                ),
                html.Br(),
            ],
            lg=4,
        ),
        html.Hr(),
    ]),
    dbc.Row([
        #FIGURE BOX_PLOT_1 (defaults to the first numeric column)
        _box_plot_column(NUMERIC_COLUMNS_CLINICAL[0], 'dropdown-box-1', "fig_box_plot_1"),
        #FIGURE BOX_PLOT_2 (defaults to the last numeric column)
        _box_plot_column(NUMERIC_COLUMNS_CLINICAL[-1], 'dropdown-box-2', "fig_box_plot_2"),
    ]),
    #ROW_CLINICAL
    dbc.Row([
        dbc.Col([dcc.Graph(id="pie_molecular")], lg=6),
    ]),
]

In [23]:
#PAGING
@app.callback(
    Output("page-content", "children"),
    Input("url", "pathname")
)
def render_page_content(pathname):
    """Return the page layout matching the current URL path.

    Known paths map to one of the ``PAGE_*`` layouts; any other path yields
    a small "404: Not found" panel that echoes the requested pathname.
    """
    # Built at call time so the lookup always sees the current PAGE_* objects.
    known_pages = {
        "/": PAGE_HOME,
        "/pathway_analysis": PAGE_PATHWAY_ANALYSIS,
        "/cluster_comparision": PAGE_CLUSTER_COMPARISION,
        "/clinical_data": PAGE_CLINICAL_DATA,
    }
    page = known_pages.get(pathname)
    if page is not None:
        return page

    # Unknown path: show a 404 message instead of a page.
    not_found_children = [
        html.H1("404: Not found", className="text-danger"),
        html.Hr(),
        html.P(f"The pathname {pathname} was not recognised..."),
    ]
    return html.Div(not_found_children, className="p-3 bg-light rounded-3")

#SELECT SINGLE NODE GRAPH
@callback(
    Output('modal-lg', 'children'),
    Output('modal-lg', 'is_open'),
    Input('cytoscape-graph', 'tapNodeData'),
    prevent_initial_call=True,
)
def displaySelectedNodeData(data_dict):
    """Fill and open the info dialog for the graph node that was tapped.

    Parameters
    ----------
    data_dict : dict | None
        The ``data`` mapping of the tapped Cytoscape node.

    Returns
    -------
    tuple
        ``([ModalHeader, Markdown], True)``: the dialog children and the
        flag that opens the dialog. For ``VARIANT`` nodes only a fixed set
        of attributes is listed; for every other node everything except a
        fixed set of technical attributes is listed.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When no node data is available, so the dialog is left untouched
        instead of the callback failing on ``None``.
    """
    from dash.exceptions import PreventUpdate

    if not data_dict:
        raise PreventUpdate

    if data_dict.get("vertex_type") == "VARIANT":
        # Whitelist of attributes shown for variants.
        shown = {"name", "gene", "sost_amm", "consequence", "variant_type"}

        def is_listed(key):
            return key in shown
    else:
        # Blacklist of technical / variant-only attributes hidden for other nodes.
        hidden = {
            "vertex_type", "variants", "color_vertex", "shape_vertex", "gene",
            "sost_amm", "variant_type", "consequence", "color", "cluster",
            "id", "timeStamp",
        }

        def is_listed(key):
            return key not in hidden

    # One "**key**:value" markdown line per listed attribute, in dict order.
    details = "".join(
        f"**{k}**:{v}\n" for k, v in data_dict.items() if is_listed(k)
    )

    return [
        dbc.ModalHeader(dbc.ModalTitle(data_dict.get('name', ""))),
        dcc.Markdown(details, className="markdown"),
    ], True

#UPDATE CLUSTER LAYOUT
@callback(
    Output(component_id='cytoscape-graph', component_property='layout'),
    Input(component_id='radio-layouts', component_property='value')
)
def update_graph(layout):
    """Turn the layout name picked in the radio group into a Cytoscape layout spec."""
    layout_spec = dict(name=layout)
    return layout_spec

#SELECT CLUSTER INDEX
@callback(
    Output(component_id='cytoscape-graph', component_property='elements'),
    Output(component_id='fig_pie', component_property='figure'),
    Output(component_id='fig_degree', component_property='figure'),
    Output(component_id='span_n_patient', component_property='children'),
    Output(component_id='span_n_variants', component_property='children'),
    Output(component_id='span_n_genes', component_property='children'),
    Output(component_id='span_variant_centroids', component_property='children'),
    Input(component_id='dropdown-cluster', component_property='value')
)
def update_cluster(cluster):
    """Refresh every home-page widget for the selected cluster.

    Parameters
    ----------
    cluster
        Cluster id chosen in the dropdown; ``None`` when the dropdown is cleared.

    Returns
    -------
    tuple
        ``(elements, fig_pie, fig_degree, n_patients, n_variants, n_genes,
        variant_centroids)`` in the order of the declared outputs.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When no cluster is selected, so the page keeps its previous content
        instead of failing on a non-existent ``..._None.csv`` file.
    """
    from dash.exceptions import PreventUpdate

    if cluster is None:
        raise PreventUpdate

    #CLUSTER ELEMENTS
    cluster_elements = filter_graph(cluster)

    #FIGURE PIE
    df_gene = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Gene_Count", f"genes_cluster_{cluster}.csv"), sep="\t")
    fig_pie = px.pie(df_gene, values='COUNT', names='GENE', title='Number Mutation for Gene')
    fig_pie.update_traces(textposition="inside", textinfo='label')

    #FIGURE DEGREE
    # Keep the full, sorted table for the statistics; only the chart is
    # limited to the 15 highest-degree variants.
    df_variant_all = pd.read_csv(
        os.path.join(OUTPUT_ROOT_PATH, "Variants_Degree", f"variants_degree_cluster{cluster}.csv"),
        sep="\t",
    ).sort_values(by=['Degree'], ascending=False)
    df_variant_top = df_variant_all[:15]
    fig_degree = px.bar(df_variant_top, x="Variants", y="Degree", title="Mutation Degree")

    #PATIENTS NUMBER
    n_patients = sum(
        1 for e in cluster_elements if e["data"].get("vertex_type", "") == "PATIENT"
    )
    #VARIANT NUMBERS
    # Counted on the untruncated table; counting the top-15 slice capped the
    # displayed value at 15.
    n_variants = len(df_variant_all)
    #GENE NUMBERS
    n_genes = len(df_gene)

    #VARIANT CENTROID
    # The centroid is the single highest-degree variant; a tie for first
    # place is reported as "More than one". Clusters with zero or one
    # variant no longer raise IndexError.
    if df_variant_all.empty:
        variant_centroids = "None"
    elif len(df_variant_all) > 1 and df_variant_all.iloc[0]["Degree"] == df_variant_all.iloc[1]["Degree"]:
        variant_centroids = "More than one"
    else:
        variant_centroids = df_variant_all.iloc[0]["Variants"]

    #RETURN
    return cluster_elements, fig_pie, fig_degree, n_patients, n_variants, n_genes, variant_centroids

#UPDATE GO FIGURE
@callback(
    Output(component_id='fig_go', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value'),
        Input(component_id='radio_fig_go', component_property='value')
    ]
)
def update_figure2(cluster,pvalue,adjusted_pvalue,process_type):
    """Build the GO enrichment bar chart for one cluster and GO category.

    Reads ``<process_type>_<cluster>.csv`` from the GO enrichment folder,
    keeps rows whose (adjusted) p-value is below ``pvalue`` and plots the 25
    rows with the smallest values. ``adjusted_pvalue`` is the string
    ``"True"`` to use the adjusted column; any other value uses the raw one.
    """
    enrichment_path = os.path.join(
        OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "GO", f"{process_type}_{cluster}.csv"
    )
    enrichment = pd.read_csv(enrichment_path)

    # Select the score column and its display label once instead of
    # duplicating the plotting code per branch.
    if adjusted_pvalue == "True":
        score_column, score_label = 'Adjusted.P.value', "Adjusted Pvalue"
    else:
        score_column, score_label = 'P.value', "Pvalue"

    significant = enrichment[enrichment[score_column] < pvalue]
    top_terms = significant.sort_values(by=[score_column])[:25]

    fig = px.bar(
        top_terms,
        x=score_column,
        y='Term',
        hover_data=['Overlap'],
        color=score_column,
        title='GO',
        labels={score_column: score_label},
    )
    fig.update_layout(
        xaxis_title=score_label,  # x-axis caption
        yaxis_title="Terms",
        legend_title=score_label,
    )
    return fig

#UPDATE KEGG FIGURE
@callback(
    Output(component_id='fig_kegg', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value')
    ]
)
def update_figure3(cluster,pvalue,adjusted_pvalue):
    """Build the KEGG enrichment bar chart for one cluster.

    Reads ``kegg_<cluster>.csv`` from the KEGG enrichment folder, keeps rows
    whose (adjusted) p-value is below ``pvalue`` and plots the 25 rows with
    the smallest values. ``adjusted_pvalue`` is the string ``"True"`` to use
    the adjusted column; any other value uses the raw one.
    """
    df = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "KEGG", f"kegg_{cluster}.csv"))

    # Select the score column and its display label once. The relabelling
    # is keyed on the column actually plotted; the adjusted branch used to
    # key it on the raw p-value column, so the label was never applied.
    if adjusted_pvalue == "True":
        score_column, score_label = 'KEGG_2021_Human.Adjusted.P.value', "Adjusted Pvalue"
    else:
        score_column, score_label = 'KEGG_2021_Human.P.value', "Pvalue"

    df = df[df[score_column] < pvalue]
    df = df.sort_values(by=[score_column])[:25]

    fig = px.bar(
        df,
        x=score_column,
        y='KEGG_2021_Human.Term',
        hover_data=['KEGG_2021_Human.Overlap'],
        color=score_column,
        title='KEGG',
        color_continuous_scale=px.colors.sequential.Viridis,
        labels={score_column: score_label},
    )
    fig.update_layout(
        xaxis_title=score_label,  # x-axis caption
        yaxis_title="Terms",
        legend_title=score_label,
    )
    return fig


#UPDATE WIKI FIGURE
@callback(
    Output(component_id='fig5', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-pvalue', component_property='value'),
        Input(component_id='dropdown-adjusted-pvalue', component_property='value')
    ]
)
def update_figure5(cluster,pvalue,adjusted_pvalue):
    """Build the WikiPathway enrichment bar chart for one cluster.

    Reads ``wiki_<cluster>.csv`` from the WIKI enrichment folder, keeps rows
    whose (adjusted) p-value is below ``pvalue`` and plots the 25 rows with
    the smallest values. ``adjusted_pvalue`` is the string ``"True"`` to use
    the adjusted column; any other value uses the raw one.
    """
    source_csv = os.path.join(
        OUTPUT_ROOT_PATH, "Arricchimento_all_genes", "WIKI", f"wiki_{cluster}.csv"
    )
    pathways = pd.read_csv(source_csv)

    use_adjusted = adjusted_pvalue == "True"
    score_column = (
        'WikiPathway_2023_Human.Adjusted.P.value'
        if use_adjusted
        else 'WikiPathway_2023_Human.P.value'
    )
    score_label = "Adjusted Pvalue" if use_adjusted else "Pvalue"

    below_threshold = pathways[pathways[score_column] < pvalue]
    best = below_threshold.sort_values(by=[score_column])[:25]

    fig = px.bar(
        best,
        x=score_column,
        y='WikiPathway_2023_Human.Term',
        hover_data=['WikiPathway_2023_Human.Overlap'],
        color=score_column,
        title='WikiPathway',
        color_continuous_scale=px.colors.sequential.Viridis,
        labels={
            score_column: score_label,
            'WikiPathway_2023_Human.Overlap': 'Overlap_Genes',
        },
    )
    fig.update_layout(
        xaxis_title=score_label,  # x-axis caption
        yaxis_title="Terms",
        legend_title=score_label,
    )
    return fig


#UPDATE COMPARISION
@callback(
    Output(component_id='plot-venn', component_property='src'),
    Input(component_id='dropdown-cluster-multi', component_property='value'),
    prevent_initial_call=True,
)
def update_venn(list_clusters):
    """Render a Venn diagram of the gene sets of the selected clusters.

    Parameters
    ----------
    list_clusters : list | None
        Cluster ids chosen in the multi-select dropdown.

    Returns
    -------
    str | None
        A ``data:image/png;base64,...`` URI for the image element, or
        ``None`` when the number of selected clusters is not between 2 and 6.
    """
    # One drawing function per supported number of sets.
    draw_by_size = {
        2: venn.venn2,
        3: venn.venn3,
        4: venn.venn4,
        5: venn.venn5,
        6: venn.venn6,
    }
    # Decide first whether a diagram can be drawn, so no CSV is read for nothing.
    draw = draw_by_size.get(len(list_clusters or []))
    if draw is None:
        return None

    gene_lists = [
        pd.read_csv(
            os.path.join(OUTPUT_ROOT_PATH, "Gene_Count", f"genes_cluster_{index}.csv"),
            sep="\t",
        )["GENE"].values
        for index in list_clusters
    ]
    labels = venn.get_labels(gene_lists, fill=['number'])
    fig, _ax = draw(labels, names=list_clusters)

    #SAVE TO BUFFER
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png")
    finally:
        # Close exactly the figure that was drawn. The previous cleanup
        # (plt.figure().clear(), plt.close(), plt.cla(), plt.clf()) created
        # new figures and never released this one, leaking a figure per call.
        plt.close(fig)

    fig_data = base64.b64encode(buf.getvalue()).decode("ascii")
    return f'data:image/png;base64,{fig_data}'

#UPDATE BOX_PLOT_1
@callback(
    Output(component_id='fig_box_plot_1', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-box-1', component_property='value')
    ]
)
def update_box_1(cluster, column_name):
    """Box-plot a clinical column for the selected cluster next to the whole table.

    The first trace holds the rows whose ``cluster`` equals ``cluster``; the
    second trace holds the column for every row of ``DF_CLINICAL_DATA``.
    """
    in_cluster = DF_CLINICAL_DATA["cluster"] == cluster
    cluster_values = DF_CLINICAL_DATA.loc[in_cluster, column_name].values
    # NOTE(review): this trace is labelled 'Other Values' but contains all
    # rows, including the selected cluster's — confirm which is intended.
    all_values = DF_CLINICAL_DATA[column_name].values

    return go.Figure(
        data=[
            go.Box(y=cluster_values, name='Cluster Values', marker_color='indianred'),
            go.Box(y=all_values, name='Other Values', marker_color='lightseagreen'),
        ]
    )

#UPDATE BOX_PLOT_2
@callback(
    Output(component_id='fig_box_plot_2', component_property='figure'),
    [
        Input(component_id='dropdown-cluster', component_property='value'),
        Input(component_id='dropdown-box-2', component_property='value')
    ]
)
def update_box_2(cluster, column_name):
    """Second box plot: the chosen clinical column for the cluster versus the whole table.

    Same construction as the first box plot, driven by its own column dropdown.
    """
    column = DF_CLINICAL_DATA[column_name]
    belongs_to_cluster = DF_CLINICAL_DATA["cluster"] == cluster

    traces = [
        go.Box(
            y=column[belongs_to_cluster].values,
            name='Cluster Values',
            marker_color='indianred',
        ),
        # NOTE(review): labelled 'Other Values' although it plots every row,
        # the selected cluster included — confirm which is intended.
        go.Box(
            y=column.values,
            name='Other Values',
            marker_color='lightseagreen',
        ),
    ]
    return go.Figure(data=traces)


#UPDATE PIE MOLECULAR
@callback(
    Output(component_id='pie_molecular', component_property='figure'),
    Input(component_id='dropdown-cluster', component_property='value')
)
def update_pie_molecular(cluster):
    """Pie chart of the ``Percentage`` per ``MOLECULAR_SUBTYPE`` for one cluster.

    The tab-separated ``prova_classe.csv`` file is re-read on every call and
    filtered to the rows of the selected cluster.
    """
    subtype_table = pd.read_csv(os.path.join(OUTPUT_ROOT_PATH, "prova_classe.csv"), sep="\t")
    cluster_rows = subtype_table[subtype_table["cluster"] == cluster]
    return px.pie(cluster_rows, values="Percentage", names="MOLECULAR_SUBTYPE")

In [24]:

#START
# Launch the Dash server only when this code runs as the main program.
if __name__ == '__main__':
    # Announce where the app is served.
    # NOTE(review): the address is hard-coded; it presumably matches the
    # defaults used by app.run() — confirm if host/port are ever changed.
    print("Deploy on: http://127.0.0.1:8050/")
    #GLOBAL (listen on all interfaces) — currently disabled
    #app.run(debug=False, host='0.0.0.0')
    #LOCAL
    app.run(debug=False)

Deploy on: http://127.0.0.1:8050/
