# 2nd CMI-PB Prediction Challenge
## Team Advisors: Barry Grant, Jason Hsiao
## Team Members: Peng Cheng, Javier Garcia, Brian Qian, Weikang Guan
## Part 4: Dashboard

In [1]:
# Import necessary Python libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Libraries for Dashboard Visualization
import dash
from dash import html, dcc
from dash import dash_table
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
import plotly.graph_objects as go
import plotly.express as px

# Libraries for API requests
from io import BytesIO
import sys
import os

# Compatibility shim: expose `DataFrame.iteritems` as an alias of `DataFrame.items`.
# NOTE(review): presumably needed because one of the libraries imported above still
# calls `iteritems`, which newer pandas releases no longer provide — confirm which
# dependency requires it and drop the shim once that dependency is updated.
pd.DataFrame.iteritems = pd.DataFrame.items

## Loading in Datasets

In [2]:
# Make the parent directory importable so the shared data-pipeline module can be loaded.
# '../' is resolved against the kernel's current working directory.
directory_path = os.path.abspath('../')
if directory_path not in sys.path:
    sys.path.append(directory_path)

# The module name starts with a digit, so it cannot appear in a regular `import`
# statement; `__import__` accepts the name as a string instead.
# `api_requests.fetch_data` is what the next cell uses to query the CMI-PB API.
api_requests = __import__('00_data_pipeline')

In [3]:
# Fetch subject metadata (2020 and 2021 datasets only) and the specimen table.
# `fetch_data` returns text, which is encoded to bytes and handed to pandas as a CSV buffer.
subjects_csv = api_requests.fetch_data('subject?or=(dataset.eq.2020_dataset,dataset.eq.2021_dataset)')
df_subjects = pd.read_csv(BytesIO(subjects_csv.encode()))
specimen_csv = api_requests.fetch_data('specimen')  # includes 2022 data
df_specimen = pd.read_csv(BytesIO(specimen_csv.encode()))

# Inner-join subjects with their specimens for the exploratory charts shown before
# feature selection. Specimens whose subject is not in `df_subjects` are dropped.
# NOTE(review): the result has one row per matching specimen, so a subject with
# several specimens appears several times in the Age / Race / Sex charts — confirm
# whether per-subject counts were intended.
eda = df_subjects.merge(df_specimen, on='subject_id')

# Age at boost, approximated as the difference between the two calendar years
# (the first four characters of each date string).
boost_year = pd.to_numeric(eda['date_of_boost'].str[:4])
birth_year = pd.to_numeric(eda['year_of_birth'].str[:4])
eda['Age'] = boost_year - birth_year

In [4]:
# Training frames used to fit the models, one per task family (1, 2, 3).
task_1, task_2, task_3 = map(pd.read_csv, (
    'training_result/df_train1.csv',
    'training_result/df_train2.csv',
    'training_result/df_train3.csv',
))

# Selected features per sub-task. `reset_index()` adds the positional index as an
# extra 'index' column alongside the columns read from the file.
task11fs, task12fs, task21fs, task22fs, task31fs, task32fs = (
    pd.read_csv(path).reset_index()
    for path in (
        'training_result/task11_feature_selected.csv',
        'training_result/task12_feature_selected.csv',
        'training_result/task21_feature_selected.csv',
        'training_result/task22_feature_selected.csv',
        'training_result/task31_feature_selected.csv',
        'training_result/task32_feature_selected.csv',
    )
)

# Per-model evaluation scores (R-squared, MSE, MAE) for each sub-task.
task11ev, task12ev, task21ev, task22ev, task31ev, task32ev = map(pd.read_csv, (
    'training_result/task11_model_evaluation.csv',
    'training_result/task12_model_evaluation.csv',
    'training_result/task21_model_evaluation.csv',
    'training_result/task22_model_evaluation.csv',
    'training_result/task31_model_evaluation.csv',
    'training_result/task32_model_evaluation.csv',
))

# Final predictions produced by the models, rounded to three decimals for display.
df = pd.read_csv('prediction_result/Prediction_value.csv').round(3)

## Dashboard Creation

In [None]:
# Load the 'minty' figure template; the callbacks below request it via template="minty".
load_figure_template(['minty'])

# Stylesheets: the Minty Bootstrap theme plus the Bootstrap icon font (used for the GitHub icon).
_STYLESHEETS = [dbc.themes.MINTY, dbc.icons.BOOTSTRAP]

# <meta> tags emitted into the page head.
_META_TAGS = [
    dict(name="viewport", content="width=device-width, initial-scale=1"),
    dict(name='description', content='A dashboard displaying immune responses to pertussis vaccinations.'),
]

app = dash.Dash(__name__, external_stylesheets=_STYLESHEETS, meta_tags=_META_TAGS)

# Text shown in the browser tab.
app.title = "CMI-PB Responses Pertussis Vaccination"


# Task labels offered in the task selector. The callbacks compare the selected
# value against these exact strings, so any edit here must be mirrored there.
cols_dd = ['1.1 Rank individuals by IgG titers against PT in plasma 14 days post booster',
           '1.2 Rank individuals by fold change in IgG titers against PT from day 0 to day 14 post booster',
           '2.1 Rank individuals by predicted monocyte frequency on day 1 post booster',
           '2.2 Rank individuals by fold change in predicted monocyte frequency from day 0 to day 1 post booster',
           '3.1 Rank individuals by predicted CCL3 gene expression on day 3 post booster',
           '3.2 Rank individuals by fold change in predicted CCL3 gene expression from day 0 to day 3 post booster']

# Dropdown for selecting the task to be viewed.
# `clearable=False` removes the "x" button: a cleared dropdown sends None to every
# callback, and a task must always be selected for the charts to make sense.
drops = html.Div([html.Label('Select Task:'),
                  dcc.Dropdown(
                      id='demo-dropdown',
                      options=[{'label': k, 'value': k} for k in cols_dd],
                      value=cols_dd[0],
                      optionHeight=60,  # labels are long and wrap onto several lines
                      clearable=False,
                  )])

# Dropdown for selecting the evaluation metric (same reasoning for `clearable=False`).
drops2 = html.Div([html.Label('Select Evaluation Metric:'),
                  dcc.Dropdown(
                      id='metric-dropdown',
                      options=['MSE', 'MAE', 'R-squared'],
                      value='MSE',
                      clearable=False,
                  )])

# Creates the layout of the Dash application

# Every chart uses the same sizing so the grid rows line up.
_GRAPH_STYLE = {'height': '500px', 'width': '100%'}


def _graph_col(graph_id, width):
    """Return a Bootstrap column `width` grid units wide holding a graph with id `graph_id`.

    The figure itself is supplied later by the callback whose Output targets `graph_id`.
    """
    return dbc.Col(dcc.Graph(id=graph_id, style=dict(_GRAPH_STYLE)), width=width)


# Sticky top bar with links to the CMI-PB data page and the project repository.
_navbar = dbc.NavbarSimple(
    children=[
        dbc.NavItem(dbc.NavLink("Data", href="https://www.cmi-pb.org/data")),
        dbc.NavItem(
            dbc.NavLink(
                html.Span([html.I(className="bi bi-github me-1"), " GitHub"]),
                href="https://github.com/brianrqian/DSE260A-Immune-Response",
            )
        ),
    ],
    brand="CMI-PB Responses Pertussis Vaccination",
    brand_href="https://www.cmi-pb.org/",
    color="dark",
    dark=True,
    className='mb-4 sticky-top',
    fluid=True,
)

# Row 1: four quarter-width overview charts.
_overview_row = dbc.Row([_graph_col(graph_id, 3) for graph_id in ('age', 'race', 'sex', 'bubble-chart')])

# Row 2: task and metric selectors side by side.
_selector_row = dbc.Row([dbc.Col(drops), dbc.Col(drops2)])

# Row 3: correlation heatmap and model-evaluation bar chart.
_task_row = dbc.Row([_graph_col('imm', 6), _graph_col('r2', 6)])

# Row 4: full-width parallel-coordinates plot.
_parallel_row = dbc.Row([_graph_col('parallel-coords-plot', 12)])

# Row 5: download button and the (invisible) download component it drives.
_download_row = dbc.Row(
    dbc.Col(
        html.Div([
            dbc.Button("Download CSV", color="dark", id="btn_csv"),
            dcc.Download(id="download-dataframe-csv"),
        ])
    )
)

# Row 6: the prediction table, shown on a single page (page_size equals the row count).
_result_table = dash_table.DataTable(
    id='result-table',
    columns=[{"name": i, "id": i} for i in df.columns],
    data=df.to_dict('records'),
    page_size=len(df),
    style_cell={
        'fontSize': '12px',
        'font-family': 'system-ui',
        'maxWidth': '150px',
        'whiteSpace': 'normal',
        'height': 'auto',
        'textAlign': 'left',
    },
    style_header={
        'backgroundColor': '#343a40',
        'color': 'white',
        'fontWeight': 'bold',
    },
    style_table={
        'width': '100%',
        'minWidth': '100%',
    },
    style_data={
        'color': 'black',
    },
)
_table_row = dbc.Row([dbc.Col([_result_table])])

app.layout = html.Div([
    _navbar,
    dbc.Container(
        dbc.Stack(
            [
                _overview_row,
                _selector_row,
                _task_row,
                _parallel_row,
                _download_row,
                _table_row,
            ],
            gap=4,
        ),
        fluid=True,
    ),
])


# Callback that draws the Age histogram from the merged `eda` frame
@app.callback(
    dash.dependencies.Output('age', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')])
def generate_age_distribution(df):
    """Build the "Distribution of Age" histogram.

    Args:
        df: Value of the task dropdown. It only serves as the trigger; the
            chart is always built from the module-level `eda` frame.

    Returns:
        A plotly histogram of `eda['Age']` with 20 bins.
    """
    histogram = px.histogram(eda, x='Age', nbins=20, title="Distribution of Age")
    histogram.update_layout(template="minty", bargap=0.2, title_x=0.5)
    return histogram

# Callback that draws the Sex pie chart from the merged `eda` frame
@app.callback(
    dash.dependencies.Output('sex', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')])
def generate_sex_distribution(df):
    """Build the "Sex Distribution" pie chart.

    Args:
        df: Value of the task dropdown. It only serves as the trigger; the
            chart is always built from the module-level `eda` frame.

    Returns:
        A plotly pie chart with one slice per distinct `biological_sex` value.
    """
    pie = px.pie(names=eda['biological_sex'], title="Sex Distribution")
    pie.update_layout(template="minty", title_x=0.5)
    return pie

# Callback that draws the Race histogram from the merged `eda` frame
@app.callback(
    dash.dependencies.Output('race', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')])
def generate_race_distribution(df):
    """Build the "Distribution of Race" histogram.

    Args:
        df: Value of the task dropdown. It only serves as the trigger; the
            chart is always built from the module-level `eda` frame.

    Returns:
        A plotly histogram counting rows of `eda` per `race` value.
    """
    histogram = px.histogram(eda, x='race', title="Distribution of Race")
    histogram.update_layout(template="minty", title_x=0.5)
    return histogram

# Callback that draws the feature-correlation heatmap for the selected task
@app.callback(
    dash.dependencies.Output('imm', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')])
def generate_immunogenicity_plot(value):
    """Build a correlation heatmap over the first nine columns of a training frame.

    Both sub-tasks of a task family (e.g. 1.1 and 1.2) share one training frame.

    Args:
        value: Label currently selected in the task dropdown.

    Returns:
        A plotly heatmap figure, or an empty dict when the label is not
        recognised or building the figure fails (the error is printed).
    """
    try:
        training_by_task = {
            '1.1 Rank individuals by IgG titers against PT in plasma 14 days post booster': task_1,
            '1.2 Rank individuals by fold change in IgG titers against PT from day 0 to day 14 post booster': task_1,
            '2.1 Rank individuals by predicted monocyte frequency on day 1 post booster': task_2,
            '2.2 Rank individuals by fold change in predicted monocyte frequency from day 0 to day 1 post booster': task_2,
            '3.1 Rank individuals by predicted CCL3 gene expression on day 3 post booster': task_3,
            '3.2 Rank individuals by fold change in predicted CCL3 gene expression from day 0 to day 3 post booster': task_3,
        }
        df_train = training_by_task.get(value)
        if df_train is None:
            return {}

        # Pairwise correlations between the first nine columns, rounded for the cell labels.
        corr = df_train.iloc[:, :9].corr().round(3)

        heatmap = px.imshow(
            corr,
            text_auto=True,
            labels=dict(color='Correlation'),
            x=corr.columns,
            y=corr.columns,
            title="Feature Correlation Heatmap",
            aspect='auto',
        )
        heatmap.update_layout(template="minty", title_x=0.5)
        return heatmap

    except Exception as e:
        # Best effort: report the problem and show an empty chart instead of failing the callback.
        print(f"Error: {e}")
        return {}

# Function to update and visualize model evaluation results per task for the chosen metric
@app.callback(
    dash.dependencies.Output('r2', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value'), 
    dash.dependencies.Input('metric-dropdown', 'value')])
def generate_model_evaluation_chart(value, met):
    """Build a bar chart of one evaluation metric across all models of a task.

    Args:
        value: Label currently selected in the task dropdown.
        met: Metric selected in the metric dropdown ('MSE', 'MAE' or 'R-squared').

    Returns:
        A plotly bar figure with one bar per row of the task's evaluation
        frame, or `dash.no_update` when either selection is missing or the
        task label is not recognised (the previous figure stays on screen).
    """
    datasets = {
        '1.1 Rank individuals by IgG titers against PT in plasma 14 days post booster': task11ev,
        '1.2 Rank individuals by fold change in IgG titers against PT from day 0 to day 14 post booster': task12ev,
        '2.1 Rank individuals by predicted monocyte frequency on day 1 post booster': task21ev,
        '2.2 Rank individuals by fold change in predicted monocyte frequency from day 0 to day 1 post booster': task22ev,
        '3.1 Rank individuals by predicted CCL3 gene expression on day 3 post booster': task31ev,
        '3.2 Rank individuals by fold change in predicted CCL3 gene expression from day 0 to day 3 post booster': task32ev
    }

    # A cleared dropdown delivers None; indexing the dict or slicing the label
    # would then raise, so keep the current figure instead.
    data = datasets.get(value) if isinstance(value, str) else None
    if data is None or met is None:
        return dash.no_update

    # Any metric other than MAE / MSE falls back to the R-squared column.
    ys = met if met in ('MAE', 'MSE') else 'R-squared'

    # value[:3] is the task number prefix of the label, e.g. "1.1".
    fig = px.bar(data, x='Model', y=ys, title=f"{met} Model Evaluation for {value[:3]}")
    fig.update_yaxes(rangemode='tozero')
    fig.update_layout(template="minty", title_x=0.5)

    return fig

# Callback that draws a bubble chart of feature values taken from the task 3 training frame
@app.callback(
    dash.dependencies.Output('bubble-chart', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')])
def generate_bubble_chart(value):
    """Build a bubble chart from the first row of `task_3`, columns 9 onward.

    NOTE(review): the selected task is ignored and only row 0 of `task_3` is
    plotted — confirm this is the intended "most important features" view.

    Args:
        value: Label currently selected in the task dropdown (used only as a trigger).

    Returns:
        A plotly scatter figure with one bubble per column, positioned on x and
        coloured by its value, with bubble size proportional to the value.
    """
    first_row = task_3.iloc[0, 9:]

    bubble_df = pd.DataFrame({
        'Feature Index': range(len(first_row)),
        'Value': first_row.values,
        'Feature': first_row.index,
    })

    # Scale sizes relative to the largest value (largest bubble -> 40).
    peak = bubble_df['Value'].max()
    bubble_df['Size'] = bubble_df['Value'] / peak * 40

    fig = px.scatter(
        bubble_df,
        x='Value',
        y='Feature Index',
        size='Size',
        color='Value',
        title='Bubble Chart for Features',
        hover_data=['Feature'],
        size_max=30,
    )

    fig.update_layout(template="minty", title_x=0.5)
    fig.update_xaxes(range=[0, peak * 1.1])  # 10% headroom to the right of the largest value
    fig.update_yaxes(showticklabels=False)   # the y position is only an index; names are in the hover

    return fig


# Data for the Parallel Coordinates Plot:
# one row per feature selected by at least one task, one `Score_<task>` column per
# task, with 0 where a task did not select that feature.
task_names = ['task11', 'task12', 'task21', 'task22', 'task31', 'task32']
datasets = [task11fs, task12fs, task21fs, task22fs, task31fs, task32fs]

# Union of the selected features across all tasks, kept in first-seen order so the
# row order of `all_data` is the same on every run (iterating a set of strings is not).
feature_order = list(dict.fromkeys(
    feature for dataset in datasets for feature in dataset['Selected Features']))
common_features = set(feature_order)

all_data = pd.DataFrame(index=feature_order)
for task_name, dataset in zip(task_names, datasets):
    # Non-mutating set_index: the loaded frames keep their 'Selected Features'
    # column, so this cell can be re-run without a KeyError.
    scores = dataset.set_index('Selected Features')[['Score']].rename(
        columns={'Score': f'Score_{task_name}'})
    all_data = all_data.join(scores, how='left').fillna(0)

# Move the feature names out of the index into a regular 'Features' column.
all_data = all_data.rename_axis('Features').reset_index()
    
@app.callback(
    dash.dependencies.Output('parallel-coords-plot', 'figure'),
    [dash.dependencies.Input('demo-dropdown', 'value')]) 
def update_parallel_coords(value):
    """Build the parallel-coordinates plot of feature scores across all tasks.

    Each line is one feature from `all_data`; each axis is one `Score_<task>`
    column. Lines are coloured by the score column of the selected task.

    Args:
        value: Label currently selected in the task dropdown.

    Returns:
        A plotly parallel-coordinates figure, or `dash.no_update` when the
        label is missing or not recognised (the previous figure stays on screen).
    """
    score_column_by_task = {
        '1.1 Rank individuals by IgG titers against PT in plasma 14 days post booster': 'Score_task11',
        '1.2 Rank individuals by fold change in IgG titers against PT from day 0 to day 14 post booster': 'Score_task12',
        '2.1 Rank individuals by predicted monocyte frequency on day 1 post booster': 'Score_task21',
        '2.2 Rank individuals by fold change in predicted monocyte frequency from day 0 to day 1 post booster': 'Score_task22',
        '3.1 Rank individuals by predicted CCL3 gene expression on day 3 post booster': 'Score_task31',
        '3.2 Rank individuals by fold change in predicted CCL3 gene expression from day 0 to day 3 post booster': 'Score_task32',
    }

    # A cleared dropdown delivers None; without this guard `colors` would be
    # unbound and the callback would raise.
    colors = score_column_by_task.get(value) if isinstance(value, str) else None
    if colors is None:
        return dash.no_update

    score_columns = [col for col in all_data.columns if col.startswith('Score_')]

    fig = px.parallel_coordinates(all_data, color=colors,
                                  dimensions=score_columns,
                                  labels={col: col for col in score_columns},
                                  color_continuous_scale=px.colors.diverging.Tealrose,
                                  color_continuous_midpoint=0)
    fig.update_layout(template="minty", title_x=0.5)

    return fig

# Function to download a csv file of the prediction results in the format of the CMI-PB challenge submission
@app.callback(
    dash.dependencies.Output("download-dataframe-csv", "data"),
    dash.dependencies.Input("btn_csv", "n_clicks"),
    prevent_initial_call=True
)
def generate_csv(n_clicks):
    """Send the prediction table to the browser as a CSV download.

    Args:
        n_clicks: Click count of the "Download CSV" button (used only as a trigger).

    Returns:
        The download payload produced by `dcc.send_data_frame`.
    """
    # index=False: `df` carries a default positional index; writing it would add an
    # unnamed leading column that is in neither the on-screen table nor the source CSV.
    # The download name is a bare file name — it is a suggestion to the browser,
    # not a server-side path, so it should not contain a directory component.
    return dcc.send_data_frame(df.to_csv, "Prediction_value.csv", index=False)

# Serve the dashboard from the underlying Flask server on the loopback interface
# (reachable only from this machine) at port 8003. The call blocks until interrupted.
if __name__ == '__main__':
    app.server.run(host='127.0.0.1', port=8003)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8003
Press CTRL+C to quit
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-component-suites/dash/dash_table/async-table.js HTTP/1.1" 304 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-component-suites/dash/dash_table/async-highlight.js HTTP/1.1" 304 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-component-suites/plotly/package_data/plotly.min.js HTTP/1.1" 304 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [06/Jun/2024 16:03:44] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.