In [2]:
# Create an interactive dashboard
import numpy as np
import pandas as pd
import json
import os
from tqdm import tqdm
import pickle
from transformers import AutoTokenizer, AutoModel
import torch
import altair as alt
from dash import Dash, Input, Output, callback, dcc, html, dash_table, State
import dash
from jupyter_dash import JupyterDash
from vega_datasets import data
import dash_vega_components as dvc
import gdown

In [None]:
# Use gdown library to download CSV of study information for use in dashboard
url = "https://drive.google.com/file/d/1ul368hM2XmHAb5h0JnVR5lmSQnhOs0si/view?usp=drive_link"
output = "study_info_df.csv"

gdown.download(url, output, fuzzy=True)

In [3]:
# Read in downlaoded CSV
input_path = "./study_info_df.csv"
study_info_df = pd.read_csv(input_path)
study_info_df

Unnamed: 0,nctId,status,phases,description,inclusion_criteria,exclusion_criteria,intervention_type,intervention_name,disease,outcome_measures,outcome_timeframes,durationMonths
0,NCT03202394,COMPLETED,['PHASE2'],"This is a randomized, double blind, placebo co...",Inclusion Criteria:\n\n1. Has provided (or rel...,:\n\n1. Age \< 18 years or \>75 years old\n2. ...,"['DRUG', 'DRUG']","['BIO-11006', 'Placebo']","['Respiratory Distress Syndrome, Adult']",['Incidence of treatment-emergent adverse even...,['28 days'],32.266667
1,NCT02138214,COMPLETED,['PHASE2'],PRIMARY OBJECTIVES:\n\nI. To determine the rat...,Inclusion Criteria:\n\n* Pre-operative diagnos...,:\n\n* Largest papillary thyroid carcinoma \< ...,"['PROCEDURE', 'PROCEDURE', 'OTHER']","['Thyroidectomy', 'entral lymph node dissectio...","['Stage I Papillary Thyroid Cancer', 'Stage II...",['Percentage of Participants With Transient Hy...,"['Post-operative day 1', 'At day 12', '2 weeks...",65.733333
2,NCT00976274,COMPLETED,['NA'],The details of study objective are followed by...,Inclusion Criteria:(three or more of following...,: (any one of following factors)\n\n* uncontro...,"['DIETARY_SUPPLEMENT', 'DIETARY_SUPPLEMENT']","['Korean red ginseng', 'starch']",['Metabolic Syndrome'],['Change in the Pre- and Post-treatment Systol...,['baseline and 12 weeks'],17.266667
3,NCT05529719,COMPLETED,['NA'],Objective: This study evaluates the effect of ...,Inclusion Criteria:\n\n* 18 to 30 years of age...,:\n\n* Volunteers who did not meet the inclusi...,['OTHER'],['Clinical Pilates Exercises'],['Low Back Pain'],['Prim Outcome - Oswestry Dysability Index'],['12 weeks'],25.266667
4,NCT02323191,COMPLETED,['PHASE1'],"This Phase 1, open-label, multicenter, global ...",Inclusion Criteria:\n\n* Eastern Cooperative O...,\n* Measurable disease at baseline as per RECI...,"['DRUG', 'DRUG']","['Atezolizumab', 'Emactuzumab']",['Solid Cancers'],['Percentage of Participants With Dose Limitin...,"['21 days', '21 days', 'Baseline up to 3 years']",68.033333
...,...,...,...,...,...,...,...,...,...,...,...,...
184620,NCT05883852,RECRUITING,['PHASE3'],The objective of this study is to conduct a ra...,Inclusion Criteria:\n\n* Women aged 18-70；\n* ...,:\n\n* Bilateral breast cancer or carcinoma in...,"['DRUG', 'DRUG', 'DRUG', 'DRUG', 'DRUG', 'DRUG']","['Docetaxel', 'carboplatin', 'Trastuzumab', 'P...",['HER2 Positive Early Breast Cancer'],['iDFS'],['5 years'],98.200000
184621,NCT02180724,ACTIVE_NOT_RECRUITING,['PHASE2'],Clinical studies have shown that targeting the...,Inclusion Criteria:\n\n1. Men and women ≥18 ye...,":\n\n1. Prior malignancy, except for adequatel...","['DRUG', 'DRUG']","['Acalabrutinib (ACP-196)', 'Acalabrutinib (AC...",['Waldenström Macroglobulinemia (WM)'],['Overall Response Rate (ORR) of Acalabrutinib...,['Up to approximately 3.8 years. Data cut at w...,61.633333
184622,NCT00487305,ACTIVE_NOT_RECRUITING,['PHASE1'],* The dose of vaccine will depend upon how man...,Inclusion Criteria:\n\n* Histologically confir...,:\n\n* Uncontrolled active infection or illnes...,['BIOLOGICAL'],['Lethally Irradiated Lymphoma cells with GM-C...,['Follicular Lymphoma'],['To determine the safety and toxicity of admi...,['2 years'],195.800000
184623,NCT05786924,RECRUITING,['PHASE1'],"BDTX-4933-101 is a first-in-human, open-label,...",Key Inclusion Criteria:\n\n1. Disease criteria...,:\n\n1. Cancer that has a known MEK1/2 mutatio...,['DRUG'],['BDTX-4933'],"['Non-small Cell Lung Cancer', 'Histiocytic Ne...",['Dose Escalation: Incidence of dose-limiting ...,"['The first 28-day cycle (Cycle 1)', 'Day 1 of...",38.000000


In [4]:
# Make copy for reference in Dashboard output so as not to clutter the DataTable with long strings such as the descriptions and eligibility criteria and remove from original
description_keyword_df = study_info_df.copy()[
    [
        "nctId",
        "status",
        "durationMonths",
        "description",
        "disease",
        "intervention_type",
        "intervention_name",
    ]
]

study_info_df = study_info_df[
    [
        "nctId",
        "status",
        "durationMonths",
        "disease",
        "intervention_type",
        "intervention_name",
    ]
]

study_info_df

Unnamed: 0,nctId,status,durationMonths,disease,intervention_type,intervention_name
0,NCT03202394,COMPLETED,32.266667,"['Respiratory Distress Syndrome, Adult']","['DRUG', 'DRUG']","['BIO-11006', 'Placebo']"
1,NCT02138214,COMPLETED,65.733333,"['Stage I Papillary Thyroid Cancer', 'Stage II...","['PROCEDURE', 'PROCEDURE', 'OTHER']","['Thyroidectomy', 'entral lymph node dissectio..."
2,NCT00976274,COMPLETED,17.266667,['Metabolic Syndrome'],"['DIETARY_SUPPLEMENT', 'DIETARY_SUPPLEMENT']","['Korean red ginseng', 'starch']"
3,NCT05529719,COMPLETED,25.266667,['Low Back Pain'],['OTHER'],['Clinical Pilates Exercises']
4,NCT02323191,COMPLETED,68.033333,['Solid Cancers'],"['DRUG', 'DRUG']","['Atezolizumab', 'Emactuzumab']"
...,...,...,...,...,...,...
184620,NCT05883852,RECRUITING,98.200000,['HER2 Positive Early Breast Cancer'],"['DRUG', 'DRUG', 'DRUG', 'DRUG', 'DRUG', 'DRUG']","['Docetaxel', 'carboplatin', 'Trastuzumab', 'P..."
184621,NCT02180724,ACTIVE_NOT_RECRUITING,61.633333,['Waldenström Macroglobulinemia (WM)'],"['DRUG', 'DRUG']","['Acalabrutinib (ACP-196)', 'Acalabrutinib (AC..."
184622,NCT00487305,ACTIVE_NOT_RECRUITING,195.800000,['Follicular Lymphoma'],['BIOLOGICAL'],['Lethally Irradiated Lymphoma cells with GM-C...
184623,NCT05786924,RECRUITING,38.000000,"['Non-small Cell Lung Cancer', 'Histiocytic Ne...",['DRUG'],['BDTX-4933']


In [6]:
study_info_df["id"] = study_info_df["nctId"]
study_info_df.set_index("id", inplace=True, drop=False)

study_info_df = study_info_df[study_info_df["durationMonths"].notna()]
# study_info_df = study_info_df[:4000]

app = Dash(__name__)

app.layout = html.Div(
    [
        html.H1("Clinical Trials"),
        dash_table.DataTable(
            id="datatable-row-ids",
            columns=[
                {"name": i, "id": i, "deletable": True}
                for i in study_info_df.columns
                # omit the id column
                if i != "id"
            ],
            data=study_info_df.to_dict("records"),
            editable=True,
            filter_action="native",
            filter_options={
                "placeholder_text": "Filter column...",
                "case": "insensitive",
            },
            sort_action="native",
            sort_mode="multi",
            row_selectable="multi",
            row_deletable=True,
            selected_rows=[],
            page_action="native",
            page_current=0,
            page_size=10,
        ),
        html.H1("Study Description"),
        html.Div(
            id="datatable-row-ids-container",
            style={
                "fontSize": 24,
                #   'color': 'blue'
            },
        ),
        html.H1("Trial Duration"),
        html.Div(
            id="trial-dura-text",
            style={
                "fontSize": 24,
                #   'color': 'blue'
            },
        ),
        html.H1("Embedding Stuff"),
        # SHAP Features for Important things for this things go here
        html.H1("Visualizations"),
        dvc.Vega(id="altair-chart", opt={"renderer": "svg", "actions": False}),
        # Visualization(s) Go Here
    ]
)


@callback(
    Output("datatable-row-ids-container", "children"),
    Output("trial-dura-text", "children"),
    Input("datatable-row-ids", "derived_virtual_row_ids"),
    Input("datatable-row-ids", "selected_row_ids"),
    Input("datatable-row-ids", "active_cell"),
)
def update_graphs(row_ids, selected_row_ids, active_cell):
    # When the table is first rendered, `derived_virtual_data` and
    # `derived_virtual_selected_rows` will be `None`. This is due to an
    # idiosyncrasy in Dash (unsupplied properties are always None and Dash
    # calls the dependent callbacks when the component is first rendered).
    # So, if `rows` is `None`, then the component was just rendered
    # and its value will be the same as the component's dataframe.
    # Instead of setting `None` in here, you could also set
    # `derived_virtual_data=df.to_rows('dict')` when you initialize
    # the component.
    selected_id_set = set(selected_row_ids or [])

    if row_ids is None:
        dff = study_info_df
        # pandas Series works enough like a list for this to be OK
        row_ids = study_info_df["id"]
    else:
        dff = study_info_df.loc[row_ids]

    # setting nctId variable
    active_row_id = active_cell["row_id"] if active_cell else None

    colors = [
        (
            "#FF69B4"
            if id == active_row_id
            else "#7FDBFF" if id in selected_id_set else "#0074D9"
        )
        for id in row_ids
    ]

    description = (
        description_keyword_df.loc[
            description_keyword_df.nctId == active_row_id, "description"
        ].item()
        if active_cell
        else "[Study Description Goes Here]"
    )
    dura = (
        str(
            description_keyword_df.loc[
                description_keyword_df.nctId == active_row_id, "durationMonths"
            ].item()
        )
        + " months"
        if active_cell
        else "[Trial Duration Goes Here]"
    )

    return description, dura


@callback(
    Output("altair-chart", "spec"),
    Input("datatable-row-ids", "derived_virtual_row_ids"),
    Input("datatable-row-ids", "selected_row_ids"),
    Input("datatable-row-ids", "active_cell"),
)
def update_chart(row_ids, selected_row_ids, active_cell):
    if row_ids is None:
        dff = study_info_df.drop_duplicates("id")
        # pandas Series works enough like a list for this to be OK
        row_ids = study_info_df["id"]
    else:
        dff = study_info_df.loc[row_ids].drop_duplicates("id")

    if active_cell:
        chart = (
            alt.Chart(dff)
            .mark_bar()
            .encode(
                alt.X("nctId", sort=None),
                y="durationMonths",
                tooltip=list(study_info_df.columns),
                color=alt.condition(
                    alt.datum.nctId == active_cell["row_id"],
                    alt.value("darkred"),
                    alt.value("steelblue"),
                ),
                opacity=alt.condition(
                    alt.datum.nctId == active_cell["row_id"],
                    alt.value(1.0),
                    alt.value(0.5),
                ),
            )
            .interactive()
        )
    else:
        chart = (
            alt.Chart(dff)
            .mark_bar()
            .encode(
                alt.X("nctId", sort=None),
                y="durationMonths",
                tooltip=list(study_info_df.columns),
            )
            .interactive()
        )

    line = (
        alt.Chart(dff)
        .mark_rule(color="black")
        .encode(
            y="mean(durationMonths):Q",
            size=alt.value(5),
            tooltip="mean(durationMonths):Q",
        )
    )

    # Altair only shows up to 5000 rows for data, which we could increase, but we do not need to see the entire dataset on a single barchart
    try:
        return (
            (chart + line)
            .encode(y=alt.Y(title="Trial Duration (Months)"))
            .configure_axis(labelFontSize=14, titleFontSize=18)
            .to_dict()
        )
    except:
        return None


if __name__ == "__main__":
    app.run(debug=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



OSError: Address 'http://127.0.0.1:8050' already in use.
    Try passing a different port to run_server.