# Reviewer_Tool_dev

JupyterReviewer is a package that integrates the manual review processes into Jupyter notebooks and computational analysis workflows.

# ReviewData object

The `ReviewData` object stores all relevant information regarding the data you need to review. The object is designed so that information can be added or edited for each item (row) over time. Features include:

- Organized subtables for data you want to edit, supplementary information to view, and history of changes
- Stores subtables automatically
- Prevents overwriting
- Easy to share or pass review to other users

Instantiating a `ReviewData` object requires a dataframe where each row corresponds to an item you want to review (such as a mutation or a sample purity). Each row must have a unique index value.

In [10]:

import pandas as pd
import pathlib
import os
from IPython.display import display
from datetime import datetime, timedelta
import time

import plotly.express as px
from plotly.subplots import make_subplots
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from dash import Dash, dash_table
import dash
import dash_bootstrap_components as dbc
import plotly.express as px

prop_id: APP-dropdown-data-state
0


In [2]:
import pandas as pd
from datetime import datetime
import os
import numpy as np
import warnings

from enum import Enum
class AnnotationType(Enum):
    """Kinds of annotation input; each member's value is the type's lowercase string name."""
    TEXT = 'text'            # single-line text
    TEXTAREA = 'textarea'    # multi-line text
    NUMBER = 'number'        # numeric value
    CHECKLIST = 'checklist'  # any number of choices from a list of options
    RADIOITEM = 'radioitem'  # exactly one choice from a list of options

class ReviewDataAnnotation:
    """Description of one annotation column: its name, input type, allowed options and validation."""

    def __init__(self, name,
                 annot_type: "AnnotationType",
                 options=None,
                 validate_input=None,
                 default=None
                ):
        '''
        name: name of the annotation (used as the column name in the annotation table)
        annot_type: kind of input for this annotation
        options: allowed values; leave empty (the default) to allow any value
        validate_input: a custom function to verify input. Returns a boolean
        default: initial value for the annotation input
        '''
        self.name = name
        self.annot_type = annot_type
        # A fresh list per instance: a shared `[]` default would leak options
        # appended on one annotation into every other annotation.
        self.options = [] if options is None else options
        self.validate_input = validate_input
        self.default = default

    def validate(self, x):
        '''
        Check a value (or a collection of values) against `options` and `validate_input`.

        Raises ValueError if any item is not in `options` (when options were given)
        or if `validate_input(x)` is falsy. Returns None otherwise.
        '''
        if len(self.options) > 0:
            # Flatten so a scalar and a list of selections are checked the same way
            for item in np.array([x]).flatten():
                if item not in self.options:
                    raise ValueError(f'Input {item} is not in the specified options {self.options} for annotation named {self.name}')

        if self.validate_input is not None:
            if not self.validate_input(x):
                raise ValueError(f'Input {x} is invalid for annotation {self.name}. Check validate_input method')

class ReviewData:
    """
    Data under review plus the annotations made on it, persisted as TSV files.

    Three tables are kept in `review_dir`:
      - data.tsv:    the items to review (one row per item)
      - annot.tsv:   current annotation values, indexed like the data
      - history.tsv: one row per submitted change, with the item index and a timestamp
    """

    def __init__(self,
                 review_dir: str, # path to directory to save info
                 df: pd.DataFrame, # may be None if the directory above already exists
                 annotate_data: "list[ReviewDataAnnotation]", # annotations (name and type) to collect
                ):
        '''
        Create a new review session in `review_dir`, or reload it if the directory exists.

        Raises ValueError if the directory does not exist and `df` is None.
        Warns if `df` shares columns with the stored data but the values differ;
        in that case only the new columns are added.
        '''
        annotate_cols = [ann.name for ann in annotate_data]
        self.annotate_data = annotate_data

        self.review_dir = review_dir
        self.data_fn = f'{review_dir}/data.tsv'
        self.annot_fn = f'{review_dir}/annot.tsv'
        self.history_fn = f'{review_dir}/history.tsv'

        if not os.path.isdir(self.review_dir):
            if df is None:
                raise ValueError(f'df is required to start a new review session in {review_dir}')
            os.makedirs(self.review_dir)
            self.data = df
            self.data.to_csv(self.data_fn, sep='\t')
            self.annot = pd.DataFrame(index=df.index, columns=annotate_cols) # Add more columns. If updating an existing column, will make a new one
            self.annot.to_csv(self.annot_fn, sep='\t')
            self.history = pd.DataFrame(columns=annotate_cols + ['index', 'timestamp']) # track all the manual changes, including time stamp
            self.history.to_csv(self.history_fn, sep='\t')
        else:
            self.data = pd.read_csv(self.data_fn, sep='\t', index_col=0)
            self.annot = pd.read_csv(self.annot_fn, sep='\t', index_col=0)
            self.history = pd.read_csv(self.history_fn, sep='\t', index_col=0)

        # Add additional annotation columns
        new_annot_cols = [c for c in annotate_cols if c not in self.annot.columns]
        if new_annot_cols:
            self.annot[new_annot_cols] = np.nan

        # Multi-choice annotations hold arbitrary Python values, so keep them as object.
        # The type may be given as an AnnotationType member or as its string value.
        object_types = (AnnotationType.CHECKLIST, AnnotationType.RADIOITEM)
        object_type_values = tuple(t.value for t in object_types)
        for annot in self.annotate_data:
            if annot.annot_type in object_types or annot.annot_type in object_type_values:
                self.annot[annot.name] = self.annot[annot.name].astype(object)

        # Add additional columns to table
        if df is not None and not df.equals(self.data):
            new_data_cols = [c for c in df.columns if c not in self.data.columns]
            not_new_data_cols = [c for c in df.columns if c in self.data.columns]
            if new_data_cols:
                self.data[new_data_cols] = df[new_data_cols]

            if not self.data[not_new_data_cols].equals(df[not_new_data_cols]):
                warnings.warn('Input data dataframe shares columns with existing data, but are not equal.\n' +
                              f'Only adding columns {new_data_cols} to the ReviewData.data dataframe\n' +
                              'Remaining columns are not going to be updated.\n' +
                              'If you intend to change the ReviewData.data attribute, make a new session directory and prefill the annotation data')

    def pre_fill_annot(self, df: pd.DataFrame):
        '''Copy values from `df` into the annotation table for the rows and columns they share by label.'''
        shared_cols = [c for c in df.columns if c in self.annot.columns]
        self.annot.loc[df.index, shared_cols] = df[shared_cols]

    def _update(self, data_idx, series):
        '''
        Record new annotation values for one item and write all tables to disk.

        data_idx: index label of the item being annotated
        series: dict mapping annotation column name to its new value (not modified)
        '''
        self.annot.loc[data_idx, list(series.keys())] = list(series.values())

        # Build the history record from a copy so the caller's dict is left untouched
        record = dict(series)
        record['timestamp'] = datetime.today()
        record['index'] = data_idx
        new_row = pd.DataFrame([record])
        if self.history.empty:
            # Keep the existing column order without concatenating an empty frame
            cols = list(self.history.columns) + [c for c in new_row.columns if c not in self.history.columns]
            self.history = new_row.reindex(columns=cols)
        else:
            self.history = pd.concat([self.history, new_row], ignore_index=True)

        # write to file
        self.data.to_csv(self.data_fn, sep='\t')
        self.annot.to_csv(self.annot_fn, sep='\t')
        self.history.to_csv(self.history_fn, sep='\t')
        

In [3]:
# Load the purity review table; the first column is used as the index.
# NOTE(review): the path is a hard-coded gs:// bucket location — reading it presumably
# needs GCS filesystem support and bucket access; confirm before sharing the notebook.
bucket_0c1_cchu_manual_purity_review_session_dir = 'gs://taml_vm_analysis/data/Full-Analysis/1_Full-Analysis-2022-02-22_pran3/0c1_Manual_Purity_Review_cchu'
cchu_purities_df = pd.read_csv(f'{bucket_0c1_cchu_manual_purity_review_session_dir}/manual_purity_review_table.tsv', sep='\t', index_col=0)
cchu_purities_df



Unnamed: 0_level_0,BETA_FLAG_not_enough_drivers,BETA_annot_maf_fn,BETA_clonal_muts,BETA_clonal_muts_genes,BETA_half_purity,BETA_has_beta_solution,BETA_num_clonal_drivers,BETA_ploidy,BETA_purity,BETA_purity_lower,...,manual_purity,manual_purity_lower,manual_purity_upper,manual_ploidy,manual_confidence,manual_flags,last_manual_update,manual_method,MAFLITE,VCF
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000725_ZS_2668,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.630,0.570,0.690,2.01,"No purity called, unsure",Post_Allo,2022-02-23 21:43:37.104717,Manual_Other,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
005982_GD_1875,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['RTEL1:p.A1062T'],0.488,True,1.0,2.0,0.976,0.860,...,0.910,0.860,0.960,1.95,Confident,,2022-02-23 21:44:00.961518,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
012413_AT_1634,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['BRCA1:p.V772A'],0.512,True,2.0,2.0,1.024,0.800,...,1.024,0.800,1.244,2.00,"Purity called, unsure",,2022-02-23 21:44:51.038330,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
016198_VX_1736,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['TERT:p.R756H'],0.430,True,1.0,2.0,0.860,0.760,...,0.860,0.760,0.964,2.00,"Purity called, unsure",No CNA,2022-02-23 21:51:14.522862,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
022613_PU_3426,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.460,0.390,0.530,1.88,Confident,No_AML_drivers,2022-02-23 21:53:02.173752,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PQ9867BM,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,"[0, 1]","['TP53:p.W53*', 'ZNF318:p.R1936S']",0.400,True,2.0,2.0,0.800,0.688,...,0.864,0.708,1.024,2.00,Confident,Used DFCI flags to change Beta solution,2022-03-08 21:01:12.668613,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
SA04142016,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.920,0.870,0.970,1.83,Confident,,2022-03-08 21:01:38.015167,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
SM120519BM-H,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.000,0.000,0.000,0.00,"No purity called, unsure","No CNA,No AML drivers",2022-03-08 21:02:25.093167,Manual_Other,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
WD10052017BM,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['CTC1:p.R731W'],0.700,True,2.0,2.0,1.400,0.732,...,0.528,0.464,0.596,2.00,"Purity called, unsure",No CNA,2022-03-08 21:04:18.622309,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...


In [4]:
# Create (or reload, if the directory already exists) a review session for the purity table.
# NOTE(review): test_rd_dir is a hard-coded absolute local path.
test_rd_dir = '/home/cchu/cgaprojects_ibm_tAML_analysis/data/test_getzlab-JupyterReviewer/Reviewer_Tutorial'
test_rd = ReviewData(review_dir=test_rd_dir,
                     df = cchu_purities_df, # one row per item to review
                     # annotation types are passed as plain strings matching the AnnotationType values
                     annotate_data = [ReviewDataAnnotation('purity', 'number', validate_input=lambda x: x < 0.5),
                                      ReviewDataAnnotation('rating', 'number', options=range(10)),
                                      ReviewDataAnnotation('description', 'text'),
                                      ReviewDataAnnotation('class', 'radioitem', options=[f'Option {n}' for n in range(4)]),])
                     
# Show the first rows of the annotation table
test_rd.annot.head()

Unnamed: 0_level_0,purity,class,rating,description
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000725_ZS_2668,2.0,Option 3,2.0,asdf
005982_GD_1875,0.0,Option 0,0.0,0
012413_AT_1634,0.4,Option 2,10.0,asdfsadf
016198_VX_1736,,,,
022613_PU_3426,,,,


# Simple widgets notebook reviewer

You can use IPython widgets to add interactivity.

# ReviewDataApp object

Use the functionality of Plotly Dash to create advanced dashboards for visualizing and interacting with your data. The app wraps around any `ReviewData` object, so the dashboard is easy to edit and change as needed without undoing the underlying annotations in the `ReviewData`.

In [346]:
class AppComponent:
    """A named piece of the dashboard: its layout plus the callback wiring that updates it."""

    def __init__(self, name,
                 components,
                 callback=None,
                 callback_output=None,
                 callback_input=None,
                 callback_state=None,
                ):
        '''
        name: identifier of the component
        components: layout content; wrapped in an html.Div
        callback: function that computes new values for `callback_output`
        callback_output: outputs updated by `callback` (defaults to an empty list)
        callback_input: inputs passed to `callback` (defaults to an empty list)
        callback_state: states read alongside the inputs (defaults to an empty list)
        '''
        self.name = name
        self.component = html.Div(components)
        self.callback = callback
        # Fresh lists per instance: mutable default arguments would be shared
        # between every component created without these arguments.
        self.callback_output = [] if callback_output is None else callback_output
        self.callback_input = [] if callback_input is None else callback_input
        self.callback_state = [] if callback_state is None else callback_state

        # TODO: option to update annotations
        # TODO: reset function (switching samples) and a page function
    
class TestApp:
    """
    Dash dashboard built around a ReviewData object.

    The layout has a dropdown to pick the item under review, an annotation panel
    with a submit button, a table of that item's annotation history, and any
    extra components registered with add_table_from_path / add_custom_component.
    """

    def __init__(self, review_data: ReviewData, host='0.0.0.0', port=8051):
        self.prop = None
        self.autofill_buttons = []
        self.autofill_input_dict = {}  # {button id: {annot_col: Input(component value)}}
        self.more_components = []  # TODO: set custom layout?
        self.review_data = review_data
        # NOTE(review): host/port are stored here but run_app() serves on its own
        # host/port arguments; confirm which one is meant to win.
        self.host = host
        self.port = port

        # TODO: check component ids are not duplicated

    @staticmethod
    def _annot_type_value(annot):
        '''Return the annotation type as a string, whether it was given as a string or an enum member.'''
        return getattr(annot.annot_type, 'value', annot.annot_type)

    @staticmethod
    def _annot_input_id(annot):
        '''Component id of the input widget for an annotation.'''
        return f"APP-{annot.name}-{TestApp._annot_type_value(annot)}-input-state"

    def run_app(self, mode, host='0.0.0.0', port=8050):
        '''
        Build the layout, register the single app callback and start the server.

        mode: JupyterDash run mode, passed through to run_server
        host, port: address to serve on
        '''
        app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
        app.layout = self.gen_layout()

        # Keys of the annotation panel outputs; must match the callback_output dict
        # built in gen_annotation_panel_component (one key per declared annotation).
        annot_names = [annot.name for annot in self.review_data.annotate_data]

        def history_table_for(data_idx):
            # Read the attribute on every call: _update() replaces the history dataframe
            history = self.review_data.history
            return dbc.Table.from_dataframe(history.loc[history['index'] == data_idx])

        def run_component(component, data_idx, component_inputs):
            return component.callback(self.review_data.data.loc[data_idx],
                                      *component_inputs[component.name])

        @app.callback(output=dict(history_table=Output('APP-history-table', 'children'),
                                  annot_panel=self.annotation_panel_component.callback_output,
                                  more_component_outputs={c.name: c.callback_output for c in self.more_components}
                                 ),
                      inputs=dict(dropdown_value=Input('APP-dropdown-data-state', 'value'),
                                  autofill_buttons=[Input(b.id, 'n_clicks') for b in self.autofill_buttons],
                                  autofill_inputs=self.autofill_input_dict,
                                  submit_annot_button=Input('APP-submit-button-state', 'n_clicks'),
                                  annot_input_state=self.annotation_panel_component.callback_state,
                                  more_component_inputs={c.name: c.callback_input for c in self.more_components}
                                 )
                     ) # TODO: add back more components
        def component_callback(dropdown_value,
                               autofill_buttons,
                               autofill_inputs,
                               submit_annot_button,
                               annot_input_state,
                               more_component_inputs):

            ctx = dash.callback_context
            if not ctx.triggered:
                raise PreventUpdate
            prop_id = ctx.triggered[0]['prop_id'].split('.')[0]

            # Default: leave everything as it is
            output_dict = {'history_table': dash.no_update,
                           'annot_panel': {name: dash.no_update for name in annot_names},
                           'more_component_outputs': {c.name: [dash.no_update for _ in range(len(c.callback_output))]
                                                      for c in self.more_components}}

            if prop_id == 'APP-dropdown-data-state':
                # A different item was selected: refresh every extra component,
                # show that item's history and clear the annotation inputs.
                for component in self.more_components:
                    # reset vs row dependent
                    output_dict['more_component_outputs'][component.name] = run_component(
                        component, dropdown_value, more_component_inputs)  # force this? specify names in the callback outputs?

                output_dict['history_table'] = history_table_for(dropdown_value)
                output_dict['annot_panel'] = {name: '' for name in annot_names}  # TODO set defaults?

            elif prop_id == 'APP-submit-button-state' and (submit_annot_button or 0) > 0:
                # Save the values currently in the annotation inputs
                self.review_data._update(dropdown_value, annot_input_state)
                output_dict['history_table'] = history_table_for(dropdown_value)
            elif 'APP-autofill-' in prop_id:
                # Copy the values tied to this autofill button into the annotation inputs
                for autofill_annot_col, value in autofill_inputs[prop_id].items():
                    output_dict['annot_panel'][autofill_annot_col] = value
            else:
                # identify component that changed and which outputs are changed
                for component in self.more_components:
                    if any(c.component_id == prop_id for c in component.callback_input):
                        output_dict['more_component_outputs'][component.name] = run_component(
                            component, dropdown_value, more_component_inputs)  # force having output as array specify names in the callback outputs?
            return output_dict

        app.run_server(mode=mode, host=host, port=port, debug=True)

    def gen_annotation_panel_component(self):
        '''Build the annotation panel: autofill buttons, one input per annotation and a submit button.'''
        submit_annot_button = html.Button(id='APP-submit-button-state', n_clicks=0, children='Submit')

        # history panel

        def annotation_input(annot: ReviewDataAnnotation):
            '''Return a labelled row holding the input widget that matches the annotation type.'''
            input_component_id = self._annot_input_id(annot)
            annot_type = self._annot_type_value(annot)

            if annot_type == AnnotationType.TEXTAREA.value:
                input_component = dbc.Textarea(size="lg",
                                               id=input_component_id,
                                               value=annot.default,
                                              )
            elif annot_type == AnnotationType.TEXT.value:
                input_component = dbc.Input(type="text",
                                            id=input_component_id,
                                            placeholder=f"Enter {annot.name}",
                                            value=annot.default,
                                           )
            elif annot_type == AnnotationType.NUMBER.value:
                input_component = dbc.Input(type="number",
                                            id=input_component_id,
                                            placeholder=f"Enter {annot.name}",
                                            value=annot.default,
                                           )
            elif annot_type == AnnotationType.CHECKLIST.value:
                input_component = dbc.Checklist(options=[{"label": f, "value": f} for f in annot.options],
                                                id=input_component_id,
                                                value=annot.default)
            elif annot_type == AnnotationType.RADIOITEM.value:
                input_component = dbc.RadioItems(options=[{"label": f, "value": f} for f in annot.options],
                                                 value=annot.default,
                                                 id=input_component_id,
                                                )
            else:
                raise ValueError(f'Invalid annotation type "{annot.annot_type}"')

            return dbc.Row([dbc.Label(annot.name, html_for=input_component_id, width=2), dbc.Col(input_component)])

        annotations = self.review_data.annotate_data
        panel_components = self.autofill_buttons + [annotation_input(annot) for annot in annotations] + [submit_annot_button]
        panel_inputs = [Input('APP-submit-button-state', 'n_clicks')]
        return AppComponent(name='APP-Panel',
                            components=panel_components,
                            callback_output={annot.name: Output(self._annot_input_id(annot), "value") for annot in annotations},
                            callback_input=panel_inputs,
                            callback_state={annot.name: State(self._annot_input_id(annot), "value") for annot in annotations}
                           )

    def gen_layout(self):
        '''Assemble the page layout and store the dropdown, history and annotation panel components on self.'''
        dropdown = html.Div(dcc.Dropdown(options=self.review_data.data.index,
                                         value=self.review_data.data.index[0],
                                         id='APP-dropdown-data-state'))

        self.dropdown_component = AppComponent(name='APP-dropdown-component',
                                               components=[dropdown])

        # Start with an empty table that has the history columns; the callback fills it in
        history_table = html.Div([dbc.Table.from_dataframe(pd.DataFrame(columns=self.review_data.history.columns))], id='APP-history-table')
        self.history_component = AppComponent(name='APP-history-component',
                                              components=[history_table])

        self.annotation_panel_component = self.gen_annotation_panel_component()

        layout = html.Div([dbc.Row(self.dropdown_component.component, justify='end'),
                           dbc.Row([dbc.Col(self.annotation_panel_component.component),
                                    dbc.Col(self.history_component.component)
                                   ]),
                           dbc.Row([dbc.Row(c.component) for c in self.more_components])
                          ])

        return layout

    def add_table_from_path(self, table_name, component_name, col, table_cols):
        '''
        Add a table whose content is read from a tab-separated file named in a data column.

        table_name: heading shown above the table
        component_name: id of the table container (also the component name)
        col: column of the review data holding the file path for each item
        table_cols: columns of the file to display
        '''
        # The initial table is read from the first item's file when the component is added
        table = html.Div(dbc.Table.from_dataframe(pd.read_csv(self.review_data.data.iloc[0][col], sep='\t')[table_cols]),
                         id=component_name)
        table_component = AppComponent(component_name, [html.H1(table_name), table],
                                       lambda r: [dbc.Table.from_dataframe(pd.read_csv(r[col], sep='\t')[table_cols])],
                                       callback_output=[Output(component_name, 'children')],
                                      )
        self.more_components.append(table_component)

    def add_custom_component(self,
                             component_name,
                             component_layout,
                             func,
                             callback_output,
                             callback_input=None,
                             add_autofill=False,
                             autofill_dict=None, # annot_col: component output id
                             **kwargs):
        '''
        Add a user-defined component.

        component_name: name of the component
        component_layout: layout content of the component
        func: called as func(data_row, *callback_input_values, **kwargs); its return
              value is used for `callback_output`
        callback_output: outputs updated by `func`
        callback_input: inputs (and states) whose values are passed to `func`
        add_autofill: if True, add a button that copies values into the annotation inputs
        autofill_dict: {annotation column: Input supplying the value to copy}
        kwargs: extra keyword arguments forwarded to `func`
        '''
        # Fresh containers per call instead of shared mutable defaults
        callback_input = [] if callback_input is None else callback_input
        autofill_dict = {} if autofill_dict is None else autofill_dict

        if add_autofill:
            autofill_button_component = html.Button(f'Use {component_name} solution', id=f'APP-autofill-{component_name}', n_clicks=0)
            self.autofill_buttons += [autofill_button_component]
            self.autofill_input_dict[autofill_button_component.id] = autofill_dict

        component = AppComponent(component_name, component_layout,
                                 lambda *args: func(*args, **kwargs),
                                 callback_output=callback_output,
                                 callback_input=callback_input
                                )

        self.more_components.append(component)
        
        
        
    

# Example

In [347]:
from scipy.stats import beta, kruskal
import plotly.graph_objects as go

# Grid of allele-fraction values on [0, 1) used to evaluate every density below
tumor_f_bin_width = 1.0/500.0
tumor_f_bins = np.arange(0, 1, tumor_f_bin_width)
# Kruskal-Wallis p-values above this threshold mark a set of mutations as clonal
pval_threshold = 1.1E-4
def plot_beta(maf_df: pd.DataFrame, data_id):
    """
    Estimate purity from mutation read counts and plot the per-mutation densities.

    Args:
        maf_df: one row per mutation. Reads the columns t_alt_count, t_ref_count,
            Hugo_Symbol, Start_position, Protein_Change and Variant_Classification;
            tumor_f is computed from the counts when absent. One column per value
            in tumor_f_bins (and tumor_f, if missing) is written onto the frame
            that was passed in.
        data_id: label used in the figure title.

    Returns:
        (fig, purity, purity_lower_ci, purity_upper_ci)

    Raises:
        ValueError: if maf_df is empty.
    """
    
    if maf_df.empty:
        raise ValueError("There are no mutations in the maf dataframe.")

    # Per mutation: Beta(alt + 1, ref + 1) density over the bins, normalized to integrate to 1
    for idx, r in maf_df.iterrows():
        pdf = beta.pdf(tumor_f_bins, r['t_alt_count'] + 1, r['t_ref_count'] + 1)
        maf_df.loc[idx, tumor_f_bins] = pdf / (sum(pdf) * tumor_f_bin_width)

    # Density summed over all mutations, renormalized
    sum_pdf = maf_df[tumor_f_bins].sum(axis=0)
    sum_pdf = sum_pdf / (sum_pdf.sum() * tumor_f_bin_width)
    if 'tumor_f' not in maf_df.columns:
        maf_df['tumor_f'] = maf_df['t_alt_count'].astype(float) / (maf_df['t_alt_count'] + maf_df['t_ref_count'])
    # Highest allele fraction first; after reset_index the index is the rank
    maf_df = maf_df.sort_values(by='tumor_f',
                                ascending=False).reset_index()

    # Find the largest set of top-ranked mutations (at least two) whose reads, encoded
    # as 1 per alt read and 0 per ref read, pass the Kruskal-Wallis threshold.
    # If no set passes, only the top mutation is treated as clonal.
    clonal_muts = [maf_df.index[0]]  # Get the first one
    for j in np.arange(maf_df.shape[0], 1, -1):
        h_stat, pval = kruskal(*maf_df.iloc[:j].apply(lambda x: np.concatenate((np.ones(x['t_alt_count']),
                                                                                np.zeros(x['t_ref_count']))),
                                                      axis=1).tolist())
        if pval > pval_threshold:
            clonal_muts = maf_df.index[:j].tolist()
            break

    # NOTE(review): subclonal_muts is not used again in this function
    subclonal_muts = maf_df.index[clonal_muts[-1] + 1:].tolist() if clonal_muts[-1] < maf_df.shape[0] else []

    # Product of the clonal densities; its peak bin is the half purity, doubled to give purity
    clonal_prod_pdf = maf_df.loc[clonal_muts, tumor_f_bins].product(axis=0)
    clonal_prod_pdf = clonal_prod_pdf / (clonal_prod_pdf.sum() * tumor_f_bin_width)
    half_purity = clonal_prod_pdf.argmax()
    purity = clonal_prod_pdf.index[half_purity] * 2

    # Interval: bins whose density is within a factor of 10 of the peak, doubled like the purity
    log_clonal_prod_pdf = np.log10(clonal_prod_pdf)
    log_clonal_prod_pdf = log_clonal_prod_pdf - np.max(log_clonal_prod_pdf)
    cis = log_clonal_prod_pdf[log_clonal_prod_pdf >= -1].index.tolist()
    purity_lower_ci = cis[0] * 2
    purity_upper_ci = cis[-1] * 2
    
    # plotly plot
    # Step 1: make the figure
    maf_df['clonal_status'] = maf_df.index.map(lambda x: 'clonal' if x in clonal_muts else 'subclonal')
    maf_df['Mut_Label'] = maf_df['Hugo_Symbol'] + ':' + maf_df['Start_position'].astype(str) + ':' + maf_df['Protein_Change'].astype(str) + ':' + maf_df['Variant_Classification'].astype(str)
    # Long format: one row per (mutation, bin); 'level_1' is the bin and column 0 the density
    to_plot_maf_df = maf_df.set_index('Mut_Label')[list(tumor_f_bins)].stack().reset_index()
    to_plot_maf_df['clonal_status'] = to_plot_maf_df['Mut_Label'].map(maf_df[['Mut_Label', 'clonal_status']].set_index('Mut_Label')['clonal_status'])
    to_plot_maf_df['pdf_log10'] = np.log10(to_plot_maf_df[0])
    fig = px.line(to_plot_maf_df, x='level_1', y='pdf_log10', color='clonal_status', 
                  hover_data=['Mut_Label'], title=f'{data_id}: purity = {round(purity, 2)} [{round(purity_lower_ci, 2)} - {round(purity_upper_ci, 2)}]')
    fig.add_trace(go.Scatter(x=tumor_f_bins, y=np.log10(clonal_prod_pdf),
                    mode='lines',
                    name='clonal product pdf'))
    fig.add_trace(go.Scatter(x=tumor_f_bins, y=np.log10(sum_pdf),
                    mode='lines',
                    name='all mutations sum pdf'))
    
    # Shade the interval and mark the peak, both on the half-purity scale
    fig.add_vrect(x0=cis[0], x1=cis[-1], line_width=0, fillcolor="red", opacity=0.2)
    fig.add_vline(x=clonal_prod_pdf.index[half_purity], name='Half purity')
    
    # Fix the y axis to log10 densities between 1e-4 and 1e2
    ylim_min=10 ** (-4)
    ylim_max=10 ** 2
    fig.update_yaxes(range=[np.log10(ylim_min), np.log10(ylim_max)])

    return fig, purity, purity_lower_ci, purity_upper_ci


In [348]:
import functools
import time

# Wrap the review session in a dashboard app (default host and port)
test_app = TestApp(test_rd)

# add components
def gen_data_summary_table(r, cols):
    """
    Build the children of the data summary component for one item.

    r: row of the review data (a Series; its name is shown in the heading)
    cols: labels of the values to list in the table

    Returns a one-element list holding [heading, table].
    """
    heading = html.H1(f'{r.name} Data Summary')
    summary_frame = r[cols].to_frame().reset_index()
    summary_table = dbc.Table.from_dataframe(summary_frame)
    return [[heading, summary_table]]

# Summary of the selected item's BETA purity/ploidy values. The layout starts with an
# empty table; gen_data_summary_table supplies the 'children' of the container.
test_app.add_custom_component('sample-info-component', 
                              html.Div(children=[html.H1('Data Summary'), 
                                                 dbc.Table.from_dataframe(df=pd.DataFrame())],
                                       id='sample-info-component'
                                      ), 
                              callback_output=[Output('sample-info-component', 'children')],
                              func=gen_data_summary_table, 
                              cols=['BETA_ploidy',
                                     'BETA_purity',
                                     'BETA_purity_lower',
                                     'BETA_purity_upper'])

# Table read from the file path stored in the 'DFCI_local_sample_dfci_maf_fn' column
test_app.add_table_from_path('DFCI MAF file', 'maf-component-id', 'DFCI_local_sample_dfci_maf_fn', ['Hugo_Symbol', 'Chromosome', 't_alt_count', 't_ref_count', 'Tumor_Sample_Barcode'])

# Columns shown in the beta MAF table
beta_table_cols = ['CHIP_mut_status', 
                  'aSCNA', 
                  'Hugo_Symbol', 
                  'Chromosome', 
                  'Start_position', 
                  'Variant_Classification', 
                  'Protein_Change', 
                  't_alt_count', 
                  't_ref_count', 
                  'total_count', 
                  'tumor_f', 
                  'gnomADg_AF']
# One placeholder row ('Test' in every column) used as the table's initial data
blank_beta_df = pd.DataFrame(columns=beta_table_cols)
blank_beta_df.loc[0, beta_table_cols] = 'Test'

@functools.lru_cache(maxsize=32)  # repeated requests for the same path skip the file read
def read_maf(fn):
    """Read the tab-separated file at `fn` into a dataframe; results are cached per path."""
    maf_table = pd.read_csv(fn, sep='\t')
    return maf_table

def beta_graph_callback(r,
                        reload_beta_graph_button,
                        selected_rows,
                        beta_table_fn_col,
                        beta_table_display_col):
    """
    Compute the outputs of the beta-graph component for one item.

    r: row of the review data for the selected item
    reload_beta_graph_button: click count of the reload button (not read here)
    selected_rows: row labels of the MAF table to include in the plot
    beta_table_fn_col: column of `r` holding the MAF file path
    beta_table_display_col: MAF columns to show in the table

    Returns [figure, table records, selected rows, purity, 2]; the last entry is
    a constant.
    """
    full_maf = read_maf(r[beta_table_fn_col])
    chosen_muts = full_maf.loc[selected_rows]
    fig, purity, purity_lower_ci, purity_upper_ci = plot_beta(chosen_muts, r.name)
    table_records = full_maf[beta_table_display_col].to_dict('records')
    return [fig, table_records, selected_rows, purity, 2]
    
# Beta MAF component: a selectable mutation table, purity/ploidy read-outs and the plot.
# With add_autofill=True an autofill button is added for this component; autofill_dict
# maps the 'purity' annotation to the purity shown here.
test_app.add_custom_component('beta-graph', 
                              html.Div([html.H1("Beta MAF"), 
                                        html.Button('Reload Beta Plot', id='reload-beta-button', n_clicks=0),
                                        dash_table.DataTable(
                                                              id='beta-maf-table',
                                                              columns=[{"name": i, "id": i} for i in beta_table_cols],
                                                              data=blank_beta_df.to_dict('records'),
                                                              filter_action="native",
                                                              sort_action="native",
                                                              sort_mode="multi",
                                                              column_selectable="single",
                                                              row_selectable="multi",
                                                              selected_columns=[],
                                                              selected_rows=[0],
                                                              page_action="native",
                                                              page_current= 0,
                                                              page_size= 12,
                                         ), 
                                        html.Div([html.P('Purity: ', style={'display': 'inline'}), html.P(0, id='beta-graph-purity', style={'display': 'inline'})]), 
                                        html.Div([html.P('Ploidy: ', style={'display': 'inline'}), html.P(0, id='beta-graph-ploidy', style={'display': 'inline'})]), 
                                        dcc.Graph(id='beta-graph', figure={})]), # todo just make name the heading
                              # order must match the list returned by beta_graph_callback
                              callback_output=[Output('beta-graph', 'figure'), 
                                               Output('beta-maf-table', 'data'), 
                                               Output('beta-maf-table', 'selected_rows'),
                                               Output('beta-graph-purity', 'children'),
                                               Output('beta-graph-ploidy', 'children')
                                              ],
                              # values are passed to beta_graph_callback in this order, after the data row
                              callback_input=[Input('reload-beta-button', 'n_clicks'), 
                                               State('beta-maf-table', 'selected_rows')],
                              func=beta_graph_callback, 
                              add_autofill=True,
                              autofill_dict={'purity': Input('beta-graph-purity', 'children')},
                              # remaining keyword arguments are forwarded to beta_graph_callback
                              beta_table_fn_col='BETA_annot_maf_fn',
                              beta_table_display_col=beta_table_cols
                             )



In [349]:
# Start the dashboard server on port 8052
test_app.run_app(mode='external', port=8052)

Dash app running on http://0.0.0.0:8052/


In [109]:
# List the columns available in the review data
test_rd.data.columns.tolist()

['BETA_FLAG_not_enough_drivers',
 'BETA_annot_maf_fn',
 'BETA_clonal_muts',
 'BETA_clonal_muts_genes',
 'BETA_half_purity',
 'BETA_has_beta_solution',
 'BETA_num_clonal_drivers',
 'BETA_ploidy',
 'BETA_purity',
 'BETA_purity_lower',
 'BETA_purity_upper',
 'BETA_ranked_maf_path',
 'BETA_subclonal_muts',
 'BETA_subclonal_muts_genes',
 'ABSOLUTE_purity',
 'ABSOLUTE_ploidy',
 'ABSOLUTE_purity_upper',
 'ABSOLUTE_purity_lower',
 'ABSOLUTE_absolute_mod_tab_annotated_fn',
 'ABSOLUTE_rdata_path',
 'ABSOLUTE_FLAG_no_good_absolute_solution',
 'AUTO_FINAL_method',
 'AUTO_FINAL_purity',
 'AUTO_FINAL_purity_upper',
 'AUTO_FINAL_purity_lower',
 'AUTO_FINAL_ploidy',
 'AUTO_FINAL_FLAG_no_purity_call',
 'BETA_annot_maf_gsurl',
 'CGA_BIOSPECIMEN_db_id',
 'CGA_BIOSPECIMEN_aliases',
 'CGA_BIOSPECIMEN_participant_id',
 'CGA_BIOSPECIMEN_irb_protocols',
 'CGA_BIOSPECIMEN_collection_date_dfd',
 'CGA_BIOSPECIMEN_tissue_site',
 'CGA_BIOSPECIMEN_tissue_site_detail',
 'CGA_BIOSPECIMEN_original_material_type',
 'CG

{'sample-info-component': [], 'maf-component-id': [], 'beta-graph': [0, [0]]}
{'sample-info-component': [], 'maf-component-id': [], 'beta-graph': [0, [0]]}
prop_id: APP-dropdown-data-state
0
sample-info-component

maf-component-id

beta-graph
0 [0]
