# Reviewer_Tool_dev

JupyterReviewer is a package that integrates the manual review processes into Jupyter notebooks and computational analysis workflows.

# ReviewData object

The `ReviewData` object stores all relevant information regarding the data you need to review. The object is designed to eventually add or edit information for each item (row). Features include:

- Organized subtables for data you want to edit, supplementary information to view, and history of changes
- Stores subtables automatically
- Prevents overwriting
- Easy to share or pass review to other users

Instantiating a Review Data object requires a dataframe where each row corresponds to the item you want to review (like a mutation or a sample purity). Each row must have some unique index name.

In [12]:

import pandas as pd
import pathlib
import os
from IPython.display import display
from datetime import datetime, timedelta
import time

import plotly.express as px
from plotly.subplots import make_subplots
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from dash import Dash, dash_table
import dash
import dash_bootstrap_components as dbc

In [1]:
import pandas as pd
from datetime import datetime
import os
import numpy as np
import warnings

class ReviewData:
    
    def __init__(self, 
                 review_dir: str, # path to directory to save info
                 df: pd.DataFrame, # optional if directory above already exists. 
                 annotate_cols: [str], # including tags? optional
                ):
        # check df index
        
        self.review_dir = review_dir
        self.data_fn = f'{review_dir}/data.tsv'
        self.annot_fn = f'{review_dir}/annot.tsv'
        self.history_fn = f'{review_dir}/history.tsv'
        
        if not os.path.isdir(self.review_dir):
            os.mkdir(self.review_dir)
            self.data = df
            self.data.to_csv(self.data_fn, sep='\t')
            self.annot = pd.DataFrame(index=df.index, columns=annotate_cols) # Add more columns. If updating an existing column, will make a new one
            self.annot.to_csv(self.annot_fn, sep='\t')
            self.history = pd.DataFrame(columns=annotate_cols + ['index', 'timestamp']) # track all the manual changes, including time stamp
            self.history.to_csv(self.history_fn, sep='\t')
        else:
            self.data = pd.read_csv(self.data_fn, sep='\t', index_col=0)
            self.annot = pd.read_csv(self.annot_fn, sep='\t', index_col=0)
            self.history = pd.read_csv(self.history_fn, sep='\t', index_col=0)
            
        # Add additional annotation columns
        new_annot_cols = [c for c in annotate_cols if c not in self.annot.columns]
        self.annot[new_annot_cols] = np.nan
        
        # Add additional columns to table
        if not df.equals(self.data):
            new_data_cols = [c for c in df.columns if c not in self.data.columns]
            not_new_data_cols = [c for c in df.columns if c in self.data.columns]
            self.data[new_data_cols] = df[new_data_cols]
            
            if not self.data[not_new_data_cols].equals(df[not_new_data_cols]):
                warnings.warn(f'Input data dataframe shares columns with existing data, but are not equal.\n' + 
                              f'Only adding columns {new_data_cols} to the ReviewData.data dataframe\n' + 
                              f'Remaining columns are not going to be updated.' + 
                              f'If you intend to change the ReviewData.data attribute, make a new session directory and prefill the annotation data')
            
    def pre_fill_annot(df: pd.DataFrame):
        self.annot.loc[df.index, [c for c in df.columns if c in self.annot.columns]] = df
        
    def _update(self, data_idx, series):
        self.annot.loc[data_idx] = series
        series['timestamp'] = datetime.today()
        series['index'] = data_idx
        self.history = self.history.append(series)
    
    

In [2]:
bucket_0c1_cchu_manual_purity_review_session_dir = 'gs://taml_vm_analysis/data/Full-Analysis/1_Full-Analysis-2022-02-22_pran3/0c1_Manual_Purity_Review_cchu'
cchu_purities_df = pd.read_csv(f'{bucket_0c1_cchu_manual_purity_review_session_dir}/manual_purity_review_table.tsv', sep='\t', index_col=0)
cchu_purities_df



Unnamed: 0_level_0,BETA_FLAG_not_enough_drivers,BETA_annot_maf_fn,BETA_clonal_muts,BETA_clonal_muts_genes,BETA_half_purity,BETA_has_beta_solution,BETA_num_clonal_drivers,BETA_ploidy,BETA_purity,BETA_purity_lower,...,manual_purity,manual_purity_lower,manual_purity_upper,manual_ploidy,manual_confidence,manual_flags,last_manual_update,manual_method,MAFLITE,VCF
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000725_ZS_2668,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.630,0.570,0.690,2.01,"No purity called, unsure",Post_Allo,2022-02-23 21:43:37.104717,Manual_Other,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
005982_GD_1875,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['RTEL1:p.A1062T'],0.488,True,1.0,2.0,0.976,0.860,...,0.910,0.860,0.960,1.95,Confident,,2022-02-23 21:44:00.961518,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
012413_AT_1634,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['BRCA1:p.V772A'],0.512,True,2.0,2.0,1.024,0.800,...,1.024,0.800,1.244,2.00,"Purity called, unsure",,2022-02-23 21:44:51.038330,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
016198_VX_1736,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['TERT:p.R756H'],0.430,True,1.0,2.0,0.860,0.760,...,0.860,0.760,0.964,2.00,"Purity called, unsure",No CNA,2022-02-23 21:51:14.522862,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
022613_PU_3426,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.460,0.390,0.530,1.88,Confident,No_AML_drivers,2022-02-23 21:53:02.173752,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PQ9867BM,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,"[0, 1]","['TP53:p.W53*', 'ZNF318:p.R1936S']",0.400,True,2.0,2.0,0.800,0.688,...,0.864,0.708,1.024,2.00,Confident,Used DFCI flags to change Beta solution,2022-03-08 21:01:12.668613,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
SA04142016,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.920,0.870,0.970,1.83,Confident,,2022-03-08 21:01:38.015167,Keep_auto_call,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
SM120519BM-H,True,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,,,,False,,2.0,0.000,0.000,...,0.000,0.000,0.000,0.00,"No purity called, unsure","No CNA,No AML drivers",2022-03-08 21:02:25.093167,Manual_Other,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...
WD10052017BM,False,/home/cchu/cgaprojects_ibm_tAML_analysis/data/...,[0],['CTC1:p.R731W'],0.700,True,2.0,2.0,1.400,0.732,...,0.528,0.464,0.596,2.00,"Purity called, unsure",No CNA,2022-03-08 21:04:18.622309,Manual_BETA,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...,gs://fc-fed5ee4d-4de5-429a-b88e-681cde1f0558/a...


In [3]:
test_rd_dir = '/home/cchu/cgaprojects_ibm_tAML_analysis/data/test_getzlab-JupyterReviewer/Reviewer_Tutorial'
test_rd = ReviewData(review_dir=test_rd_dir,
                     df = cchu_purities_df, # optional if directory above already exists. 
                     annotate_cols = ['purity', 'class', 'rating', 'description', 'another_annot_col'])
test_rd.annot.head()

Unnamed: 0_level_0,purity,class,rating,description,another_annot_col
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
000725_ZS_2668,,,,,
005982_GD_1875,,,,,
012413_AT_1634,,,,,
016198_VX_1736,,,,,
022613_PU_3426,,,,,


# Simple widgets notebook reviewer

You can use ipython widgets to get interactivity 

# ReviewDataApp object

Use the functionality of ploty Dash to create advanced dashboards for visualizing and interacting with your data. This is made to wrap around any ReviewData object, so it is easy to edit and change as needed without undoing the underlying annotations in the ReviewData

In [4]:
    
class ReviewDataAppComponent:
    
    def __init__(self,
                 dash_component, 
                 callback_func, 
                 callback_outputs=[], 
                 callback_inputs=[], 
                 callback_state=[]): # not sure if I can define this somewhere else
        self.dash_component = dash_component
        self.callback_func = callback_func
        self.callback_inputs = callback_inputs
        self.callback_outputs = callback_outputs
        self.callback_state = callback_state
    
    
class ReviewDataApp:
    
    def __init__(self, review_data: ReviewData, components: [ReviewDataAppComponent]): 
        self.review_data = review_data
        # list ids
        # default edit panel
        self.components = components
        
    def add_component():
        # define children
        # define interaction
        pass
    
    def run_app(self, mode='inline', host='0.0.0.0', port='8052'):
        app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
        app.layout = self.gen_layout()

        print(self.outputs)
        print(self.inputs)
        @app.callback(output=self.outputs, inputs=self.inputs, state=self.state)
        def app_callback(*inputs): # states?
            ctx = dash.callback_context
            print('callback')
            if not ctx.triggered:
                raise PreventUpdate
            else:
                prop_id = ctx.triggered[0]['prop_id'].split('.')[0]
                
            self.run_callbacks(prop_id, inputs)
            return list(inputs)
            
        
        app.run_server(mode=mode, host=host, port=port, debug=True)
        
    def gen_layout(self):
        # iterate through rows component
        dropdown = html.Div(
                            dcc.Dropdown(options=self.review_data.data.index, 
                                         value=self.review_data.data.index, 
                                         id='dropdown-data-state'),
                        )
        
        self.outputs = [arg for cmp in self.components for arg in cmp.callback_outputs]
        self.inputs = [Input('dropdown-data-state', 'value')] + [arg for cmp in self.components for arg in cmp.callback_inputs]
        self.state = [arg for cmp in self.components for arg in cmp.callback_state]
        
        return html.Div([dropdown] + [cmp.dash_component for cmp in self.components])
        
    def run_callbacks(self, prop_id, inputs):
        print(prop_id)
        print(inputs)
        
    


# each component defines how it gets updates and its interaction with the review data object
# do checks to make sure it s

In [13]:
radio_component = html.Div(
                            [
                                dbc.Label("Manual purity method"),
                                dbc.RadioItems(
                                    options=[
                                        {"label": "Keep auto call", "value": 'Keep_auto_call'},
                                        {"label": "Manual ABSOLUTE", "value": 'Manual_ABSOLUTE'},
                                        {"label": "Manual BETA", "value": 'Manual_BETA'},
                                        {"label": "Manual Other", "value": 'Manual_Other'},
                                    ],
                                    value='Keep_auto_call',
                                    id="purity-manual-method-radioitems",
                                ),
                            ]
                        )

output_component = html.H1('Data', id='header-sample-id')

# @app.callback(Output('header-sample-id', 'children'), 
#               Input('purity-manual-method-radioitems', 'value'))
def print_select(v):
    return v
    
a_component = ReviewDataAppComponent(dash_component=html.Div([radio_component, output_component]), 
                                     callback_func=print_select, 
                                     callback_outputs=[Output('header-sample-id', 'children')], 
                                     callback_inputs=[Input('purity-manual-method-radioitems', 'value')])


checklist_component = html.Div(
                            [
                                dbc.Label("Manual purity method"),
                                dbc.Checklist(
                                    options=[
                                        {"label": "1", "value": '1'},
                                        {"label": "2", "value": '2'},
                                        {"label": "3", "value": '3'},
                                    ],
                                    value='Keep_auto_call',
                                    id="checklist-items",
                                ),
                            ]
                        )

checklist_output_component = html.H1('Checklist Data', id='checklist-header-sample-id')

def print_selected(v):
    return v

b_component = ReviewDataAppComponent(dash_component=html.Div([checklist_component, checklist_output_component]), 
                                     callback_func=print_selected, 
                                     callback_outputs=[Output('checklist-header-sample-id', 'children')], 
                                     callback_inputs=[Input('checklist-items', 'value')])


test_rd_app = ReviewDataApp(test_rd, [a_component, b_component])


In [14]:
test_rd_app.run_app(mode='external')

[<Output `header-sample-id.children`>, <Output `checklist-header-sample-id.children`>]
[<Input `dropdown-data-state.value`>, <Input `purity-manual-method-radioitems.value`>, <Input `checklist-items.value`>]
Dash app running on http://0.0.0.0:8052/


In [8]:
class AppComponent:
    
    def __init__(self, components, callback, callback_output=[], callback_input=[], callback_state=[]):
        self.component = html.Div(components)
        self.callback = callback
        self.callback_output = callback_output
        self.callback_input = callback_input
        self.callback_state = callback_state
    
class TestApp:
    def __init__(self, components=[AppComponent], host='0.0.0.0', port=8051):
        self.prop = None
        self.components = components
        self.host = host
        self.port = port
        
    def run_app(self, mode):
        app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
        app.layout = self.gen_layout()
        
        for component in self.components:
            
            @app.callback(outputs=component.callback_output, 
                          inputs=component.callback_input)
            def component_callback(test):
                return component.callback
        
        app.run_server(mode=mode, host=self.host, port=self.port, debug=True) 
        
        
        
    def gen_layout(self):
        return html.Div([c.component for c in self.components])
    

In [9]:
radio_component = html.Div(
                            [
                                dbc.Label("Manual purity method"),
                                dbc.RadioItems(
                                    options=[
                                        {"label": "Keep auto call", "value": 'Keep_auto_call'},
                                        {"label": "Manual ABSOLUTE", "value": 'Manual_ABSOLUTE'},
                                        {"label": "Manual BETA", "value": 'Manual_BETA'},
                                        {"label": "Manual Other", "value": 'Manual_Other'},
                                    ],
                                    value='Keep_auto_call',
                                    id="purity-manual-method-radioitems",
                                ),
                            ]
                        )

output_component = html.H1('Data', id='header-sample-id')

# @app.callback(Output('header-sample-id', 'children'), 
#               Input('purity-manual-method-radioitems', 'value'))
def print_select(v):
    return v
    
a_component = AppComponent([radio_component, output_component], 
                           print_select, 
                           callback_output=[Output('header-sample-id', 'children')], 
                           callback_input=[Input('purity-manual-method-radioitems', 'value')])



In [10]:
test_app = TestApp([a_component])

In [11]:
test_app.run_app(mode='inline')

OSError: Address 'http://0.0.0.0:8051' already in use.
    Try passing a different port to run_server.

Exception in thread Thread-8:
Traceback (most recent call last):
  File "/usr/lib/python3.7/threading.py", line 917, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.7/threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "/home/cchu/.local/lib/python3.7/site-packages/retrying.py", line 49, in wrapped_f
    return Retrying(*dargs, **dkw).call(f, *args, **kw)
  File "/home/cchu/.local/lib/python3.7/site-packages/retrying.py", line 212, in call
    raise attempt.get()
  File "/home/cchu/.local/lib/python3.7/site-packages/retrying.py", line 247, in get
    six.reraise(self.value[0], self.value[1], self.value[2])
  File "/usr/lib/python3/dist-packages/six.py", line 693, in reraise
    raise value
  File "/home/cchu/.local/lib/python3.7/site-packages/retrying.py", line 200, in call
    attempt = Attempt(fn(*args, **kwargs), attempt_number, False)
  File "/home/cchu/.local/lib/python3.7/site-packages/jupyter_dash/jupyter_app.py", line 292, in run
   