# Patient Reviewer

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from JupyterReviewer.Reviewers.PatientReviewer import PatientReviewer
from JupyterReviewer.Reviewers.PatientReviewer import collect_data
from JupyterReviewer.Data import DataAnnotation
from JupyterReviewer.ReviewDataApp import AppComponent
from JupyterReviewer.DataTypes.PatientSampleData import PatientSampleData

import pandas as pd
from dash import html
import numpy as np
from dash import dcc
import plotly.express as px
from dash.dependencies import Input, Output, State

## Edit the code below according to the commented directions

### Generate a participant and a sample file

In [8]:
samples_df, participants_df = collect_data(
    #'../../example_notebooks/example_data/example_patient_reviewer_paired_clean_input.yaml'
    '../../example_notebooks/example_data/example_patient_reviewer_input.yaml'
)

In [9]:
participants_df.set_index('participant_id', inplace=True)
maf_df = pd.read_csv(participants_df.loc['ONC7299', 'maf_fn'], sep='\t')
maf_df

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_position,End_position,Strand,Variant_Classification,Variant_Type,...,validation_power_wex,validation_tumor_alt_count_wex,validation_tumor_ref_count_wex,validation_normal_alt_count_wex,validation_normal_ref_count_wex,discovery_tumor_alt_count_wex,discovery_tumor_ref_count_wex,discovery_normal_alt_count_wex,discovery_normal_ref_count_wex,min_val_count_wex
0,C1orf233,643988,__UNKNOWN__,__UNKNOWN__,1,1534900,1534900,__UNKNOWN__,Missense_Mutation,SNP,...,1.000000,33,211,0,824,33,211,0,824,2.0
1,ZMYM4,9202,__UNKNOWN__,__UNKNOWN__,1,35858260,35858260,__UNKNOWN__,Missense_Mutation,SNP,...,,0,158,0,135,0,158,0,135,
2,MAP7D1,55700,__UNKNOWN__,__UNKNOWN__,1,36641849,36641849,__UNKNOWN__,Silent,SNP,...,0.891223,4,195,0,398,4,195,0,398,2.0
3,SZT2,23334,__UNKNOWN__,__UNKNOWN__,1,43896017,43896017,__UNKNOWN__,Silent,SNP,...,0.891428,4,145,0,267,4,145,0,267,2.0
4,FOXD2,2306,__UNKNOWN__,__UNKNOWN__,1,47904488,47904488,__UNKNOWN__,Silent,SNP,...,1.000000,34,214,0,781,34,214,0,781,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
476,DCAF12L2,340578,__UNKNOWN__,__UNKNOWN__,X,125299141,125299141,__UNKNOWN__,Missense_Mutation,SNP,...,1.000000,61,309,0,617,61,309,0,617,2.0
477,HCFC1,3054,__UNKNOWN__,__UNKNOWN__,X,153219971,153219971,__UNKNOWN__,Silent,SNP,...,0.965294,6,359,0,754,6,359,0,754,2.0
478,ANKRD30A,91074,__UNKNOWN__,__UNKNOWN__,10,37506641,37506645,__UNKNOWN__,Frame_Shift_Del,DEL,...,0.990118,8,119,0,207,8,119,0,207,2.0
479,MEGF11,84465,__UNKNOWN__,__UNKNOWN__,15,66411349,66411353,__UNKNOWN__,Intron,DEL,...,0.891139,4,227,0,135,4,227,0,135,2.0


In [41]:
maf_df_copy = maf_df.copy()
maf_df_copy = maf_df_copy.sort_values(['Hugo_Symbol', 'Start_position']).dropna(axis=1)
maf_df_copy.reset_index(drop=True, inplace=True)

separated_mafs=[]
for i, sample in enumerate(maf_df_copy['Tumor_Sample_Barcode'].unique()):
    separated_mafs.append(maf_df_copy[maf_df_copy['Tumor_Sample_Barcode'] == maf_df_copy.loc[i, 'Tumor_Sample_Barcode']].reset_index())

sample_cols = []
for col in list(maf_df_copy):
    for i in range(len(separated_mafs)-1):
        if separated_mafs[i].loc[0, 'Hugo_Symbol'] != separated_mafs[i+1].loc[0, 'Hugo_Symbol']:
            
        if separated_mafs[i].loc[0, col] != separated_mafs[i+1].loc[0, col] and col not in sample_cols:
                sample_cols.append(col)


In [43]:
separated_mafs[0]

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_position,End_position,Strand,Variant_Classification,Variant_Type,...,TCGA_8334_pon_low_alt_count,Passed_Filters,validation_tumor_alt_count_wex,validation_tumor_ref_count_wex,validation_normal_alt_count_wex,validation_normal_ref_count_wex,discovery_tumor_alt_count_wex,discovery_tumor_ref_count_wex,discovery_normal_alt_count_wex,discovery_normal_ref_count_wex
0,ABCA1,19,__UNKNOWN__,__UNKNOWN__,9,107582311,107582311,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",5,262,0,268,5,262,0,268
1,ABCA12,26154,__UNKNOWN__,__UNKNOWN__,2,215891618,215891618,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",14,134,0,133,14,134,0,133
2,ABCA8,10351,__UNKNOWN__,__UNKNOWN__,17,66915465,66915465,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",13,134,0,183,13,134,0,183
3,ABCE1,6059,__UNKNOWN__,__UNKNOWN__,4,146031323,146031323,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",14,124,0,168,14,124,0,168
4,ABCG5,64240,__UNKNOWN__,__UNKNOWN__,2,44065805,44065805,__UNKNOWN__,Nonsense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",4,286,0,272,4,286,0,272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,ZNF594,84622,__UNKNOWN__,__UNKNOWN__,17,5085451,5085451,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",8,204,0,300,8,204,0,300
291,ZNF594,84622,__UNKNOWN__,__UNKNOWN__,17,5085473,5085473,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",10,197,0,290,10,197,0,290
292,ZNF608,57507,__UNKNOWN__,__UNKNOWN__,5,123983503,123983503,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",39,357,0,370,39,357,0,370
293,ZNF821,55565,__UNKNOWN__,__UNKNOWN__,16,71913839,71913839,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",34,172,0,248,34,172,0,248


In [24]:
separated_mafs[1]

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_position,End_position,Strand,Variant_Classification,Variant_Type,...,TCGA_8334_pon_low_alt_count,Passed_Filters,validation_tumor_alt_count_wex,validation_tumor_ref_count_wex,validation_normal_alt_count_wex,validation_normal_ref_count_wex,discovery_tumor_alt_count_wex,discovery_tumor_ref_count_wex,discovery_normal_alt_count_wex,discovery_normal_ref_count_wex
0,ABCA12,26154,__UNKNOWN__,__UNKNOWN__,2,215891618,215891618,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",5,82,0,133,5,82,0,133
1,ABCA8,10351,__UNKNOWN__,__UNKNOWN__,17,66915465,66915465,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",4,97,0,183,4,97,0,183
2,ABCE1,6059,__UNKNOWN__,__UNKNOWN__,4,146031323,146031323,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",6,82,0,168,6,82,0,168
3,ABHD15,116236,__UNKNOWN__,__UNKNOWN__,17,27893961,27893961,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",4,304,0,494,4,304,0,494
4,ABI3BP,25890,__UNKNOWN__,__UNKNOWN__,3,100548506,100548506,__UNKNOWN__,Intron,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",9,103,0,239,9,103,0,239
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,ZNF594,84622,__UNKNOWN__,__UNKNOWN__,17,5085473,5085473,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",7,142,0,290,7,142,0,290
182,ZNF594,84622,__UNKNOWN__,__UNKNOWN__,17,5086125,5086125,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",5,160,1,218,5,160,1,218
183,ZNF608,57507,__UNKNOWN__,__UNKNOWN__,5,123983503,123983503,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",20,243,0,370,20,243,0,370
184,ZNF821,55565,__UNKNOWN__,__UNKNOWN__,16,71913839,71913839,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",33,102,0,248,33,102,0,248


In [22]:
maf_df_copy

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_position,End_position,Strand,Variant_Classification,Variant_Type,...,TCGA_8334_pon_low_alt_count,Passed_Filters,validation_tumor_alt_count_wex,validation_tumor_ref_count_wex,validation_normal_alt_count_wex,validation_normal_ref_count_wex,discovery_tumor_alt_count_wex,discovery_tumor_ref_count_wex,discovery_normal_alt_count_wex,discovery_normal_ref_count_wex
0,ABCA1,19,__UNKNOWN__,__UNKNOWN__,9,107582311,107582311,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",5,262,0,268,5,262,0,268
1,ABCA12,26154,__UNKNOWN__,__UNKNOWN__,2,215891618,215891618,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",5,82,0,133,5,82,0,133
2,ABCA12,26154,__UNKNOWN__,__UNKNOWN__,2,215891618,215891618,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",14,134,0,133,14,134,0,133
3,ABCA8,10351,__UNKNOWN__,__UNKNOWN__,17,66915465,66915465,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",4,97,0,183,4,97,0,183
4,ABCA8,10351,__UNKNOWN__,__UNKNOWN__,17,66915465,66915465,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",13,134,0,183,13,134,0,183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
476,ZNF608,57507,__UNKNOWN__,__UNKNOWN__,5,123983503,123983503,__UNKNOWN__,Silent,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",39,357,0,370,39,357,0,370
477,ZNF821,55565,__UNKNOWN__,__UNKNOWN__,16,71913839,71913839,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",33,102,0,248,33,102,0,248
478,ZNF821,55565,__UNKNOWN__,__UNKNOWN__,16,71913839,71913839,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",34,172,0,248,34,172,0,248
479,ZSCAN5A,79149,__UNKNOWN__,__UNKNOWN__,19,56736256,56736256,__UNKNOWN__,Missense_Mutation,SNP,...,0,"IBM_TWIST_FF_and_FFPE,blat,IBM_FF_No_CLL,ffpeO...",1,309,0,351,1,309,0,351


In [29]:
sample_cols

['Tumor_Sample_Barcode',
 't_ref_count',
 'Tumor_Sample_UUID',
 't_alt_count',
 'IBM_TWIST_FF_and_FFPE_pon_loglike',
 'IBM_TWIST_FF_and_FFPE_pon_weight',
 'reference',
 'IBM_FF_No_CLL_pon_loglike',
 'TCGA_8334_pon_loglike',
 'validation_tumor_alt_count_wex',
 'validation_tumor_ref_count_wex',
 'discovery_tumor_alt_count_wex',
 'discovery_tumor_ref_count_wex']

### Create patient reviewer object

In [4]:
reviewer = PatientReviewer()
reviewer.set_review_data(
    # Enter the file path where you would like your pkl file of your review data to be stored
    data_pkl_fn='../../PatientReviewer.pkl',
    # Breifly describe the source of your data and its purpose
    description='first review',
    # Enter the file path to your data to be reviewed
    participant_df=participants_df.set_index('participant_id'),
    sample_df=samples_df.set_index('sample_id'),
    preprocess_data_dir = '../../example_notebooks/example_data/preprocess_data'
)

reviewer.set_review_app(preprocess_data_dir = '../../example_notebooks/example_data/preprocess_data', drivers_fn='../../example_notebooks/example_data/drivers.csv')
reviewer.set_default_review_data_annotations_configuration()
reviewer.set_default_autofill()


cnv figs directory already exists: ../../example_notebooks/example_data/preprocess_data/cnv_figs


### Pass custom column color assignments into the review app with the following syntax:

In [8]:
# reviewer.set_review_app(custom_colors=[
#     [column_id_1, filter_query_1, text_color_1, background_color_1], 
#     [<same for all additional columns/queries>]
# ])

#Example (comment out reviewer.set_review_app() above and uncomment below to try):

# reviewer.set_review_app(
#     preprocess_data_dir = '../../example_notebooks/example_data/preprocess_data', 
#     drivers_fn='../../example_notebooks/example_data/drivers.csv',
#     custom_colors=[
#     ['Variant_Classification', 'Silent', 'Black', 'LightBlue'], 
#     ['Variant_Classification', 'Missense_Mutation', 'White', 'Indigo']
# ])

### Add your own components below 

In [10]:
# reviewer.app.add_component(AppComponent('component name', layout))

# Example, uncomment to try:

# def gen_vaf_plot(data: PatientSampleData, idx):
#     df = pd.read_csv(data.participant_df.loc[idx, 'maf_fn'], sep='\t')
#     return [px.histogram(
#         df,
#         x='Hugo_Symbol',
#         y=df['t_alt_count'] / (df['t_alt_count'] + df['t_ref_count']) 
#     )]

# reviewer.app.add_component(AppComponent(
#     'Variant Allele Fraction',
#     html.Div([dcc.Graph(id='vaf-plot')]),
#     callback_output=[Output('vaf-plot', 'figure')],
#     new_data_callback=gen_vaf_plot
# ))


### Add your own annotations below

In [13]:
# reviewer.app.add_review_data_annotation('annot name', ReviewDataAnnotation())

# Example, uncomment to try:

# reviewer.add_review_data_annotation(
#     'Whole Genome Doubling', 
#     DataAnnotation(
#         'string',
#         options=['Whole Genome Doubled?']
#     )
# )

# reviewer.add_review_data_annotations_app_display('Whole Genome Doubling', 'checklist')

## Run

In [5]:
reviewer.run(mode='external', port=8089)

Dash app running on http://0.0.0.0:8089/
