# Local Mutation Reviewer Example

1. Install IGV here: https://software.broadinstitute.org/software/igv/download (tested on version 2.15)
1. BEFORE running the Mutation Reviewer:
    1. Open IGV
    1. Go to "Google" and log in
    1. Go to "Google" and enter google project id. This is required for requestor pays. If the mutations are loading but the bams are not, this is likely the problem.
1. Run the notebook

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import os
from MutationReviewer.Reviewers.GeneralMutationReviewer import GeneralMutationReviewer
import dalmatian

In [3]:
data_dir = './data/'
if not os.path.isdir(data_dir):
    os.mkdir(data_dir)
    
notebook_data_dir = f'{data_dir}/general_local_mutation_reviewer_example/'
if not os.path.isdir(notebook_data_dir):
    os.mkdir(notebook_data_dir)

In [4]:
analysis_workspace = 'broad-getzlab-ibm-taml-t/Ebert-Sperling_MDS-AML-pran-4_Claudia_v1'
wm = dalmatian.WorkspaceManager(analysis_workspace)
sample_bams_df = wm.get_samples()

In [5]:
mutsig_annot_maf_fn = 'gs://fc-secure-c220c99a-d38d-4e44-ac3f-093f46579d69/submissions/57ae3382-beee-47af-a5fa-d6ce33c09cc9/mutation_mutsig2cv_hg19/92ba2984-bfd8-4e49-8f8e-644bd7cec874/call-tool_mutsig2cv_hg19/5_0002_Discovery-Participants.final_analysis_set.maf'
mutsig_annot_maf_df = pd.read_csv(mutsig_annot_maf_fn, sep='\t', encoding='iso-8859-1')
mutsig_annot_maf_df = mutsig_annot_maf_df.iloc[:100]

  mutsig_annot_maf_df = pd.read_csv(mutsig_annot_maf_fn, sep='\t', encoding='iso-8859-1')


In [21]:
data_pkl_fn = f'{notebook_data_dir}/review_data.5.pkl'
review_description = 'Test reviewer'

In [58]:
reviewer = GeneralMutationReviewer()
reviewer.set_review_data(
    data_pkl_fn=data_pkl_fn, 
    description=review_description, 
    mutations_df=mutsig_annot_maf_df,
    bams_df=sample_bams_df.reset_index(),
    mutations_df_sample_col='patient', # on patient level
    chrom_col='chr',
    start_pos_col='pos',
    bam_df_sample_col='participant', # on patient level
    bam_cols='gpdw_DNA_WES_icev1_cram_or_bam_path',
    more_mutation_groupby_cols=['Tumor_Sample_Barcode']
    # bai_col='gpdw_DNA_WES_icev1_crai_or_bai_path'
)
reviewer.set_review_app(
    mutation_table_display_cols=['Hugo_Symbol', 'chr', 'pos', 'type', 'classification', 'gnomADg_AF', 't_alt_count', 't_ref_count', 'tumor_f', 'purity'],
    bam_table_display_cols=['sample_id', 'participant', 'gpdw_DNA_WES_icev1_cram_or_bam_path', 'gpdw_DNA_WES_icev1_crai_or_bai_path'],
    bai_col='gpdw_DNA_WES_icev1_crai_or_bai_path'
)
reviewer.set_default_review_data_annotations_configuration()


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [59]:
reviewer.run(port=8089)

Dash app running on http://0.0.0.0:8089/
socket initialized
Position to view: 1:12,726,028-12,726,068
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_PDO-23428_182samples_July2021/RP-1886/Exome/MDA4021076/v3/MDA4021076.bam',)
socket closed
socket initialized
Position to view: 1:12,785,767-12,785,807
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_9samples_PDO-23431_July2021/RP-1886/Exome/CM50781/v2/CM50781.bam', 'gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_164samples_Sept2019/RP-1886/Exome/CM52752/v1/CM52752.bam')
socket closed
socket initialized
Position to view: 1:94,467,458-94,467,498
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_PDO-23428_182samples_July2021/RP-1886/Exome/1174269-3/v3/1174269-3.bam',)
socket closed


In [11]:
reviewer.get_history()

Unnamed: 0,index,timestamp,source_data_fn,mutation_call,sequencing_tags,alignment_tags,normal_tags,tumor_tags,other_tag_description,Notes
