# Local Mutation Reviewer Example

1. Install IGV from https://software.broadinstitute.org/software/igv/download (tested with version 2.15).
1. BEFORE running the Mutation Reviewer:
    1. Open IGV
    1. Go to "Google" and log in
    1. Go to "Google" and enter your Google project ID. This is required for requester-pays buckets. If the mutations load but the BAMs do not, a missing project ID is the likely cause.
1. Run the notebook

In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before each cell executes so edits to package code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import os
from MutationReviewer.Reviewers.MutationReviewer import MutationReviewer
import dalmatian

In [3]:
# Local directories where this notebook keeps its review data.
# The path strings are intentionally unchanged (including the doubled slash that
# results from the trailing '/' on data_dir) so they still resolve to the same
# location as any previously saved review files.
data_dir = './data/'
notebook_data_dir = f'{data_dir}/local_mutation_reviewer_example/'

# os.makedirs creates data_dir as a parent when needed, and exist_ok=True makes the
# cell safe to re-run (no check-then-create race, no error if the directory exists).
os.makedirs(notebook_data_dir, exist_ok=True)

In [4]:
# Workspace identifier handed to dalmatian.
# NOTE(review): looks like "<namespace>/<workspace>" — confirm against the dalmatian docs.
analysis_workspace = 'broad-getzlab-ibm-taml-t/Ebert-Sperling_MDS-AML-pran-4_Claudia_v1'
wm = dalmatian.WorkspaceManager(analysis_workspace)
# Sample table for the workspace; the reviewer cells below pass it (after reset_index())
# as bams_df and read the BAM/BAI path columns from it.
sample_bams_df = wm.get_samples()

In [5]:
# Mutation table (.maf) stored in a Google Cloud Storage bucket.
# NOTE(review): reading a gs:// path through pandas presumably needs gcsfs and valid
# Google credentials in the kernel environment — confirm.
mutsig_annot_maf_fn = 'gs://fc-secure-c220c99a-d38d-4e44-ac3f-093f46579d69/submissions/57ae3382-beee-47af-a5fa-d6ce33c09cc9/mutation_mutsig2cv_hg19/92ba2984-bfd8-4e49-8f8e-644bd7cec874/call-tool_mutsig2cv_hg19/5_0002_Discovery-Participants.final_analysis_set.maf'
# Tab-separated file decoded as ISO-8859-1.
# NOTE(review): the saved output of this cell echoes this read_csv line, which looks like
# a pandas warning (possibly a mixed-dtype DtypeWarning) — confirm, and consider passing
# explicit dtypes if so.
mutsig_annot_maf_df = pd.read_csv(mutsig_annot_maf_fn, sep='\t', encoding='iso-8859-1')


  mutsig_annot_maf_df = pd.read_csv(mutsig_annot_maf_fn, sep='\t', encoding='iso-8859-1')


In [6]:
# Pickle file passed to set_review_data() as data_pkl_fn. The saved output of the
# reviewer cells ("Loading existing data pkl file") indicates an existing file is reused.
data_pkl_fn = f'{notebook_data_dir}/review_data.4.pkl'
# Free-text description passed to set_review_data().
review_description = 'Test reviewer'

In [164]:
# Configure the mutation reviewer for this example. Per the notebook introduction,
# the IGV desktop application must already be open and logged in before it is run.

# Sample-table columns holding the alignment file and its index.
wes_bam_col = 'gpdw_DNA_WES_icev1_cram_or_bam_path'
wes_bai_col = 'gpdw_DNA_WES_icev1_crai_or_bai_path'

# Columns shown in the app's mutation table and BAM table.
mutation_display_cols = [
    'Hugo_Symbol', 'chr', 'pos', 'type', 'classification',
    'gnomADg_AF', 't_alt_count', 't_ref_count', 'tumor_f', 'purity',
]
bam_display_cols = ['sample_id', wes_bam_col, wes_bai_col]

reviewer = MutationReviewer()

# Both sample columns are on the patient level: 'patient' in the mutation table
# and 'participant' in the sample table.
reviewer.set_review_data(
    data_pkl_fn=data_pkl_fn,
    description=review_description,
    mutations_df=mutsig_annot_maf_df,
    bams_df=sample_bams_df.reset_index(),
    mutations_df_sample_col='patient',
    chrom_col='chr',
    start_pos_col='pos',
    bam_df_sample_col='participant',
    bam_col=wes_bam_col,
    bai_col=wes_bai_col,
)

reviewer.set_review_app(
    mutation_table_display_cols=mutation_display_cols,
    bam_table_display_cols=bam_display_cols,
    bai_col=wes_bai_col,
    genome='hg19',
    track_height=500,
)

reviewer.set_default_review_data_annotations_configuration()


Loading existing data pkl file


add_review_data_annotations_app_display() is deprecated. Please use add_annotation_display_component()



In [166]:
# Launch the reviewer's Dash app on port 8088 (the saved output shows it served at http://0.0.0.0:8088/).
reviewer.run(port=8088)

Dash is running on http://0.0.0.0:8088/

Dash app running on http://0.0.0.0:8088/


In [51]:
# Display the annotation history recorded so far; the saved output is a table with
# index, timestamp, source_data_fn, mutation_call, sequencing_tags and Notes columns.
reviewer.get_history()

Unnamed: 0,index,timestamp,source_data_fn,mutation_call,sequencing_tags,Notes
0,1:865665,2022-11-28 14:44:36.972922,./data//local_mutation_reviewer_example//revie...,Somatic,,
0,1:865665,2022-11-28 14:44:44.704764,./data//local_mutation_reviewer_example//revie...,Somatic,"[Same Start and End, Short Insert Only]",


socket initialized
Position to view: 1:878,233-878,273
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_35samples_Feb2020/RP-1886/Exome/JJ01222018BM/v2/JJ01222018BM.bam',)
socket closed
socket initialized
Position to view: 1:865,645-865,685
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_35samples_Feb2020/RP-1886/Exome/PQ16103BM/v2/PQ16103BM.bam',)
socket closed
socket initialized
Position to view: 1:877,503-877,543
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_31samples_Feb2020/RP-1886/Exome/CM66268/v1/CM66268.bam', 'gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_164samples_Sept2019/RP-1886/Exome/CM67833/v2/CM67833.bam')
socket closed
socket initialized
Position to view: 1:865,645-865,685
('gs://fc-02e4b730-cb29-48bb-9c3b-562e075457fe/Getz_Ebert_IBM_13-583_Exomes_35samples_Feb2020/RP-1886/Exome/PQ16103BM/v2/PQ16103BM.bam',)
socket closed


In [None]:
# NOTE(review): incomplete scratch fragment — the call below is never closed, and neither
# `dashbio` nor `genome` is defined in the visible cells, so this cell cannot run as-is.
# Either complete it or remove it before sharing the notebook.
dashbio.Igv(
            children='igv',
            id='default-igv',
            genome=genome,
            minimumBases=100,

# Standalone Dash component

In [13]:
# Pinned to the version this cell resolved in its saved output (dash_bio-1.0.2)
# so that re-running the notebook installs the same release.
%pip install dash-bio==1.0.2

Collecting dash-bio
  Downloading dash_bio-1.0.2.tar.gz (10.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.2/10.2 MB 5.2 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting scikit-learn>=0.20.1
  Downloading scikit_learn-1.2.2-cp38-cp38-macosx_10_9_x86_64.whl (9.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.0/9.0 MB 6.4 MB/s eta 0:00:00
Collecting colour
  Downloading colour-0.1.5-py2.py3-none-any.whl (23 kB)
Collecting GEOparse>=1.1.0
  Downloading GEOparse-2.0.3.tar.gz (278 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 278.5/278.5 kB 7.5 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting parmed
  Downloading ParmEd-4.1.0.tar.gz (2.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 7.0 MB/s

In [7]:
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
import dash
import dash_bootstrap_components as dbc
import inspect
from collections import OrderedDict
from typing import Dict

In [8]:
from MutationReviewer.Reviewers.MutationReviewerEmbeddedIGV import MutationReviewerEmbeddedIGV

In [50]:
# Configure the embedded-IGV variant of the reviewer with the same review data.
# NOTE(review): presumably this class renders IGV inside the Dash app instead of
# driving the desktop application — confirm against the MutationReviewer package.

# Sample-table columns holding the alignment file and its index.
wes_bam_col = 'gpdw_DNA_WES_icev1_cram_or_bam_path'
wes_bai_col = 'gpdw_DNA_WES_icev1_crai_or_bai_path'

# Columns shown in the app's mutation table and BAM table.
mutation_display_cols = [
    'Hugo_Symbol', 'chr', 'pos', 'type', 'classification',
    'gnomADg_AF', 't_alt_count', 't_ref_count', 'tumor_f', 'purity',
]
bam_display_cols = ['sample_id', 'participant', wes_bam_col, wes_bai_col]

reviewer = MutationReviewerEmbeddedIGV()

# Both sample columns are on the patient level: 'patient' in the mutation table
# and 'participant' in the sample table.
reviewer.set_review_data(
    data_pkl_fn=data_pkl_fn,
    description=review_description,
    mutations_df=mutsig_annot_maf_df,
    bams_df=sample_bams_df.reset_index(),
    mutations_df_sample_col='patient',
    chrom_col='chr',
    start_pos_col='pos',
    bam_df_sample_col='participant',
    bam_col=wes_bam_col,
    bai_col=wes_bai_col,
)

# Unlike the desktop-IGV cell, no genome or track_height is passed here.
reviewer.set_review_app(
    mutation_table_display_cols=mutation_display_cols,
    bam_table_display_cols=bam_display_cols,
    bai_col=wes_bai_col,
)

reviewer.set_default_review_data_annotations_configuration()


Loading existing data pkl file


add_review_data_annotations_app_display() is deprecated. Please use add_annotation_display_component()



In [51]:
# Launch the embedded-IGV reviewer's Dash app on port 8089 (the saved output shows it served at http://0.0.0.0:8089/).
reviewer.run(port=8089)

Dash is running on http://0.0.0.0:8089/

Dash app running on http://0.0.0.0:8089/


Set up a new VM

- Install Node.js and npm: https://linuxize.com/post/how-to-install-node-js-on-debian-10/
- Set up Git (SSH key): https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent
- Clone igv.js and follow the setup instructions in its developer section
    - https://www.npmjs.com/package/http-server
- Install screen: `sudo apt install screen`
    - Run the server inside a screen session
    - Port: TODO (not recorded here)