# General Mutation Reviewer Example

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import os
from MutationReviewer.Reviewers.GeneralMutationReviewer import GeneralMutationReviewer
import dalmatian

In [3]:
# Directory layout used by this example:
#   data_dir          -- root folder for notebook outputs
#   notebook_data_dir -- sub-folder holding this notebook's review data
data_dir = './data/'
notebook_data_dir = f'{data_dir}/general_local_mutation_reviewer_example/'

# makedirs(..., exist_ok=True) creates data_dir and notebook_data_dir in one call and is a
# no-op when they already exist, so this cell is safe to re-run (no check-then-create race).
os.makedirs(notebook_data_dir, exist_ok=True)

# 1000 genomes bams

In [4]:
from download_1000genomes_bams import download_genomes, download_vcf, format_vcf, subset_patients_vcf

## tp53 VCF

Install [bcftools](https://samtools.github.io/bcftools/) before downloading the VCF.

In [5]:
# Location of the TP53-region VCF; passed to download_vcf and format_vcf in the cells below.
vcf_path = '../1k_genomes/tp53.vcf'

In [7]:
# Source: 1000 Genomes phase 3 chromosome 17 genotype VCF (release 20130502).
chr17_vcf_url = "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr17.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz"
# Same region string that the BAM download cell labels as TP53.
tp53_region = "17:7571739-7590808"

download_vcf(vcf_path, onek_chr_ftp_path=chr17_vcf_url, region_str=tp53_region)

grep: stdout: Broken pipe


In [6]:
# format_vcf returns two objects: a variant table (vcf_df) and `patients`, which the next
# cell shows to be an Index of patient IDs.
vcf_df, patients = format_vcf(vcf_path)

In [7]:
# Work with only the first five patients to keep the example small.
subset_patients = patients[:5]
subset_patients

Index(['HG00096', 'HG00097', 'HG00099', 'HG00100', 'HG00101'], dtype='object')

In [8]:
# Presumably restricts/reshapes the variant table to the selected patients; the result has one
# row per variant with a `patient_id` column (see the preview in the next cell).
reformat_patient_vcf_df = subset_patients_vcf(vcf_df, subset_patients)

In [9]:
# Preview the first rows of the per-patient mutation table.
reformat_patient_vcf_df.head()

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,patient_id
9,17,7572101,.,C,T,100,PASS,AC=286;AF=0.0571086;AN=5008;NS=2504;DP=15895;E...,GT,HG00096
12,17,7572154,.,G,A,100,PASS,AC=2178;AF=0.434904;AN=5008;NS=2504;DP=18905;E...,GT,HG00096
37,17,7573229,.,C,T,100,PASS,AC=824;AF=0.164537;AN=5008;NS=2504;DP=17555;EA...,GT,HG00096
81,17,7574721,.,C,T,100,PASS,AC=65;AF=0.0129792;AN=5008;NS=2504;DP=17045;EA...,GT,HG00096
82,17,7574775,.,C,T,100,PASS,AC=2152;AF=0.429712;AN=5008;NS=2504;DP=15948;E...,GT,HG00096


## Load Bams for subset of patients

In [10]:
# Directory handed to download_genomes; the BAM-path table is also read back from here below.
output_dir = '../1k_genomes'

In [11]:
# Remote path pattern for the per-patient exome BAMs. The 'REPLACE' token is given to
# download_genomes as replace_str (the recorded URLs show the patient ID in both positions).
# NOTE(review): the pattern hard-codes the "GBR" population code, so it presumably only matches
# patients from that population -- confirm before using other patient IDs.
bam_path_template = "/vol1/ftp/phase3/data/REPLACE/exome_alignment/REPLACE.mapped.ILLUMINA.bwa.GBR.exome.*.bam"
bam_download_dir = os.path.abspath(output_dir)

bam_paths_fn = download_genomes(
    patient_ids=subset_patients,
    output_dir=bam_download_dir,
    onek_genomes_ftp="ftp.1000genomes.ebi.ac.uk",
    patient_path_str_format=bam_path_template,
    replace_str='REPLACE',
    region_str="17:7571739-7590808",  # TP53
)

100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:02<00:00,  2.25it/s]


In [12]:
# Load the tab-separated BAM-path table and turn its index (first column of the file)
# into a regular `patient_id` column.
bam_paths_df = (
    pd.read_csv(f'{output_dir}/1k_genomes_bam_paths.txt', sep='\t', index_col=0)
    .rename_axis('patient_id')
    .reset_index()
)

In [13]:
# Local BAM paths recorded for each patient.
bam_paths_df.local_bam_path.tolist()

['/Users/cchu/Desktop/Methods/MutationReviewer/1k_genomes/HG00096.17_7571739_7590808.bam',
 '/Users/cchu/Desktop/Methods/MutationReviewer/1k_genomes/HG00097.17_7571739_7590808.bam',
 '/Users/cchu/Desktop/Methods/MutationReviewer/1k_genomes/HG00099.17_7571739_7590808.bam',
 '/Users/cchu/Desktop/Methods/MutationReviewer/1k_genomes/HG00100.17_7571739_7590808.bam',
 '/Users/cchu/Desktop/Methods/MutationReviewer/1k_genomes/HG00101.17_7571739_7590808.bam']

In [14]:
# Remote BAM index (.bai) URLs recorded for each patient.
bam_paths_df.original_ftp_path_bai.tolist()

['https://ftp.1000genomes.ebi.ac.uk//vol1/ftp/phase3/data/HG00096/exome_alignment/HG00096.mapped.ILLUMINA.bwa.GBR.exome.20120522.bam.bai',
 'https://ftp.1000genomes.ebi.ac.uk//vol1/ftp/phase3/data/HG00097/exome_alignment/HG00097.mapped.ILLUMINA.bwa.GBR.exome.20130415.bam.bai',
 'https://ftp.1000genomes.ebi.ac.uk//vol1/ftp/phase3/data/HG00099/exome_alignment/HG00099.mapped.ILLUMINA.bwa.GBR.exome.20130415.bam.bai',
 'https://ftp.1000genomes.ebi.ac.uk//vol1/ftp/phase3/data/HG00100/exome_alignment/HG00100.mapped.ILLUMINA.bwa.GBR.exome.20121211.bam.bai',
 'https://ftp.1000genomes.ebi.ac.uk//vol1/ftp/phase3/data/HG00101/exome_alignment/HG00101.mapped.ILLUMINA.bwa.GBR.exome.20121211.bam.bai']

# Mutation review with local bam paths

We have provided local mini BAMs covering only TP53 from the 1000 Genomes Project for a select number of patients so you can quickly get a reviewer running. **Only your local IGV desktop app will take local file paths.** IGV.js (displayed within the dashboard itself) will NOT take local file paths.

## Local IGV run on local paths

1. Install IGV here: https://software.broadinstitute.org/software/igv/download (tested on version 2.17)
1. Open IGV > View > Preferences > Advanced > Check `Enable port`
1. Run `reviewer.run(...)`

In [15]:
# Review-data location and description for the reviewer that uses local BAM paths;
# both are passed to set_review_data in the next cell.
local_data_path = f'{notebook_data_dir}/1k_genomes.TP53.review_data.local'
local_review_description = 'Test reviewer with local bam paths'

In [16]:
# Reviewer backed by the local BAM/BAI columns of the BAM table.
local_reviewer = GeneralMutationReviewer()
local_reviewer.set_review_data(
    data_path=local_data_path,
    description=local_review_description,
    # --- mutation table ---
    mutations_df=reformat_patient_vcf_df,
    mutation_groupby_cols=['CHROM', 'POS'],  # columns to group by
    # If a list, presumably must be the same length as pos_cols
    # (the original note referred to `start_pos_cols` -- TODO confirm).
    chrom_cols='CHROM',
    pos_cols='POS',
    mutations_df_bam_ref_col='patient_id',
    # --- BAM table (presumably linked to the mutation table via the *_ref_col columns) ---
    bams_df=bam_paths_df,
    bams_df_ref_col='patient_id',
    bam_cols='local_bam_path',
    bai_cols='local_bai_path',
)
local_reviewer.set_default_review_data_annotations_configuration()

In [17]:
# Columns to display in the mutation table; the BAM table gets an empty column list.
local_mutation_display_cols = ['CHROM', 'POS', 'REF', 'ALT', 'QUAL', 'patient_id']

local_reviewer.set_review_app(
    igv_mode='igv_local',  # <-- review through the desktop IGV app
    mutation_table_display_cols=local_mutation_display_cols,
    bam_table_display_cols=[],
)

In [18]:
# Launch the Dash review app on port 8095 (desktop IGV must already be open with its port enabled).
local_reviewer.run(port=8095, collapsable=False)

Setting auto_export_path to ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.local/data.auto_export
Using ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.local/data.auto_export for auto exporting.
Dash app running on http://0.0.0.0:8095/



You are in test mode. Your data will not be saved.



Snapshots are available in /Users/cchu/Desktop/Methods/MutationReviewer/example_notebooks/igv_snapshots
Snapshots are available in /Users/cchu/Desktop/Methods/MutationReviewer/example_notebooks/igv_snapshots
Snapshots are available in /Users/cchu/Desktop/Methods/MutationReviewer/example_notebooks/igv_snapshots


In [26]:
# Show only the mutations whose `mutation_call` annotation has been filled in.
local_reviewer.get_annot().query('mutation_call != ""')


Data is not frozen. Annotations will not be saved. Please freeze data in the dashboard to save annotations.



Unnamed: 0,mutation_call,sequencing_tags,alignment_tags,normal_tags,tumor_tags,other_tag_description,Notes
17:7578679,Germline,,,,,,


# Mutation review with urls

Below we reference the columns in the BAM table that contain the original FTP paths. These can also be Google Cloud Storage URLs (`gs://...`).

```
bam_cols='original_ftp_path'
bai_cols='original_ftp_path_bai'
```

With FTP we are loading the full bam, so rendering IGV may take longer compared to loading local paths.


In [27]:
# Review-data location and description for the reviewer that uses remote BAM URLs;
# both are passed to set_review_data in the next cell.
url_data_path = f'{notebook_data_dir}/1k_genomes.TP53.review_data.use_urls'
url_review_description = 'Test reviewer with FTP bam paths'

In [28]:
# Reviewer backed by the remote BAM/BAI URL columns of the BAM table.
reviewer = GeneralMutationReviewer()
reviewer.set_review_data(
    data_path=url_data_path,
    description=url_review_description,
    # --- mutation table ---
    mutations_df=reformat_patient_vcf_df,
    mutation_groupby_cols=['CHROM', 'POS'],  # columns to group by
    # If a list, presumably must be the same length as pos_cols
    # (the original note referred to `start_pos_cols` -- TODO confirm).
    chrom_cols='CHROM',
    pos_cols='POS',
    mutations_df_bam_ref_col='patient_id',
    # --- BAM table (presumably linked to the mutation table via the *_ref_col columns) ---
    bams_df=bam_paths_df,
    bams_df_ref_col='patient_id',
    bam_cols='original_ftp_path',
    bai_cols='original_ftp_path_bai',
)
reviewer.set_default_review_data_annotations_configuration()

## IGV.js Run

No additional setup needed. IGV will be rendered inside the notebook.

**NOTE:** IGV.js expects files to come from a server (e.g. a gsurl, http://, etc.). It will not read local paths.

In [29]:
# Columns to display in the mutation table; the BAM table gets an empty column list.
igvjs_mutation_display_cols = ['CHROM', 'POS', 'REF', 'ALT', 'QUAL', 'patient_id']

reviewer.set_review_app(
    igv_mode='igv_js',  # use the IGV.js component
    mutation_table_display_cols=igvjs_mutation_display_cols,
    bam_table_display_cols=[],
)

In [31]:
# Launch the Dash review app on port 8094 (a different port from the local-path reviewer above).
reviewer.run(port=8094, collapsable=False)

Setting auto_export_path to ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export
Using ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export for auto exporting.
Dash app running on http://0.0.0.0:8094/



You are in test mode. Your data will not be saved.



## Desktop/local IGV run

Your desktop IGV can also take urls.


If you are using google buckets, BEFORE attempting to load bams through the reviewer:
1. Open IGV
1. Go to "Google" and log in
1. Go to "Google" and enter your Google project ID. This is required for requester-pays buckets. If the mutations are loading but the BAMs are not, this is likely the problem.
1. Run `reviewer.run(...)`

In [32]:
# Display every column of the mutation table; the BAM table gets an empty column list.
desktop_mutation_display_cols = [
    'CHROM', 'POS', 'ID', 'REF', 'ALT',
    'QUAL', 'FILTER', 'INFO', 'FORMAT', 'patient_id',
]

reviewer.set_review_app(
    igv_mode='igv_local',  # review through the desktop IGV app
    mutation_table_display_cols=desktop_mutation_display_cols,
    bam_table_display_cols=[],
)

In [33]:
# Launch the Dash review app on port 8094.
# NOTE(review): this reuses the port of the earlier IGV.js run -- confirm the earlier app
# instance is stopped or replaced before relying on this.
reviewer.run(port=8094)

Setting auto_export_path to ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export
Using ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export for auto exporting.



You are in test mode. Your data will not be saved.



Dash app running on http://0.0.0.0:8094/
Snapshots are available in /Users/cchu/Desktop/Methods/MutationReviewer/example_notebooks/igv_snapshots


# Both options

IGV.js still lacks some features that you have in local IGV (e.g. BLAT). You can include both components in the dashboard and use whichever one is appropriate for your review.


In [34]:
# Display every column of the mutation table; the BAM table gets an empty column list.
both_modes_display_cols = [
    'CHROM', 'POS', 'ID', 'REF', 'ALT',
    'QUAL', 'FILTER', 'INFO', 'FORMAT', 'patient_id',
]

reviewer.set_review_app(
    igv_mode='both',  # include both the IGV.js and the desktop IGV components
    mutation_table_display_cols=both_modes_display_cols,
    bam_table_display_cols=[],
)

In [35]:
# Launch the Dash review app on port 8094.
# NOTE(review): this reuses the port of the earlier runs -- confirm the earlier app
# instance is stopped or replaced before relying on this.
reviewer.run(port=8094)

Setting auto_export_path to ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export
Using ./data//general_local_mutation_reviewer_example//1k_genomes.TP53.review_data.use_urls/data.auto_export for auto exporting.
Dash app running on http://0.0.0.0:8094/



You are in test mode. Your data will not be saved.



# Local IGV run on a VM

## Set up VM

1. Install VNC server (on VM and local)
1. Start VNC server
1. Port forward

## Install IGV

1. In the VNC browser go to IGV download page
1. Unzip file
1. In terminal run the sh file to open IGV
1. If reading BAMs from Google buckets, go to the "Google" tab at the top and log in. Do this BEFORE starting your mutation reviewer.
1. Start reviewing!