# Purity Reviewer Example

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and source edits are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Put the parent directory on the import path so the PurityReviewer package
# imported below can be resolved from this notebook's location.
sys.path.append(os.path.abspath(".."))

from PurityReviewer.Reviewers.MatchedPurityReviewer import MatchedPurityReviewer
from PurityReviewer.Reviewers.ManualPurityReviewer import ManualPurityReviewer
from PurityReviewer.Reviewers.PrecalledPurityReviewer import PrecalledPurityReviewer
from PurityReviewer.AppComponents.utils import download_rdata
import pandas as pd
import numpy as np
import os

# Precalled Purity Review from TCGA characterization pipeline Terra workspace

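Before loading the workspace, set the Google Cloud project to use (replace the placeholder with your own project ID):

```python
import os
os.environ["GCLOUD_PROJECT"] = "<google project>"
```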

## Load data

In [3]:
import dalmatian

In [4]:
# Connect to the Terra workspace and pull its pair and sample tables.
workspace = 'broad-tcga-wgs-terra/TCGA_BRCA_WGS'
wm = dalmatian.WorkspaceManager(workspace)
wm_pairs_df = wm.get_pairs()
wm_samples_df = wm.get_samples()

# Keep only the pairs with a non-null 'absolute_rdata_WGS' entry.
# The explicit .copy() makes the filtered table an independent frame, so columns
# added to wm_pairs_df afterwards do not trigger pandas' SettingWithCopyWarning.
wm_pairs_df = wm_pairs_df[wm_pairs_df['absolute_rdata_WGS'].notna()].copy()

In [5]:
# Download the rdata files referenced in 'absolute_rdata_WGS' into a local
# directory and store what download_rdata returns (presumably the local file
# paths -- confirm against the helper) alongside each pair.
local_rdata_dir = 'local_rdata'
wm_pairs_df['local_absolute_rdata'] = download_rdata(
    wm_pairs_df['absolute_rdata_WGS'],
    rdata_dir=local_rdata_dir,
)

## Pass in data

In [6]:
from PurityReviewer.AppComponents.utils import CSIZE_DEFAULT
# CSIZE_DEFAULT labels the sex chromosomes '23' and '24'; relabel them X and Y,
# leave every other chromosome label as is, and prefix all labels with 'chr'.
sex_chr_map = {'23': 'X', '24': 'Y'}
rename_chroms = {chrom: sex_chr_map.get(chrom, chrom) for chrom in CSIZE_DEFAULT}
custom_csize = {
    f'chr{rename_chroms[chrom]}': length
    for chrom, length in CSIZE_DEFAULT.items()
}
custom_csize

{'chr1': 249250621,
 'chr2': 243199373,
 'chr3': 198022430,
 'chr4': 191154276,
 'chr5': 180915260,
 'chr6': 171115067,
 'chr7': 159138663,
 'chr8': 146364022,
 'chr9': 141213431,
 'chr10': 135534747,
 'chr11': 135006516,
 'chr12': 133851895,
 'chr13': 115169878,
 'chr14': 107349540,
 'chr15': 102531392,
 'chr16': 90354753,
 'chr17': 81195210,
 'chr18': 78077248,
 'chr19': 59128983,
 'chr20': 63025520,
 'chr21': 48129895,
 'chr22': 51304566,
 'chrX': 156040895,
 'chrY': 57227415}

In [7]:
from rpy2.robjects import r, pandas2ri
import rpy2.robjects as robjects

def my_custom_parse_absolute_soln(rdata_path: str) -> pd.DataFrame: # has to be a local path
    """Read the ``mode.tab`` matrix stored in an ABSOLUTE .RData file into a DataFrame.

    The file is loaded with R's ``load`` through rpy2. The first object name
    reported by ``load`` is looked up in the R session, its ``mode.tab`` entry
    is extracted, and selected columns are copied out by fixed position.

    Parameters
    ----------
    rdata_path : str
        Path to the .RData file. Has to be a local path.

    Returns
    -------
    pd.DataFrame
        One row per row of ``mode.tab``, with columns (in this order)
        ``alpha``, ``tau``, ``tau_hat``, ``0_line``, ``1_line``, ``sigma_H``,
        ``theta_Q``, ``lambda``, ``SCNA_likelihood``, ``Kar_likelihood``,
        ``SSNVs_likelihood`` and ``step_size``.

    Notes
    -----
    ``pandas2ri.activate()`` is called on every invocation; it switches on
    rpy2's pandas/numpy conversion for the whole session, not only for this call.
    """
    absolute_rdata_cols = ['alpha', 'tau', 'tau_hat', '0_line', '1_line',
                           'sigma_H',
                           'theta_Q',
                           'lambda',
                           'SCNA_likelihood',
                           'Kar_likelihood',
                           'SSNVs_likelihood']
    # Output column -> 0-based column position inside mode.tab.
    # NOTE(review): positions are hard-coded; confirm they match the layout
    # written by the ABSOLUTE version that produced the rdata files.
    mode_tab_positions = {
        'alpha': 0,
        'tau': 1,
        'tau_hat': 7,
        '0_line': 3,
        'sigma_H': 8,
        'theta_Q': 11,
        'lambda': 12,
        'SCNA_likelihood': 15,
        'Kar_likelihood': 17,
        'SSNVs_likelihood': 20,
    }
    step_size_position = 4

    pandas2ri.activate()
    # R's load() returns the names of the objects it created; use the first one.
    loaded_names = robjects.r['load'](rdata_path)
    absolute_obj = robjects.r[loaded_names[0]]
    mode_tab = absolute_obj.rx2('mode.tab') # or segtab?

    # Pre-declare the columns so the output order is fixed; 'step_size' is
    # appended after them.
    mod_tab_df = pd.DataFrame(columns=absolute_rdata_cols)
    for col_name, position in mode_tab_positions.items():
        mod_tab_df[col_name] = mode_tab[:, position]

    # step_size is twice the value stored at position 4, and 1_line sits one
    # step_size above 0_line.
    mod_tab_df['step_size'] = mode_tab[:, step_size_position] * 2
    mod_tab_df['1_line'] = mod_tab_df['step_size'] + mod_tab_df['0_line']

    return mod_tab_df

In [8]:
from PurityReviewer.AppComponents.utils import add_precalled_purities_to_pairs

# Combine the pairs and samples tables through add_precalled_purities_to_pairs
# (going by its name, it attaches the pre-called purity values to each pair),
# then key the result by 'tumor_submitter_id'.
wm_pairs_df = (
    add_precalled_purities_to_pairs(wm_pairs_df, wm_samples_df)
    .set_index('tumor_submitter_id')
)

In [9]:
# Create the reviewer and hand it the pairs table as the data under review.
# data_path names the directory used for this review session, and the table's
# index (tumor_submitter_id, set in the previous cell) identifies each item.
precalled_purity_reviewer = PrecalledPurityReviewer()
precalled_purity_reviewer.set_review_data(
    data_path = 'precalled_purity_reviewer_output', 
    description= 'BRCA purity review', 
    df=wm_pairs_df,
    index=wm_pairs_df.index,
)


# TODO(dev): housekeeping notes left over from development:
#   - remove the purity_reviewer_example notebook
#   - make sure the matched_purity_reviewer_example notebook looks exactly like
#     the purity_reviewer_example notebook on the main branch
#   - change the index to the tumor submitter id (appears to be done in the previous cell)
# NOTE(review): custom_csize and my_custom_parse_absolute_soln are defined earlier
# in this notebook but are not passed to set_review_app below -- confirm whether
# that is intentional.
# Tell the app which columns of wm_pairs_df to use: the plot columns listed in
# sample_info_cols, the segmentation file column, the local rdata path column,
# and the fields listed for hover data on the mutation figure.
precalled_purity_reviewer.set_review_app(
    sample_info_cols=['absolute_highres_plot_WGS', 'hapaseg_allelic_segmentation_plot_WGS'],
    acs_col='hapaseg_segfile_WGS', 
    rdata_fn_col='local_absolute_rdata',
    mut_fig_hover_data=['Hugo_Symbol', 'Chromosome', 'Start_position'],
)

# Apply the reviewer's default annotation configuration and default autofill.
precalled_purity_reviewer.set_default_review_data_annotations_configuration()
precalled_purity_reviewer.set_default_autofill()
# If you hit a pickle error, restart the kernel and rerun all the cells.

In [10]:
# Launch the reviewer's Dash app on port 8099.
# NOTE(review): mode='tab' presumably opens the app in a separate browser tab -- confirm.
precalled_purity_reviewer.run(port=8099, mode='tab')

Setting auto_export_path to precalled_purity_reviewer_output/data.auto_export
Using precalled_purity_reviewer_output/data.auto_export for auto exporting.
Dash app running on http://0.0.0.0:8099/



You are in test mode. Your data will not be saved.



<IPython.core.display.Javascript object>