# Purity Reviewer Example

In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from PurityReviewer.Reviewers.MatchedPurityReviewer import MatchedPurityReviewer
from PurityReviewer.Reviewers.ManualPurityReviewer import ManualPurityReviewer
from PurityReviewer.AppComponents.utils import download_rdata
import pandas as pd
import numpy as np
import os

# Run with simulated tumor data
## Setup

Clone the `SimulatedTumorData` repo and install in your environment.
```
cd <path to put SimulatedTumorData repo>
git clone https://github.com/getzlab/SimulatedTumorData

conda activate <your env>
pip install -e <path to put SimulatedTumorData repo>
```

This only needs to be done once.

## Load simulated data

In [None]:
from SimulatedTumorData.src.generate_simulated_data import load_patients_and_samples

In [None]:
# Load the simulated data set from the cloned repo. The path is relative to the
# current working directory, so run the notebook from the directory that
# contains the `SimulatedTumorData` checkout (or adjust the path).
# `samples` is the table handed to the reviewer in the cells below.
samples, participants = load_patients_and_samples(path_to_sim_data="SimulatedTumorData/sim_data")

## Pass in data

In [None]:
from PurityReviewer.AppComponents.utils import parse_absolute_soln, CSIZE_DEFAULT
from PurityReviewer.AppComponents.utils import parse_absolute_soln_simulatedTumorData

In [None]:
# Passed as `data_path` to `set_review_data` in the next cell.
# Presumably the directory where the review session is persisted — confirm
# against the reviewer's documentation.
data_path = 'simulated_data_purity_review'

In [None]:
# Build the matched purity reviewer for the simulated samples.
# The calls below are made in a fixed order: data first, then the app layout,
# then the default annotation configuration and autofill.
matched_reviewer = MatchedPurityReviewer()

# Register the table to review; each entry of `samples.index` is one item to review.
matched_reviewer.set_review_data(
    data_path=data_path, 
    description='Matched purity reviewer for simulated data', 
    df=samples,
    index=samples.index, 
)

# Tell the app which columns of `samples` hold each input it needs.
matched_reviewer.set_review_app(
    sample_info_cols=['ABSOLUTE_plot_fn', 'wxs_purity', 'wxs_ploidy'],
    acs_col='cnv_seg_fn', 
    maf_col='maf_fn',
    rdata_fn_col='ABSOLUTE_RData',
    mut_fig_hover_data=['Hugo_Symbol', 'Chromosome', 'Start_position'],
    csize=CSIZE_DEFAULT,
    custom_parse_absolute_soln=parse_absolute_soln_simulatedTumorData # <-- to use your own parser, pass the function object itself here (no parentheses)
)

# Use the reviewer's built-in annotation configuration and autofill behaviour.
matched_reviewer.set_default_review_data_annotations_configuration()
matched_reviewer.set_default_autofill()

## Run the reviewer

In [None]:
# Launch the reviewer app on port 8099.
# `mode='tab'` presumably opens the app in a separate browser tab, and
# `hide_history_df_cols` presumably hides the listed column from the history
# table — confirm both against the reviewer's `run` documentation.
matched_reviewer.run(port=8099, mode='tab', collapsable=False, hide_history_df_cols=['source_data_fn'])

In [None]:
# Show the reviewer's current annotations as the cell output
# (presumably one row per reviewed sample — confirm).
matched_reviewer.get_annot()

# Purity Review from CGA characterization pipeline Terra workspace

## Load data

In [10]:
import dalmatian

```
import os
os.environ["GCLOUD_PROJECT"] = <google project>

wm = dalmatian.WorkspaceManager(<namespace>/<workspace_name>)
pairs_df = wm.get_pairs()

# Download rdata from ABSOLUTE locally
rdata_dir = <directory to save rData>
downloaded_rdata_s = download_rdata(pairs_df['absolute_rdata'], rdata_dir=rdata_dir)
downloaded_rdata_s.name = 'local_absolute_rdata'
pairs_df = pd.concat([pairs_df, downloaded_rdata_s], axis=1)
```

In [None]:
# Terra workspace, given as '<namespace>/<workspace name>'.
workspace = 'broad-tcga-wgs-terra/TCGA_BRCA_WGS'
wm = dalmatian.WorkspaceManager(workspace)

# Fetch the pairs table, keep only pairs that have an ABSOLUTE RData path,
# and index the result by the tumor submitter id.
wm_pairs_df = (
    wm.get_pairs()
    .dropna(subset=['absolute_rdata_WGS'])
    .set_index('tumor_submitter_id')
)
display(wm_pairs_df)

Unnamed: 0_level_0,participant,tumor_type_code,normal_type_code,case_sample,control_sample,normal_submitter_id,tumor_submitter_id,tumor_type,normal_type,T_picard_alignment_summary_hist_WGS,...,T_mismatch_rate_WGS,recapseg_tn_coverage_WGS,CrossCheck_min_LOD_value,N_mean_coverage_WGS,T_median_insert_size_WGS,N_pct_chimeras_WGS,CrossCheck_min_LOD_lanes,T_mean_coverage_WGS,alleliccapseg_skew_WGS,N_mean_read_length_WGS
pair_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0f6ed829-a59b-48b7-a2ca-3aa9db40e4c2_4c882f6b-fd94-43b7-a6e0-2ef467da9884,27c1c094-690b-4973-8900-a797ad88f98c,TP,NB,0f6ed829-a59b-48b7-a2ca-3aa9db40e4c2,4c882f6b-fd94-43b7-a6e0-2ef467da9884,TCGA-D8-A27T-10A,TCGA-D8-A27T-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
317b72a8-0332-4db8-b9d1-9a26d9907a24_ffa2ca7e-93d7-452d-870e-c7c36c032e4f,ff0f875f-113a-4c0f-8c0a-4259cd496e6d,TP,NB,317b72a8-0332-4db8-b9d1-9a26d9907a24,ffa2ca7e-93d7-452d-870e-c7c36c032e4f,TCGA-C8-A26W-10A,TCGA-C8-A26W-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,0.004736,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,70.145458,39.437779,406.0,0.034528,TCGA-C8-A26W-01A-11D-A89A-36.WholeGenome.RP-16...,71.227847,0.9989297,151.0
4584c7e2-8011-4231-a5e6-26a264f6d1b1_015965ba-53b4-4b89-bae3-5dc351212d9f,9ced0c70-c46c-4f98-8f48-840defe4b8b1,TP,NB,4584c7e2-8011-4231-a5e6-26a264f6d1b1,015965ba-53b4-4b89-bae3-5dc351212d9f,TCGA-D8-A1JA-10A,TCGA-D8-A1JA-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,0.004527,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,82.564876,35.604842,387.0,0.042127,TCGA-D8-A1JA-01A-11D-A897-36.WholeGenome.RP-16...,88.668252,0.9993195,151.0
45c79141-a125-4305-b1b7-c838de1af8a3_574293a5-ec7b-479d-af12-27b5e1aaeca1,214a4507-d974-4b3e-8525-7408fccc6a0f,TP,NB,45c79141-a125-4305-b1b7-c838de1af8a3,574293a5-ec7b-479d-af12-27b5e1aaeca1,TCGA-BH-A0B7-10A,TCGA-BH-A0B7-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
4874e4e2-c067-490d-9d99-8d3ca194b0a4_de02562d-cbcf-434e-81a3-2eb7f09a3747,dd0f8e5a-383d-4f03-bb71-0e115d4e442c,TP,NB,4874e4e2-c067-490d-9d99-8d3ca194b0a4,de02562d-cbcf-434e-81a3-2eb7f09a3747,TCGA-C8-A12M-10A,TCGA-C8-A12M-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
4c54a99b-a820-4602-9b39-deaca98034a5_0ab0bff3-9c3d-466b-91c2-5c95613a5073,68177e92-4784-45d9-af14-4ce0c00eae74,TP,NB,4c54a99b-a820-4602-9b39-deaca98034a5,0ab0bff3-9c3d-466b-91c2-5c95613a5073,TCGA-A8-A08J-10B,TCGA-A8-A08J-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
4f69c5f3-4e8c-4b71-a5fe-3179412c17e6_a91eabd7-bfbc-43f4-ae3a-46800dedce29,d5be2ab3-f748-480d-954b-385cf01b4c81,TP,NB,4f69c5f3-4e8c-4b71-a5fe-3179412c17e6,a91eabd7-bfbc-43f4-ae3a-46800dedce29,TCGA-D8-A27N-10A,TCGA-D8-A27N-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,0.005474,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,90.823794,34.502734,406.0,0.037964,TCGA-D8-A27N-01A-11D-A89A-36.WholeGenome.RP-16...,74.300717,0.9986494,151.0
58bfe278-a80f-4722-b286-4d966214d244_b41594a0-5892-4ae6-a354-ddf09a7772a0,e4fc0909-f284-4471-866d-d8967b6adcbc,TP,NB,58bfe278-a80f-4722-b286-4d966214d244,b41594a0-5892-4ae6-a354-ddf09a7772a0,TCGA-E2-A14P-10A,TCGA-E2-A14P-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
5de50617-0a03-45b7-8ffd-bcf3a0680d6b_aaca1df7-d409-4e6e-b592-f035409fcb27,89e867de-d30e-4bfc-a453-0a76309dabd5,TP,NB,5de50617-0a03-45b7-8ffd-bcf3a0680d6b,aaca1df7-d409-4e6e-b592-f035409fcb27,TCGA-C8-A132-10A,TCGA-C8-A132-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,
660954c6-51ed-4b74-8eb0-65c282da3971_580dc219-de38-4381-ba33-5b24bb1c90ec,2585edbd-3488-4100-a4ab-a161252a31ee,TP,NB,660954c6-51ed-4b74-8eb0-65c282da3971,580dc219-de38-4381-ba33-5b24bb1c90ec,TCGA-D8-A140-10A,TCGA-D8-A140-01A,Primary Tumor,Blood Derived Normal,gs://fc-secure-df24feb9-b120-454d-98f1-2ac4880...,...,,,,,,,,,,


In [None]:
# download rdata locally
# Download each pair's ABSOLUTE RData (paths in `absolute_rdata_WGS`) into
# `local_rdata_dir`, and store what `download_rdata` returns in a new
# `local_absolute_rdata` column.
# NOTE: this adds the column to `wm_pairs_df` in place.
local_rdata_dir = 'local_rdata'
wm_pairs_df['local_absolute_rdata'] = download_rdata(wm_pairs_df['absolute_rdata_WGS'], rdata_dir=local_rdata_dir)

In [None]:
# Spot-check: show the first entry of the newly added `local_absolute_rdata` column.
wm_pairs_df['local_absolute_rdata'].iloc[0]

Unnamed: 0_level_0,PCA__ABSOLUTE__Cancer_DNA_fraction,PCA__ABSOLUTE__Coverage_for_80_pct_power,PCA__ABSOLUTE__Genome_doublings,PCA__ABSOLUTE__Subclonal_genome_fraction,PCA__ABSOLUTE__call_status,PCA__ABSOLUTE__ploidy,PCA__ABSOLUTE__purity,PCA__ABSOLUTE__sample,PCA__ABSOLUTE__solution,WGS__BWAwithMarkDuplicatesandBQSR__aligned_reads__bai__drs_url,WGS__BWAwithMarkDuplicatesandBQSR__aligned_reads__bam__average_read_length,WGS__BWAwithMarkDuplicatesandBQSR__aligned_reads__bam__drs_url,WGS__BWAwithMarkDuplicatesandBQSR__aligned_reads__bam__gdc_release,WGS__BWAwithMarkDuplicatesandBQSR__aligned_reads__bam__mean_coverage,participant,sample_type,sample_type_code,submitter_id,tissue_type
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
000002fc-53a0-420e-b2aa-a40a358bba37,,,,,,,,,,drs://dg.4DFC:d7fcc34b-163a-4df0-9a8c-3cc4e97f...,151,drs://dg.4DFC:72922f34-f7bd-4d66-b099-b25cfbb3...,39,37.374038,271e8024-a008-4bf0-9f6e-177f60096b1b,Blood Derived Normal,NB,TCGA-E2-A15G-10A,Normal
0039a298-3369-4da5-9bfd-19792e9632be,0.73,11.0,0.0,0.14,called,2.01,0.73,TCGA-OL-A5RY-01A-21D-A28A-01,new,drs://dg.4DFC:c56ff381-9b9a-4d82-8547-1acf6d68...,151,drs://dg.4DFC:81f41338-0539-45d4-a656-245ee983...,39,87.247114,8360cec6-daf6-41c9-9a4f-7fc03c958dcc,Primary Tumor,TP,TCGA-OL-A5RY-01A,Tumor
003c8cff-ec0e-4f7c-b38b-93575fd68a23,,,,,,,,,,drs://dg.4DFC:ec3ff1b7-ecc9-439e-b716-18b9294e...,151,drs://dg.4DFC:ca29a575-d626-49e6-8519-c74947ba...,39,32.350460,4e6edfe6-adcb-4c12-8ff4-38a79f5887e8,Blood Derived Normal,NB,TCGA-B6-A0X5-10A,Normal
0050d7c9-ece9-4b6c-8023-1ff2efcb3c9c,0.73,17.0,1.0,0.00,called,3.05,0.64,TCGA-3C-AALI-01A-11D-A41E-01,new,drs://dg.4DFC:39da1845-c58a-4b0c-abc0-3dfa06d4...,151,drs://dg.4DFC:3b30c210-c8e8-49e2-a756-3fac591d...,39,95.615588,55262fcb-1b01-4480-b322-36570430c917,Primary Tumor,TP,TCGA-3C-AALI-01A,Tumor
008dafdd-a3d1-4801-8c0a-8714e2b58e48,0.50,16.0,0.0,0.02,called,1.95,0.51,TCGA-AO-A0JJ-01A-11D-A059-01,new,drs://dg.4DFC:22929a72-0afa-4967-87fc-8461e09f...,151,drs://dg.4DFC:c7125f93-1afd-449e-bbfe-c9c2abd9...,39,93.925069,bef7b135-a727-45e8-850a-cc4cd56c49aa,Primary Tumor,TP,TCGA-AO-A0JJ-01A,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fed3a891-21be-4196-b9ef-0bf7c8f976aa,0.64,11.0,0.0,0.45,called,1.80,0.67,TCGA-GM-A3XL-01A-11D-A22W-01,new,drs://dg.4DFC:469e6cf6-5f73-4469-bcdf-1d16307b...,151,drs://dg.4DFC:3c3e329e-b43d-4068-b70d-13cef90d...,39,92.007449,65cac997-4d39-4501-85ec-4fcb328a8eb5,Primary Tumor,TP,TCGA-GM-A3XL-01A,Tumor
fef9fd2b-5bab-43eb-aa47-1e93adf29099,,,,,,,,,,drs://dg.4DFC:b653d0fd-8363-4ac8-9305-4df35f09...,151,drs://dg.4DFC:d9bd1bf3-37c4-4ee8-a4f1-0e74220f...,39,36.809534,807b62ed-d3f7-4211-83be-754386ff2c96,Blood Derived Normal,NB,TCGA-E9-A3HO-10A,Normal
ff8c73fc-188d-4e36-867c-afdb6fa7b1d8,,,,,,,,,,drs://dg.4DFC:2e632563-7540-42ac-a5bd-8f2c64fa...,151,drs://dg.4DFC:7c71a45b-ab32-4944-b3ce-bdb36c17...,39,40.638454,a6e11b30-3ae8-4dd1-b04c-a730c6a79746,Blood Derived Normal,NB,TCGA-AR-A1AH-10A,Normal
ffa2ca7e-93d7-452d-870e-c7c36c032e4f,,,,,,,,,,drs://dg.4DFC:3f05e1f0-8ee7-4dc5-a42a-33cedc86...,151,drs://dg.4DFC:85164612-8c49-44a1-9493-3738c384...,39,42.442300,ff0f875f-113a-4c0f-8c0a-4259cd496e6d,Blood Derived Normal,NB,TCGA-C8-A26W-10A,Normal


## Pass in data

In [18]:
from PurityReviewer.AppComponents.utils import CSIZE_DEFAULT
# Keys '23' and '24' are relabelled as 'X' and 'Y'.
sex_chr_map = {'23': 'X', '24': 'Y'}

# Label for every chromosome key in CSIZE_DEFAULT; keys that are not in
# `sex_chr_map` keep their original label.
rename_chroms = {chrom: sex_chr_map.get(chrom, chrom) for chrom in CSIZE_DEFAULT}

# Same lengths as CSIZE_DEFAULT, re-keyed as 'chr<label>' (e.g. 'chr1', 'chrX').
custom_csize = {
    f'chr{label}': CSIZE_DEFAULT[chrom]
    for chrom, label in rename_chroms.items()
}
custom_csize

{'chr1': 249250621,
 'chr2': 243199373,
 'chr3': 198022430,
 'chr4': 191154276,
 'chr5': 180915260,
 'chr6': 171115067,
 'chr7': 159138663,
 'chr8': 146364022,
 'chr9': 141213431,
 'chr10': 135534747,
 'chr11': 135006516,
 'chr12': 133851895,
 'chr13': 115169878,
 'chr14': 107349540,
 'chr15': 102531392,
 'chr16': 90354753,
 'chr17': 81195210,
 'chr18': 78077248,
 'chr19': 59128983,
 'chr20': 63025520,
 'chr21': 48129895,
 'chr22': 51304566,
 'chrX': 156040895,
 'chrY': 57227415}

In [19]:
from rpy2.robjects import r, pandas2ri
import rpy2.robjects as robjects

def my_custom_parse_absolute_soln(rdata_path: str) -> pd.DataFrame: # has to be a local path
    """Read the ABSOLUTE solutions table out of a local RData file.

    The file is loaded into the embedded R session, the first object it
    defines is fetched, and that object's ``mode.tab`` entry is read directly.
    Selected ``mode.tab`` columns are copied, by position, into a DataFrame
    with one row per row of ``mode.tab``.

    NOTE(review): this assumes ``mode.tab`` sits directly on the loaded object
    and that its column positions match the indices below. Other RData layouts
    (e.g. ``mode.tab`` nested under a per-sample entry and ``mode.res``) need a
    different lookup; confirm against your ABSOLUTE output. The original author
    also left an open question whether ``segtab`` should be read instead.

    Parameters
    ----------
    rdata_path : str
        Path to the RData file. Must be a local path, because it is passed to
        R's ``load``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``alpha``, ``tau``, ``tau_hat``, ``0_line``,
        ``1_line``, ``sigma_H``, ``theta_Q``, ``lambda``, ``SCNA_likelihood``,
        ``Kar_likelihood``, ``SSNVs_likelihood``, ``step_size``.
        ``step_size`` is twice ``mode.tab`` column 4 and ``1_line`` is
        ``0_line + step_size``.
    """
    # Output column order; 'step_size' is appended after these.
    absolute_rdata_cols = ['alpha', 'tau', 'tau_hat', '0_line', '1_line',
                           'sigma_H',
                           'theta_Q',
                           'lambda',
                           'SCNA_likelihood',
                           'Kar_likelihood',
                           'SSNVs_likelihood']

    # Output column -> positional column index in the `mode.tab` matrix.
    mode_tab_col_idx = {
        'alpha': 0,
        'tau': 1,
        'tau_hat': 7,
        '0_line': 3,
        'sigma_H': 8,
        'theta_Q': 11,
        'lambda': 12,
        'SCNA_likelihood': 15,
        'Kar_likelihood': 17,
        'SSNVs_likelihood': 20,
    }
    # `mode.tab` column that is doubled to give 'step_size'.
    half_step_idx = 4

    # Enable rpy2's automatic R -> pandas/numpy conversion (a global setting);
    # the 2-D slicing of `mode_tab` below relies on it.
    pandas2ri.activate()

    # R's `load` returns the names of the objects it created; take the first one.
    loaded_names = robjects.r['load'](rdata_path)
    absolute_obj = robjects.r[loaded_names[0]]
    mode_tab = absolute_obj.rx2('mode.tab')

    mod_tab_df = pd.DataFrame(columns=absolute_rdata_cols)
    for col_name, col_idx in mode_tab_col_idx.items():
        mod_tab_df[col_name] = mode_tab[:, col_idx]

    mod_tab_df['step_size'] = mode_tab[:, half_step_idx] * 2
    mod_tab_df['1_line'] = mod_tab_df['step_size'] + mod_tab_df['0_line']

    return mod_tab_df

In [None]:
# Build the matched purity reviewer for the Terra workspace pairs.
# The calls below are made in a fixed order: data first, then the app layout,
# then the default annotation configuration and autofill.
matched_reviewer = MatchedPurityReviewer()

# Register the pairs table; each entry of `wm_pairs_df.index` is one item to review.
# NOTE(review): 'reviwer' in the data_path below looks like a typo of 'reviewer'.
# It is left unchanged because renaming it would point at a different location.
matched_reviewer.set_review_data(
    data_path = 'matched_purity_reviwer_output', 
    description= 'BRCA purity review', 
    df=wm_pairs_df,
    index=wm_pairs_df.index,
)

# Tell the app which columns of `wm_pairs_df` hold each input it needs.
# `rdata_fn_col` points at the locally downloaded RData paths, not the
# original `absolute_rdata_WGS` column.
# NOTE(review): `custom_csize` ('chr'-prefixed keys) and
# `my_custom_parse_absolute_soln` are defined earlier in this notebook but are
# not passed here: `csize` is CSIZE_DEFAULT and the custom parser line is
# commented out. Confirm which is intended for this workspace's files.
matched_reviewer.set_review_app(
    sample_info_cols=['absolute_highres_plot_WGS', 'hapaseg_allelic_segmentation_plot_WGS'],
    acs_col='hapaseg_segfile_WGS', 
    maf_col='absolute_downsample_MAF_WGS',
    rdata_fn_col='local_absolute_rdata',
    mut_fig_hover_data=['Hugo_Symbol', 'Chromosome', 'Start_position'],
    csize=CSIZE_DEFAULT,
    # custom_parse_absolute_soln=my_custom_parse_absolute_soln
)

# Use the reviewer's built-in annotation configuration and autofill behaviour.
matched_reviewer.set_default_review_data_annotations_configuration()
matched_reviewer.set_default_autofill()

## Run the reviewer

In [None]:
# Launch the reviewer app on port 8099. This is the same port as the
# simulated-data example above, so stop that app first if it is still running.
# `mode='tab'` presumably opens the app in a separate browser tab — confirm.
matched_reviewer.run(port=8099, mode='tab')