# Purity Reviewer Example

In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to locally installed packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
    

In [None]:
from PurityReviewer.Reviewers.MatchedPurityReviewer import MatchedPurityReviewer
from PurityReviewer.Reviewers.ManualPurityReviewer import ManualPurityReviewer
from PurityReviewer.AppComponents.utils import download_rdata
import pandas as pd
import numpy as np
import os

# Run with simulated tumor data
## Setup

Clone the `SimulatedTumorData` repo and install in your environment.
```
cd <path to put SimulatedTumorData repo>
git clone https://github.com/getzlab/SimulatedTumorData

conda activate <your env>
pip install -e <path to put SimulatedTumorData repo>
```

This only needs to be done once.

## Load simulated data

In [None]:
from SimulatedTumorData.src.generate_simulated_data import load_patients_and_samples

In [None]:
# Load the simulated cohort from the cloned SimulatedTumorData repo. The path is
# relative to the notebook's working directory. `samples` is passed to the
# reviewer below as its DataFrame; `participants` is not used by the cells
# visible in this notebook.
samples, participants = load_patients_and_samples(path_to_sim_data="SimulatedTumorData/sim_data")

## Pass in data

In [None]:
from PurityReviewer.AppComponents.utils import parse_absolute_soln, CSIZE_DEFAULT
from PurityReviewer.AppComponents.utils import parse_absolute_soln_simulatedTumorData

In [None]:
# Value passed as `data_path` to `set_review_data` below for the simulated-data
# review (presumably where the review session is stored -- confirm in the
# PurityReviewer docs).
data_path = 'simulated_data_purity_review'

In [None]:
# Build the reviewer for the simulated samples. The calls below are made in this
# order: review data first, then the app, then the default annotation
# configuration and autofill.
matched_reviewer = MatchedPurityReviewer()
# Review data: one entry per row of `samples`, keyed by the DataFrame's index.
matched_reviewer.set_review_data(
    data_path=data_path, 
    description='Matched purity reviewer for simulated data', 
    df=samples,
    index=samples.index, 
)
# App: the string arguments name columns of `samples` (presumably file paths for
# the seg file, MAF and ABSOLUTE RData -- confirm against the PurityReviewer docs).
matched_reviewer.set_review_app(
    sample_info_cols=['ABSOLUTE_plot_fn', 'wxs_purity', 'wxs_ploidy'],
    acs_col='cnv_seg_fn', 
    maf_col='maf_fn',
    rdata_fn_col='ABSOLUTE_RData',
    mut_fig_hover_data=['Hugo_Symbol', 'Chromosome', 'Start_position'],
    csize=CSIZE_DEFAULT,
    custom_parse_absolute_soln=parse_absolute_soln_simulatedTumorData # <-- update with my_custom_parse_absolute_soln()
)

# Use the reviewer's default annotation configuration and autofill behaviour.
matched_reviewer.set_default_review_data_annotations_configuration()
matched_reviewer.set_default_autofill()

## Run the reviewer

In [None]:
# Launch the reviewer on port 8099 with mode='tab'. Per the keyword name, the
# 'source_data_fn' column is hidden from the history table -- confirm in the
# PurityReviewer docs.
matched_reviewer.run(port=8099, mode='tab', collapsable=False, hide_history_df_cols=['source_data_fn'])

In [None]:
# Last expression of the cell, so the notebook renders whatever `get_annot()`
# returns (presumably the annotations recorded so far -- confirm).
matched_reviewer.get_annot()

# Purity Review from CGA characterization pipeline Terra workspace

## Load data

In [None]:
import dalmatian

```
import os
os.environ["GCLOUD_PROJECT"] = <google project>

wm = dalmatian.WorkspaceManager(<namespace>/<workspace_name>)
pairs_df = wm.get_pairs()

# Download rdata from ABSOLUTE locally
rdata_dir = <directory to save rData>
downloaded_rdata_s = download_rdata(pairs_df['absolute_rdata'], rdata_dir=rdata_dir)
downloaded_rdata_s.name = 'local_absolute_rdata'
pairs_df = pd.concat([pairs_df, downloaded_rdata_s], axis=1)
```

In [None]:
# Terra workspace in '<namespace>/<workspace_name>' form.
workspace = 'broad-tcga-wgs-terra/TCGA_OV_WGS'
wm = dalmatian.WorkspaceManager(workspace)
# Keep only the first five pairs from the workspace's pairs table.
wm_pairs_df = wm.get_pairs().iloc[:5]
display(wm_pairs_df.head())

In [None]:
# Download the ABSOLUTE RData files locally.
# Set the PURITY_REVIEWER_RDATA_DIR environment variable to choose where they
# are saved; the fallback is the original tutorial location, which is a
# user-specific absolute path and will not exist on other machines.
rdata_dir = os.environ.get(
    'PURITY_REVIEWER_RDATA_DIR',
    '/Users/cchu/Desktop/Methods/JupyterReviewer/data/Purity_Reviewer_Tutorial/local_rdata_2023-01-30',
)
# `wm_pairs_df` was created as an `.iloc` slice, so assigning a column onto it in
# place raises SettingWithCopyWarning. `assign` returns a new frame instead,
# which also makes this cell safe to re-run.
wm_pairs_df = wm_pairs_df.assign(
    local_absolute_rdata=download_rdata(wm_pairs_df['absolute_rdata'], rdata_dir=rdata_dir)
)

In [None]:
# Spot-check: show the first value of the column filled in by `download_rdata` above.
wm_pairs_df['local_absolute_rdata'].iloc[0]

## Pass in data

```
from PurityReviewer.AppComponents.utils import CSIZE_DEFAULT
sex_chr_map = {'23': 'X', '24': 'Y'}
rename_chroms = {x: sex_chr_map[x] if x in sex_chr_map.keys() else x for x in CSIZE_DEFAULT.keys()}
custom_csize = {f'chr{rename_chroms[chrom]}': length for chrom, length in CSIZE_DEFAULT.items()}
custom_csize
```

In [None]:
from rpy2.robjects import r, pandas2ri
import rpy2.robjects as robjects

def my_custom_parse_absolute_soln(rdata_path: str) -> pd.DataFrame:  # has to be a local path
    """Read the `mode.tab` table of an ABSOLUTE RData file into a DataFrame.

    The file is loaded into the embedded R session, the first object it defines
    is looked up by name, and that object's `mode.tab` entry is read by column
    position.

    Args:
        rdata_path: Local path to the RData file.

    Returns:
        DataFrame with one row per row of `mode.tab` and the columns
        'alpha', 'tau', 'tau_hat', '0_line', '1_line', 'sigma_H', 'theta_Q',
        'lambda', 'SCNA_likelihood', 'Kar_likelihood', 'SSNVs_likelihood' and
        'step_size', in that order. 'step_size' is twice `mode.tab` column 4 and
        '1_line' is '0_line' + 'step_size'.

    NOTE(review): the column positions are hard-coded and assume a fixed
    `mode.tab` layout -- confirm against the ABSOLUTE version that produced
    the file.
    """
    # Position of each directly-copied output column inside `mode.tab`.
    mode_tab_positions = {
        'alpha': 0,
        'tau': 1,
        'tau_hat': 7,
        '0_line': 3,
        'sigma_H': 8,
        'theta_Q': 11,
        'lambda': 12,
        'SCNA_likelihood': 15,
        'Kar_likelihood': 17,
        'SSNVs_likelihood': 20,
    }
    step_size_position = 4
    absolute_rdata_cols = ['alpha', 'tau', 'tau_hat', '0_line', '1_line',
                           'sigma_H',
                           'theta_Q',
                           'lambda',
                           'SCNA_likelihood',
                           'Kar_likelihood',
                           'SSNVs_likelihood']

    # Enable rpy2's pandas conversion (presumably so `mode.tab` comes back as an
    # array that supports `[:, i]` indexing -- confirm).
    pandas2ri.activate()
    # R's `load` returns the names of the objects it created; use the first one.
    loaded_names = robjects.r['load'](rdata_path)
    absolute_result = robjects.r[loaded_names[0]]
    mode_tab = absolute_result.rx2('mode.tab')

    # Start from the declared column order so the output layout stays stable;
    # 'step_size' is not in that list and is therefore appended last.
    mod_tab_df = pd.DataFrame(columns=absolute_rdata_cols)
    for col_name, position in mode_tab_positions.items():
        mod_tab_df[col_name] = mode_tab[:, position]
    mod_tab_df['step_size'] = mode_tab[:, step_size_position] * 2
    mod_tab_df['1_line'] = mod_tab_df['step_size'] + mod_tab_df['0_line']

    return mod_tab_df

In [None]:
# Review session settings for the Terra pairs. These two values are example
# defaults -- change them for your own review.
terra_data_path = 'terra_purity_review'
terra_description = 'Matched purity reviewer for Terra workspace pairs'

# Build the reviewer from the pairs table loaded above (`wm_pairs_df`), which
# carries the 'local_absolute_rdata' column added by the download step.
matched_reviewer = MatchedPurityReviewer()
matched_reviewer.set_review_data(
    data_path=terra_data_path,
    description=terra_description,
    df=wm_pairs_df,
    index=wm_pairs_df.index,
)
# The string arguments name columns of the pairs table.
matched_reviewer.set_review_app(
    sample_info_cols=['absolute_highres_plot', 'alleliccapseg_plot', 'wxs_purity'],
    acs_col='alleliccapseg_tsv',
    maf_col='mutation_validator_validated_maf',
    rdata_fn_col='local_absolute_rdata',
    mut_fig_hover_data=['Hugo_Symbol', 'Chromosome', 'Start_position'],
    csize=CSIZE_DEFAULT,
    custom_parse_absolute_soln=parse_absolute_soln # <-- update with my_custom_parse_absolute_soln()
)

# Use the reviewer's default annotation configuration and autofill behaviour.
matched_reviewer.set_default_review_data_annotations_configuration()
matched_reviewer.set_default_autofill()

## Run the reviewer

In [None]:
# Launch the reviewer on port 8099 with mode='tab', passing the 'wxs_purity' and
# 'participant' columns of the pairs table as `review_data_table_df`.
matched_reviewer.run(port=8099, review_data_table_df=wm_pairs_df[['wxs_purity', 'participant']], mode='tab')