# met_site_data_creation_impact.py

### By Chris Fong - MSKCC 2020

This script will create a standardized table of metastatic sites for a given:
- Clinical sample table downloaded from the cohort tab in cBioPortal 

This script will leverage RDN's metastatic mapping from IMPACT patient cancer types to distant, lymphatic, and regional metastatic disease.

This script will ONLY cover metastatic samples from the IMPACT cohort. Not all metastatic disease sites are covered.



## Load Libraries

In [1]:
import sys  
sys.path.insert(0, 'mappings')
sys.path.insert(0, 'analysis')
import os
import pandas as pd
import constants_o_sites as const
from organ_mapping_analysis import OrganMappingAnalysisRND
from organ_mapping_rdn_processing import MetastaticSpreadMappingRND

In [2]:
# Console settings
def set_debug_console():
    """Configure pandas display options for wide console/notebook output.

    The options are set globally through ``pd.set_option``, so they apply to
    every DataFrame rendered after this call. Returns ``None``.

    Note: an earlier version set ``display.width`` twice (320, then 1000);
    only the last value ever took effect, so a single entry is kept here.
    """
    display_options = {
        'display.width': 1000,
        'display.max_rows': 250,
        'display.max_columns': 500,
        # Keep wide frames on one line instead of wrapping column groups
        'display.expand_frame_repr': False,
    }
    for option_name, option_value in display_options.items():
        pd.set_option(option_name, option_value)


set_debug_console()

## Load Data

In [3]:
# Output filenames (relative paths):
#   - the per-sample metastatic-site annotation table
#   - the binary (one indicator column per site) version of that table
fname_save_anno = "metatrop_met_site_annotations_impact.csv"
fname_save_binary = "metatrop_met_site_annotations_impact_binary.csv"

In [4]:
## Load cbioportal clinical data file --------------
# Tab-separated clinical sample table downloaded from cBioPortal.
# NOTE(review): `path` is a user-specific absolute directory; point it at your
# local copy of the data before running.
fname = 'mskimpact_clinical_data.tsv'
path = '/Users/fongc2/Documents/github/MSK/DARWIN_ETL/data'
pathfilename1 = os.path.join(path, fname)
df_samples1 = pd.read_csv(pathfilename1, header=0, low_memory=False, sep='\t')

# Standardized column names used throughout the rest of the notebook
col_id = 'SAMPLE_ID'
col_id2 = 'DMP_ID'
col_ct = 'CANCER_TYPE'
col_sex = 'SEX'
col_prim_site = 'PRIMARY_SITE'
col_met_site = 'METASTATIC_SITE'
col_sample_type = 'SAMPLE_TYPE'

# Map the cBioPortal display headers onto the standardized names above
col_rep = {'Patient ID': col_id2,
           'Sample ID': col_id,
           'Sex': col_sex,
           'Cancer Type': col_ct,
           'Primary Tumor Site': col_prim_site,
           'Metastatic Site': col_met_site,
           'Sample Type': col_sample_type}
df_samples1 = df_samples1.rename(columns=col_rep)

# Keep only the renamed columns (raises KeyError if the export lacks any of them)
df_samples = df_samples1[list(col_rep.values())]

# Restrict to metastatic samples. The explicit .copy() makes the result an
# independent frame, so later code that adds columns to it cannot trigger
# pandas' SettingWithCopyWarning or write through to df_samples.
df_metatrop_met = df_samples[df_samples[col_sample_type] == 'Metastasis'].copy()


In [5]:
# Preview the first rows of the metastasis-only sample table
df_metatrop_met.head()

Unnamed: 0,DMP_ID,SAMPLE_ID,SEX,CANCER_TYPE,PRIMARY_SITE,METASTATIC_SITE,SAMPLE_TYPE
2,P-0000012,P-0000012-T03-IM3,Female,Non-Small Cell Lung Cancer,Lung,Neck,Metastasis
4,P-0000015,P-0000015-T01-IM3,Female,Breast Cancer,Breast,Liver,Metastasis
6,P-0000024,P-0000024-T01-IM3,Female,Endometrial Cancer,Uterus,Lung,Metastasis
8,P-0000025,P-0000025-T02-IM5,Female,Endometrial Cancer,Uterus,Peritoneum,Metastasis
9,P-0000025,P-0000025-T03-IM6,Female,Endometrial Cancer,Uterus,Liver,Metastasis


## Load Mapping Objects

### Load mapping tables

In [6]:
# Build the RDN metastatic-spread mapping object. The directory and every
# mapping-table filename come from the constants_o_sites module (const).
obj_met_map_rdn = MetastaticSpreadMappingRND(
    path=const.pathname,
    fname_all_sites=const.fname_mapping_rdn_all_sites,
    fname_hematogenous=const.fname_mapping_rdn_hematogenous,
    fname_localext=const.fname_mapping_rdn_localext,
    fname_lymphatic=const.fname_mapping_rdn_lymphatic,
    fname_site_map=const.fname_mapping_rdn_site_map,
    fname_billing_map=const.fname_mapping_rdn_billing_map,
    fname_billing_code_dict=const.fname_mapping_rdn_to_billing_codes,
)

Loading mapping tables


### Load mapping object

In [7]:
# Load annotations object: hand the RDN mapping object (obj_met_map_rdn) to the analysis helper
obj_mapping = OrganMappingAnalysisRND(obj_met_map=obj_met_map_rdn)

## Create Mapping on Clinical Samples Table from cBioPortal

In [8]:
# Annotate the metastatic IMPACT samples with the RDN site mapping, using the
# standardized primary-site and metastatic-site columns as inputs.
# NOTE(review): label_dist_ln=True presumably labels distant lymph-node sites
# separately (cf. HAS_MET_DIST_LYMPH in the binary output) — confirm against
# OrganMappingAnalysisRND.
df_met_sites_impact = obj_mapping.annotate_mapping_impact_met_samples(
    df_samples=df_metatrop_met,
    col_primary_site=col_prim_site,
    col_met_site=col_met_site,
    label_dist_ln=True,
)

# Preview the first annotated rows
df_met_sites_impact.head()

Unnamed: 0,DMP_ID,SAMPLE_ID,SEX,CANCER_TYPE,PRIMARY_SITE,METASTATIC_SITE,SAMPLE_TYPE,PRIMARY_SITE_RDN_MAP,origin_primary,PRIMARY_SITE_RDN_MAP_MAIN,PRIMARY_SITE_RDN_MAP_SECONDARY,METASTATIC_SITE_RDN_MAP,origin_primary.1,METASTATIC_SITE_RDN_MAP_MAIN,METASTATIC_SITE_RDN_MAP_SECONDARY,LYMPH_SPREAD,LOCAL_EXTENSION,origin,hematogenous_grouping,METASTATIC_SITE_ONCOTREE_RDN,METASTATIC_SITE_BILLING_RDN
0,P-0000012,P-0000012-T03-IM3,Female,Non-Small Cell Lung Cancer,Lung,Neck,Metastasis,chest_lung,primary&mets,chest,lung,neck_unknown,primary&mets,neck,unknown,,,primary&mets,NON_PORTAL,Head and Neck,HEAD_AND_NECK
1,P-0000015,P-0000015-T01-IM3,Female,Breast Cancer,Breast,Liver,Metastasis,chest_breast,primary&mets,chest,breast,abdomen_liver,primary&mets,abdomen,liver,,,primary&mets,LIVER,Liver,LIVER
2,P-0000024,P-0000024-T01-IM3,Female,Endometrial Cancer,Uterus,Lung,Metastasis,pelvis_uterus,primary&mets,pelvis,uterus,chest_lung,primary&mets,chest,lung,,,primary&mets,LUNG,Lung,LUNG
3,P-0000025,P-0000025-T02-IM5,Female,Endometrial Cancer,Uterus,Peritoneum,Metastasis,pelvis_uterus,primary&mets,pelvis,uterus,abdomen_peritoneum.serosa,primary&mets,abdomen,peritoneum.serosa,,LOCAL,primary&mets,PORTAL,Peritoneum,PERITONEUM
4,P-0000025,P-0000025-T03-IM6,Female,Endometrial Cancer,Uterus,Liver,Metastasis,pelvis_uterus,primary&mets,pelvis,uterus,abdomen_liver,primary&mets,abdomen,liver,,,primary&mets,LIVER,Liver,LIVER


## Save annotations

In [9]:
# Save RDN annotations to fname_save_anno (a bare filename, so the file is
# written relative to the current working directory); index=False omits the
# DataFrame index column from the CSV.
df_met_sites_impact.to_csv(fname_save_anno, index=False)

## Create binary matrix from mapping

In [10]:
# Build the binary met-site matrix from the annotated table: rows are keyed by
# sample ID and the site categories come from the RDN billing-site column.
df_binary = obj_mapping.create_binary_met_sites(
    df=df_met_sites_impact,
    col_index='SAMPLE_ID',
    col_count='DMP_ID',
    col_met_site='METASTATIC_SITE_BILLING_RDN',
)

In [11]:
# Preview up to the first 50 rows of the binary met-site matrix
df_binary.head(50)

METASTATIC_SITE_BILLING_RDN,SAMPLE_ID,HAS_MET_ADRENAL_GLAND,HAS_MET_BILIARY_TRACT,HAS_MET_BLADDER_OR_URINARY_TRACT,HAS_MET_BONE,HAS_MET_BOWEL,HAS_MET_BREAST,HAS_MET_CNS_BRAIN,HAS_MET_DIST_LYMPH,HAS_MET_GENITAL_FEMALE,HAS_MET_GENITAL_MALE,HAS_MET_HEAD_AND_NECK,HAS_MET_KIDNEY,HAS_MET_LIVER,HAS_MET_LUNG,HAS_MET_LYMPH,HAS_MET_MEDIASTINUM,HAS_MET_OTHER,HAS_MET_OVARY,HAS_MET_PERITONEUM,HAS_MET_PLEURA,HAS_MET_SKIN
0,P-0000012-T03-IM3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,P-0000015-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,P-0000024-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,P-0000025-T02-IM5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,P-0000025-T03-IM6,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
5,P-0000030-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
6,P-0000037-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
7,P-0000043-T02-IM3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
8,P-0000058-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
9,P-0000059-T01-IM3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
