# met_site_data_creation_clinical.py

### By Chris Fong - MSKCC 2020

This script will create a standardized table of metastatic sites for a given:
- Diagnosis table containing ICD billing codes and 
- Clinical sample table downloaded from the cohort tab in cBioPortal 

The script can compute whether a distant lymph node (LN) metastasis occurred for each patient, given their cancer type according to IMPACT data. Similarly, local extension is computed for visceral organs.

If a patient has multiple diagnoses (dx) in IMPACT, more than one row will exist for that patient, and the extra rows should be removed.



In [11]:
import sys  
sys.path.insert(0, 'mappings')
sys.path.insert(0, 'analysis')
import os
import pandas as pd
import constants_o_sites as const
from organ_mapping_analysis import OrganMappingAnalysisRND
from organ_mapping_rdn_processing import MetastaticSpreadMappingRND

In [21]:
# Console settings
def set_debug_console():
    """Configure pandas display options so wide tables print without wrapping.

    Sets the console width, the maximum number of rows and columns shown, and
    disables wrapping of wide frames across multiple "pages".

    The original code set ``display.width`` twice (320, then 1000); only the
    last value ever took effect, so the effective value (1000) is kept and the
    dead assignment is removed.
    """
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_rows', 250)
    pd.set_option('display.max_columns', 500)
    # Print wide DataFrames on a single stretch instead of wrapping columns
    pd.set_option('display.expand_frame_repr', False)

set_debug_console()

In [3]:
# Output CSV file that receives the final annotated metastatic-site table
fname_save_anno = 'metatrop_met_site_annotations_clinical.csv'

# Column names passed to the mapping/annotation calls in later cells
col_icd_billing = 'ICD-9/10 Dx Code'  # ICD billing code column of the diagnosis table
col_met_site = 'METASTATIC_SITE'  # metastatic site column of the clinical sample table
col_primary_site = 'PRIMARY_SITE'  # primary site column of the clinical sample table

In [4]:
## Load cbioportal clinical data file --------------
# Location of the clinical sample export (tab-separated)
path = '/Users/fongc2/Documents/github/MSK/DARWIN_ETL/data'
fname = 'mskimpact_clinical_data.tsv'
pathfilename1 = os.path.join(path, fname)

# Standardized identifier column names
col_id = 'SAMPLE_ID'
col_id2 = 'DMP_ID'

# cBioPortal display headers -> standardized column names.
# The insertion order here also fixes the column order of df_samples.
col_rep = {
    'Patient ID': col_id2,
    'Sample ID': col_id,
    'Sex': 'SEX',
    'Cancer Type': 'CANCER_TYPE',
    'Primary Tumor Site': 'PRIMARY_SITE',
    'Metastatic Site': 'METASTATIC_SITE',
}

# Read the export, rename the headers, then keep only the standardized columns
df_samples1 = pd.read_csv(
    pathfilename1, sep='\t', header=0, low_memory=False
).rename(columns=col_rep)
df_samples = df_samples1[list(col_rep.values())]

In [20]:
df_samples.head()

Unnamed: 0,DMP_ID,SAMPLE_ID,SEX,CANCER_TYPE,PRIMARY_SITE,METASTATIC_SITE
0,P-0000004,P-0000004-T01-IM3,Female,Breast Cancer,Breast,
1,P-0000012,P-0000012-T02-IM3,Female,Breast Cancer,Breast,
2,P-0000012,P-0000012-T03-IM3,Female,Non-Small Cell Lung Cancer,Lung,Neck
3,P-0000012,P-0000012-T04-IM6,Female,Non-Small Cell Lung Cancer,Lung,
4,P-0000015,P-0000015-T01-IM3,Female,Breast Cancer,Breast,Liver


In [5]:
# Read the cleaned diagnosis table and keep only the rows flagged as
# metastatic ICD billing codes, restricted to the columns used downstream
fname = 'table_diagnosis_clean.csv'
pathfilename1 = os.path.join(path, fname)
df_dx = pd.read_csv(pathfilename1, sep=',', header=0, low_memory=False)
list_col_index = ['DMP_ID', 'AGE_AT_EVENT', col_icd_billing, 'DX_DESCRIPTION']
# Explicit comparison against True is kept so non-True values (e.g. missing) are excluded
df_dx_mets = df_dx.loc[df_dx['IS_MET_ICD_BILLING'].eq(True), list_col_index]

In [19]:
df_dx_mets.head()

Unnamed: 0,DMP_ID,AGE_AT_EVENT,ICD-9/10 Dx Code,DX_DESCRIPTION
1,P-0000004,14484.0,197.7,[u'197.7 - LIVER METS']
2,P-0000004,14484.0,198.5,[u'198.5 - BONE METS']
6,P-0000012,21138.0,196.0,[u'196.0 - LYMPH NODE METS-NECK']
8,P-0000012,21213.0,196.1,[u'196.1 - LYMPH NODE METS-INTRATHOR']
9,P-0000012,21500.0,197.6,[u'197.6 - MET CA-PERITONEUM']


In [14]:
# Load RDN mapping
# Build the metastatic-spread mapping object; the directory and every mapping
# file name come from the constants_o_sites module (imported as `const`).
obj_met_map_rdn = MetastaticSpreadMappingRND(path=const.pathname,
                                             fname_all_sites=const.fname_mapping_rdn_all_sites,
                                             fname_hematogenous=const.fname_mapping_rdn_hematogenous,
                                             fname_localext=const.fname_mapping_rdn_localext,
                                             fname_lymphatic=const.fname_mapping_rdn_lymphatic,
                                             fname_site_map=const.fname_mapping_rdn_site_map,
                                             fname_billing_map=const.fname_mapping_rdn_billing_map,
                                             fname_billing_code_dict=const.fname_mapping_rdn_to_billing_codes)

Loading mapping tables


In [15]:
# Load annotations object, constructed from the RDN mapping object
obj_mapping = OrganMappingAnalysisRND(obj_met_map=obj_met_map_rdn)

In [16]:
# Annotate diagnosis table of ICD billings with renzo's mapping of metastatic sites
# NOTE(review): df_met_sites_dx is not referenced again in the cells shown here —
# confirm whether this call is needed (e.g. for side effects on obj_mapping).
df_met_sites_dx = obj_mapping.annotate_icd_billing_met_dx(df_dx_mets=df_dx_mets, col_icd_billing=col_icd_billing)


# Build the per-sample annotation table from the metastatic diagnosis rows and
# the clinical sample table; this is the frame that gets saved to CSV below.
# label_dist_ln=True presumably enables distant lymph node labelling — verify
# against OrganMappingAnalysisRND.
df_met_rdn_anno = obj_mapping.create_sample_to_icd_billing_met_mapping(df_dx=df_dx_mets,
                                                                       col_icd_billing=col_icd_billing,
                                                                       df_samples=df_samples,
                                                                       col_primary_site=col_primary_site,
                                                                       col_met_site=col_met_site,
                                                                       label_dist_ln=True)

In [18]:
df_met_rdn_anno.head()

Unnamed: 0,SAMPLE_ID,DMP_ID,CANCER_TYPE,SEX,PRIMARY_SITE_RDN_MAP,AGE_AT_EVENT,ICD-9/10 Dx Code,DX_DESCRIPTION,METASTATIC_SITE_RDN_MAP,LYMPH_SPREAD,LOCAL_EXTENSION,origin,hematogenous_grouping,PRIMARY_SITE_ONCOTREE_RDN,METASTATIC_SITE_ONCOTREE_RDN,PRIMARY_SITE_BILLING_RDN,METASTATIC_SITE_BILLING_RDN
0,P-0000004-T01-IM3,P-0000004,Breast Cancer,Female,chest_breast,14484.0,197.7,[u'197.7 - LIVER METS'],abdomen_liver,,,primary&mets,LIVER,Breast,Liver,BREAST,LIVER
1,P-0000004-T01-IM3,P-0000004,Breast Cancer,Female,chest_breast,14484.0,198.5,[u'198.5 - BONE METS'],undeterminate_bone,,,primary&mets,NON_PORTAL,Breast,Bone,BREAST,BONE
2,P-0000012-T02-IM3,P-0000012,Breast Cancer,Female,chest_breast,21138.0,196.0,[u'196.0 - LYMPH NODE METS-NECK'],neck_node,DISTANT,,mets,NON_PORTAL,Breast,Distant Lymphatic,BREAST,DIST_LYMPH
3,P-0000012-T02-IM3,P-0000012,Breast Cancer,Female,chest_breast,21213.0,196.1,[u'196.1 - LYMPH NODE METS-INTRATHOR'],mediastinum_node,DISTANT,,primary&mets,NON_PORTAL,Breast,Distant Lymphatic,BREAST,DIST_LYMPH
4,P-0000012-T02-IM3,P-0000012,Breast Cancer,Female,chest_breast,21500.0,197.6,[u'197.6 - MET CA-PERITONEUM'],abdomen_peritoneum.serosa,,,primary&mets,PORTAL,Breast,Peritoneum,BREAST,PERITONEUM


In [17]:
# Save RDN annotations
# Written relative to the current working directory, without the DataFrame index
df_met_rdn_anno.to_csv(fname_save_anno, index=False)