
# Summary File Creator for cBioPortal 

Create cBioPortal summary file(s) and their corresponding headers from CDM data and the codebook,
then combine them into a single file to be pushed to cBioPortal.

Configuration is read from the YAML config file (`fname_config`).


In [None]:
from cdm_cbioportal_etl.summary import cbioportalSummaryFileCombiner
from cdm_cbioportal_etl.summary import RedcapToCbioportalFormat
from cdm_cbioportal_etl.utils import constants
from cdm_cbioportal_etl.utils import yaml_config_parser


# Local aliases for two constants defined in cdm_cbioportal_etl.utils.constants.
# NOTE(review): presumably these are manifest column names for the saved
# summary / header filenames -- confirm against the constants module.
# Neither alias is referenced by the cells visible in this notebook.
COL_SUMMARY_FNAME_SAVE = constants.COL_SUMMARY_FNAME_SAVE
COL_SUMMARY_HEADER_FNAME_SAVE = constants.COL_SUMMARY_HEADER_FNAME_SAVE

## YAML Configuration File Location

In [None]:
# Location of the YAML configuration file that every later cell reads from.
# The '<path_to>' prefix is a placeholder: replace it with the real path to
# the repository checkout before running the notebook.
fname_config = '<path_to>/cdm-cbioportal-etl/config/etl_config_all_impact.yml'

## Parse the configuration file

In [None]:
# Parse the YAML config once, then pull out every setting the later cells use.
obj_yaml = yaml_config_parser(fname_yaml_config=fname_config)
# Credential file, intermediate folder path and the three codebook files
# (metadata, projects, tables) that are handed to RedcapToCbioportalFormat.
fname_minio_env = obj_yaml.return_credential_filename()
path_minio_summary_intermediate = obj_yaml.return_intermediate_folder_path()
fname_meta_data = obj_yaml.return_filename_codebook_metadata()
fname_meta_project = obj_yaml.return_filename_codebook_projects()
fname_meta_table = obj_yaml.return_filename_codebook_tables()
# Indicator forwarded unchanged to the summary creation and combiner steps.
production_or_test = obj_yaml.return_production_or_test_indicator()

# Patient-level inputs/outputs: manifest, summary template and final summary file.
fname_manifest_patient = obj_yaml.return_manifest_filename_patient()
fname_summary_template_patient = obj_yaml.return_template_info()['fname_p_sum_template_cdsi']
fname_summary_patient = obj_yaml.return_filenames_deid_datahub()['summary_patient']

# Sample-level equivalents. They are loaded here but the cells below pass the
# *_patient variables explicitly, so these are currently unused in this notebook.
fname_manifest_sample = obj_yaml.return_manifest_filename_sample()
fname_summary_template_sample = obj_yaml.return_template_info()['fname_s_sum_template_cdsi']
fname_summary_sample = obj_yaml.return_filenames_deid_datahub()['summary_sample']

# Level of summary to create; passed as `patient_or_sample` to the steps below.
# NOTE(review): changing this to 'sample' alone does not switch the manifest,
# template or output filenames used below -- those cells reference the
# *_patient variables by name and would need to be edited as well.
patient_or_sample = 'patient'


## Create Summary File
### Create object to load configuration info and codebook

In [None]:
# Build the formatter object from the credential file, the intermediate
# folder path and the three codebook files (metadata, projects, tables)
# that were read from the YAML config above.
obj_format_cbio = RedcapToCbioportalFormat(
    fname_minio_env=fname_minio_env,
    path_minio_summary_intermediate=path_minio_summary_intermediate,
    fname_metadata=fname_meta_data,
    fname_metaproject=fname_meta_project,
    fname_metatables=fname_meta_table
)

### Create summary files and corresponding headers

In [None]:
# Create individual summary and header files, with a manifest file summarizing the outputs.
# The patient manifest and patient summary template are passed explicitly here,
# alongside the `patient_or_sample` and `production_or_test` settings from the config cell.
obj_format_cbio.create_summaries_and_headers(
    patient_or_sample=patient_or_sample,
    fname_manifest=fname_manifest_patient,
    fname_template=fname_summary_template_patient,
    production_or_test=production_or_test
)

### Combine summary and header files

In [None]:
# Build the combiner for the patient-level files, using the same manifest
# that was passed to create_summaries_and_headers in the previous step.
# NOTE(review): `fname_current_summary` is given the summary *template*
# filename (fname_summary_template_patient), not fname_summary_patient --
# confirm this is the intended starting file for the merge.
obj_p_combiner = cbioportalSummaryFileCombiner(
    fname_minio_env=fname_minio_env,
    fname_manifest=fname_manifest_patient,
    fname_current_summary=fname_summary_template_patient,
    patient_or_sample=patient_or_sample,
    production_or_test=production_or_test
)

### Return combined result and save

In [None]:
# Fetch the combined result from the combiner and preview it with .head(10);
# as the last expression in the cell, the preview is what the notebook displays.
# NOTE(review): presumably a pandas DataFrame -- confirm against return_final().
df_cbio_summary = obj_p_combiner.return_final()
df_cbio_summary.head(10)

In [None]:
# Save the merged summaries to file.
# The target name is fname_summary_patient, i.e. the 'summary_patient' entry
# of obj_yaml.return_filenames_deid_datahub() read in the config cell.
obj_p_combiner.save_update(fname=fname_summary_patient)


