# cBioPortal API Usage

### Import Modules

In [1]:
import pandas as pd

import api_tools as apit
import access_cBioPortal as cbio

### Load cBioPortal API using Swagger

We define an object, `client`, that serves as our connection to the cBioPortal API.

Reference: [cBioPortal "API and API Clients" documentation](https://docs.cbioportal.org/6.-web-api-and-clients/api-and-api-clients)

In [2]:
# Build the API client from the Swagger/OpenAPI description published by
# the public cBioPortal instance; every later cell passes `client` along.
client = apit.get_swagger_api_client(
    url="https://www.cbioportal.org/api/api-docs",
)

### Get Molecular Profiles

In [3]:
# Pull every record from the "Molecular Profiles" endpoint.
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_molecular_profiles = cbio.get_all_from_bioportal_endpoint(
    client,
    "Molecular Profiles",
    out_path="molecular_profiles.csv",
)
# Preview the first rows only.
df_molecular_profiles.head()

Unnamed: 0,datatype,description,genericAssayType,molecularAlterationType,molecularProfileId,name,patientLevel,pivotThreshold,showProfileInAnalysisTab,sortOrder,study,studyId
0,DISCRETE,Discrete copy-number calls from the RAE algori...,,COPY_NUMBER_ALTERATION,prad_mskcc_2014_cna,Putative copy-number alterations (RAE),False,,True,,,prad_mskcc_2014
1,DISCRETE,Discrete copy-number data from targeted sequen...,,COPY_NUMBER_ALTERATION,pan_origimed_2020_cna,Copy-number alterations,False,,True,,,pan_origimed_2020
2,MAF,Targeted sequencing of various tumor types via...,,MUTATION_EXTENDED,pan_origimed_2020_mutations,Mutations,False,,True,,,pan_origimed_2020
3,FUSION,Fusions from targeted sequencing via Cancer Se...,,STRUCTURAL_VARIANT,pan_origimed_2020_fusion,Fusions,False,,True,,,pan_origimed_2020
4,DISCRETE,Putative copy-number calls derived from IMPACT...,,COPY_NUMBER_ALTERATION,crc_nigerian_2020_cna,Putative copy-number alterations (IMPACT341),False,,True,,,crc_nigerian_2020


### Get Studies

In [4]:
# Pull every record from the "Studies" endpoint.
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_studies = cbio.get_all_from_bioportal_endpoint(
    client,
    "Studies",
    out_path="studies.csv",
)
# Preview the first rows only.
df_studies.head()

Unnamed: 0,allSampleCount,cancerType,cancerTypeId,citation,cnaSampleCount,completeSampleCount,description,groups,importDate,massSpectrometrySampleCount,...,mrnaRnaSeqV2SampleCount,name,pmid,publicStudy,readPermission,referenceGenome,rppaSampleCount,sequencedSampleCount,status,studyId
0,104,,prad,Hieronymus et al. PNAS 2014,,,Genome-wide copy number profiling in 104 prima...,PUBLIC,2021-04-23 00:00:00,,...,,"Prostate Adenocarcinoma (MSKCC, PNAS 2014)",25024180.0,True,True,hg19,,,0,prad_mskcc_2014
1,10194,,mixed,,,,"Landscape of genomic alterations in 10,194 sol...",,2021-09-24 00:00:00,,...,,China Pan-cancer (OrigiMed2020),,True,True,hg19,,,0,pan_origimed_2020
2,64,,coadread,,,,Targeted sequencing of 64 colorectal tumor/nor...,,2021-10-26 00:00:00,,...,,Disparities in metastatic colorectal cancer be...,,True,True,hg19,,,0,crc_nigerian_2020
3,92,,acc,,,,TCGA Adrenocortical Carcinoma. Source data fro...,PUBLIC,2021-10-29 00:00:00,,...,,"Adrenocortical Carcinoma (TCGA, Firehose Legacy)",,True,True,hg19,,,0,acc_tcga
4,24,,acyc,Stephens et al. JCI 2013,,,Whole exome sequencing of 24 ACCs.,ACYC;PUBLIC,2021-10-29 00:00:00,,...,,"Adenoid Cystic Carcinoma (Sanger/MDA, JCI 2013)",23778141.0,True,True,hg19,,,0,acyc_sanger_2013


### Get Sample Lists

In [5]:
# Pull every record from the "Sample Lists" endpoint.
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_sample_lists = cbio.get_all_from_bioportal_endpoint(
    client,
    "Sample Lists",
    out_path="sample_lists.csv",
)
# Preview the first rows only.
df_sample_lists.head()

Unnamed: 0,category,description,name,sampleCount,sampleIds,sampleListId,studyId
0,all_cases_in_study,All samples (104 samples),All samples,,,prad_mskcc_2014_all,prad_mskcc_2014
1,all_cases_with_cna_data,Samples with CNA data (104 samples),Samples with CNA data,,,prad_mskcc_2014_cna,prad_mskcc_2014
2,all_cases_with_mutation_and_cna_data,Samples with mutation and cna data (10194 samp...,Samples profiled for mutation and cna,,,pan_origimed_2020_cnaseq,pan_origimed_2020
3,all_cases_in_study,All samples (10194 samples),All samples,,,pan_origimed_2020_all,pan_origimed_2020
4,all_cases_with_cna_data,Samples with CNA data (10194 samples),Samples profiled for CNA,,,pan_origimed_2020_cna,pan_origimed_2020


### Get Cancer Types

In [6]:
# Pull every record from the "Cancer Types" endpoint.
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_cancer_types = cbio.get_all_from_bioportal_endpoint(
    client,
    "Cancer Types",
    out_path="cancer_types.csv",
)
# Preview the first rows only.
df_cancer_types.head()

Unnamed: 0,cancerTypeId,dedicatedColor,name,parent,shortName
0,aa,LightYellow,Aggressive Angiomyxoma,soft_tissue,AA
1,aastr,Gray,Anaplastic Astrocytoma,difg,AASTR
2,abc,LimeGreen,Activated B-cell Type,dlbclnos,ABC
3,abl,LightSalmon,Acute Basophilic Leukemia,amlnos,ABL
4,aca,Purple,Adrenocortical Adenoma,adrenal_gland,ACA


### Get Patients

In [7]:
# Pull every record from the "Patients" endpoint.
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_patients = cbio.get_all_from_bioportal_endpoint(
    client,
    "Patients",
    out_path="patients.csv",
)
# Preview the first rows only.
df_patients.head()

Unnamed: 0,cancerStudy,patientId,studyId,uniquePatientKey,uniqueSampleKey
0,,PCA0318,prad_mskcc_2014,UENBMDMxODpwcmFkX21za2NjXzIwMTQ,
1,,PCA0301,prad_mskcc_2014,UENBMDMwMTpwcmFkX21za2NjXzIwMTQ,
2,,PCA0334,prad_mskcc_2014,UENBMDMzNDpwcmFkX21za2NjXzIwMTQ,
3,,PCA0375,prad_mskcc_2014,UENBMDM3NTpwcmFkX21za2NjXzIwMTQ,
4,,PCA0314,prad_mskcc_2014,UENBMDMxNDpwcmFkX21za2NjXzIwMTQ,


### Get Samples

In [8]:
# Pull every record from the "Samples" endpoint. The helper reports progress
# while it works through the studies (see the "Parsing Study ..." output).
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_samples = cbio.get_all_from_bioportal_endpoint(
    client,
    "Samples",
    out_path="samples.csv",
)
# Preview the first rows only.
df_samples.head()

Parsing Study 100 of 339
Parsing Study 200 of 339
Parsing Study 300 of 339


Unnamed: 0,copyNumberSegmentPresent,patientId,profiledForFusions,sampleId,sampleType,sequenced,studyId,uniquePatientKey,uniqueSampleKey
0,,AdCC5T,,AdCC5T,Primary Solid Tumor,,acbc_mskcc_2015,QWRDQzVUOmFjYmNfbXNrY2NfMjAxNQ,QWRDQzVUOmFjYmNfbXNrY2NfMjAxNQ
1,,AdCC4T,,AdCC4T,Primary Solid Tumor,,acbc_mskcc_2015,QWRDQzRUOmFjYmNfbXNrY2NfMjAxNQ,QWRDQzRUOmFjYmNfbXNrY2NfMjAxNQ
2,,AdCC2T,,AdCC2T,Primary Solid Tumor,,acbc_mskcc_2015,QWRDQzJUOmFjYmNfbXNrY2NfMjAxNQ,QWRDQzJUOmFjYmNfbXNrY2NfMjAxNQ
3,,AdCC9T,,AdCC9T,Primary Solid Tumor,,acbc_mskcc_2015,QWRDQzlUOmFjYmNfbXNrY2NfMjAxNQ,QWRDQzlUOmFjYmNfbXNrY2NfMjAxNQ
4,,AdCC8T,,AdCC8T,Primary Solid Tumor,,acbc_mskcc_2015,QWRDQzhUOmFjYmNfbXNrY2NfMjAxNQ,QWRDQzhUOmFjYmNfbXNrY2NfMjAxNQ


### Get Patient Cancer Types

In [9]:
# Pull every record from the "Clinical Data" endpoint. The helper reports
# progress while it works through the studies (see the "Parsing Study ..."
# output).
# NOTE(review): `out_path` presumably names a CSV the helper writes to (or
# reloads from) -- confirm in access_cBioPortal.
df_clinical_data = cbio.get_all_from_bioportal_endpoint(
    client,
    "Clinical Data",
    out_path="clinical_data.csv",
)
# Preview the first rows only.
df_clinical_data.head()

Parsing Study 100 of 339
Parsing Study 200 of 339
Parsing Study 300 of 339


Unnamed: 0,patientId,sampleId,studyId,name,cancerTypeId,dedicatedColor,parent,shortName
0,AdCC5T,AdCC5T,acbc_mskcc_2015,Invasive Breast Carcinoma,brca,HotPink,breast,BRCA
1,AdCC4T,AdCC4T,acbc_mskcc_2015,Invasive Breast Carcinoma,brca,HotPink,breast,BRCA
2,AdCC2T,AdCC2T,acbc_mskcc_2015,Invasive Breast Carcinoma,brca,HotPink,breast,BRCA
3,AdCC9T,AdCC9T,acbc_mskcc_2015,Invasive Breast Carcinoma,brca,HotPink,breast,BRCA
4,AdCC8T,AdCC8T,acbc_mskcc_2015,Invasive Breast Carcinoma,brca,HotPink,breast,BRCA


### Get Mutations

In [10]:
# Pull every record from the "Mutations" endpoint. This is a long-running
# cell: the helper logs one progress line per 100 mutation lists fetched.
# The return value is intentionally left unbound here, so the cell displays
# whatever the helper returns.
# NOTE(review): `out_path` presumably names the CSV that receives the
# results -- confirm in access_cBioPortal.
cbio.get_all_from_bioportal_endpoint(
    client,
    "Mutations",
    out_path="mutations.csv",
)

Getting mutation list 100 of 13839
Getting mutation list 200 of 13839
Getting mutation list 300 of 13839
Getting mutation list 400 of 13839
Getting mutation list 500 of 13839
Getting mutation list 600 of 13839
Getting mutation list 700 of 13839
Getting mutation list 800 of 13839
Getting mutation list 900 of 13839
Getting mutation list 1000 of 13839
Getting mutation list 1100 of 13839
Getting mutation list 1200 of 13839
Getting mutation list 1300 of 13839
Getting mutation list 1400 of 13839
Getting mutation list 1500 of 13839
Getting mutation list 1600 of 13839
Getting mutation list 1700 of 13839
Getting mutation list 1800 of 13839
Getting mutation list 1900 of 13839
Getting mutation list 2000 of 13839
Getting mutation list 2100 of 13839
Getting mutation list 2200 of 13839
Getting mutation list 2300 of 13839
Getting mutation list 2400 of 13839
Getting mutation list 2500 of 13839
Getting mutation list 2600 of 13839
Getting mutation list 2700 of 13839
Getting mutation list 2800 of 13839
G