#### We have chosen to move away from the pyBioPortal program and move to using direct API calls for cBioPortal. 
The reason is that we are concerned about continued support of the pyBioPortal program and that the program is unaffiliated with this wrapper.

### list all studies, via requests

In [1]:
import requests

BASE_URL = "https://www.cbioportal.org/api"
HEADERS = {"Accept": "application/json"}

study_id = "pptc_2019"


### Get all studies

In [2]:
def get_studies():
    """Fetch available studies from cBioPortal"""
    url = f"{BASE_URL}/studies"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
studies = get_studies()
# if studies:
#     for study in studies[:]:  # Print all studies
#         print(f"Study ID: {study['studyId']} - Name: {study['name']}")

### List pediatric studies

In [3]:
peds = [
    "pptc_2019", #Pediatric Preclinical Testing Consortium  (CHOP, Cell Rep 2019)
    "mixed_pipseq_2017", #Pediatric Pan-cancer (Columbia U, Genome Med 2016)
    "es_dfarber_broad_2014", #Pediatric Ewing Sarcoma (DFCI, Cancer Discov 2014)
    "pediatric_dkfz_2017", #Pediatric Pan-Cancer (DKFZ, Nature 2017)
    "es_iocurie_2014", #Ewing Sarcoma (Institut Curie, Cancer Discov 2014)
    "pancan_mappyacts_2022", #Pediatric European MAPPYACTS Trial (Gustave Roussy, Cancer Discov 2022)
    "mbl_pcgp", #Medulloblastoma (PCGP, Nature 2012)
    "all_stjude_2015", #Acute Lymphoblastic Leukemia (St Jude, Nat Genet 2015)
    "all_stjude_2013", #Hypodiploid Acute Lymphoid Leukemia (St Jude, Nat Genet 2013)
    "all_phase2_target_2018_pub", #Pediatric Acute Lymphoid Leukemia - Phase II (TARGET, 2018)
    "aml_target_2018_pub", #Pediatric Acute Myeloid Leukemia (TARGET, 2018)
    "nbl_target_2018_pub", #Pediatric Neuroblastoma (TARGET, 2018)
    "rt_target_2018_pub", #Pediatric Rhabdoid Tumor (TARGET, 2018)
    "wt_target_2018_pub" #Pediatric Wilms' Tumor (TARGET, 2018)
]


### Get pptc_2019 study info

In [4]:
def verify_study_exists(study_id):
    """Check if the study exists in cBioPortal."""
    url = f"{BASE_URL}/studies/{study_id}"
    response = requests.get(url, headers=HEADERS)

    print(f"Request URL: {url}")  
    print(f"Response Status Code: {response.status_code}")  

    if response.status_code == 200:
        print(f"Study {study_id} exists!")
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Run the check
verify_study_exists("pptc_2019")


Request URL: https://www.cbioportal.org/api/studies/pptc_2019
Response Status Code: 200
Study pptc_2019 exists!


{'name': 'Pediatric Preclinical Testing Consortium  (CHOP, Cell Rep 2019)',
 'description': "Whole-exome sequencing of 261 patient derived xenografts (PDXs) samples from high-risk childhood cancers (with no matched normals). Website: <a href='http://www.ncipptc.org/'>PPTC</a>",
 'publicStudy': True,
 'pmid': '31693904',
 'citation': 'Rotika et al. Cell Rep 2019',
 'groups': 'PUBLIC',
 'status': 0,
 'importDate': '2024-12-21 20:50:55',
 'allSampleCount': 261,
 'sequencedSampleCount': 261,
 'cnaSampleCount': 252,
 'mrnaRnaSeqSampleCount': 244,
 'mrnaRnaSeqV2SampleCount': 0,
 'mrnaMicroarraySampleCount': 0,
 'miRnaSampleCount': 0,
 'methylationHm27SampleCount': 0,
 'rppaSampleCount': 0,
 'massSpectrometrySampleCount': 0,
 'completeSampleCount': 222,
 'readPermission': True,
 'treatmentCount': 0,
 'structuralVariantCount': 101,
 'studyId': 'pptc_2019',
 'cancerTypeId': 'mixed',
 'cancerType': {'name': 'Mixed Cancer Types',
  'dedicatedColor': 'Black',
  'shortName': 'MIXED',
  'parent': 'o

In [5]:
def get_study_info(study_id):
    """Fetch details for a specific study from cBioPortal"""
    url = f"{BASE_URL}/studies/{study_id}"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
study_info = get_study_info(study_id)

if study_info:
    print(study_info)


{'name': 'Pediatric Preclinical Testing Consortium  (CHOP, Cell Rep 2019)', 'description': "Whole-exome sequencing of 261 patient derived xenografts (PDXs) samples from high-risk childhood cancers (with no matched normals). Website: <a href='http://www.ncipptc.org/'>PPTC</a>", 'publicStudy': True, 'pmid': '31693904', 'citation': 'Rotika et al. Cell Rep 2019', 'groups': 'PUBLIC', 'status': 0, 'importDate': '2024-12-21 20:50:55', 'allSampleCount': 261, 'sequencedSampleCount': 261, 'cnaSampleCount': 252, 'mrnaRnaSeqSampleCount': 244, 'mrnaRnaSeqV2SampleCount': 0, 'mrnaMicroarraySampleCount': 0, 'miRnaSampleCount': 0, 'methylationHm27SampleCount': 0, 'rppaSampleCount': 0, 'massSpectrometrySampleCount': 0, 'completeSampleCount': 222, 'readPermission': True, 'treatmentCount': 0, 'structuralVariantCount': 101, 'studyId': 'pptc_2019', 'cancerTypeId': 'mixed', 'cancerType': {'name': 'Mixed Cancer Types', 'dedicatedColor': 'Black', 'shortName': 'MIXED', 'parent': 'other', 'cancerTypeId': 'mixed'

### Fetch clinical attributes

In [6]:
def get_clinical_attributes(study_id):
    """Fetch clinical attributes for a study from cBioPortal."""
    url = f"{BASE_URL}/clinical-attributes?studyId={study_id}"
    response = requests.get(url, headers=HEADERS)

    print(f"Request URL: {url}")  # Debugging
    print(f"Response Status Code: {response.status_code}")  # Debugging

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")  # Debugging
        return None

# Example Usage
clinical_attributes = get_clinical_attributes(study_id)

if clinical_attributes:
    print("Available Clinical Attributes:")
    for attr in clinical_attributes[:10]:
        print(f"- {attr['clinicalAttributeId']}: {attr['description']}")


Request URL: https://www.cbioportal.org/api/clinical-attributes?studyId=pptc_2019
Response Status Code: 200
Available Clinical Attributes:
- ADJUVANT_CHEMO: Adjuvant Chemotherapy
- ADJUVANT_TX: Adjuvant treatment.
- AGE: Age at which a condition or disease was first diagnosed.
- CANCER_TYPE: Cancer Type
- CANCER_TYPE_DETAILED: Cancer Type Detailed
- ER_STATUS_BY_IHC: breast carcinoma progesterone receptor status
- FOLLOWUP_YEARS: Followup in years
- FUSION_STATUS: Yes/No indicator of fusion status
- GENOMIC_ALTERATIONS: Number of genomic alterations
- IHC_HER2: IHC-HER2


### Fetch data for attribute in study

In [7]:
def get_clinical_data(study_id, attribute_id, data_type="PATIENT"):
    """Fetch clinical data for a specific attribute from cBioPortal."""
    url = f"{BASE_URL}/studies/{study_id}/clinical-data/"
    response = requests.get(url, headers=HEADERS)

    print(f"Request URL: {url}")  # Debugging
    print(f"Response Status Code: {response.status_code}")  # Debugging

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")  # Debugging
        return None

# Example Usage
attribute_id = "AGE" 
clinical_data = get_clinical_data(study_id, attribute_id)

if clinical_data:
    print("Clinical Data Results:")
    for entry in clinical_data[:10]:  # Print first 10 results
        print(entry)


Request URL: https://www.cbioportal.org/api/studies/pptc_2019/clinical-data/
Response Status Code: 200
Clinical Data Results:
{'uniqueSampleKey': 'QUxMLTAzOnBwdGNfMjAxOQ', 'uniquePatientKey': 'UDAwMDI6cHB0Y18yMDE5', 'sampleId': 'ALL-03', 'patientId': 'P0002', 'studyId': 'pptc_2019', 'clinicalAttributeId': 'CANCER_SUBTYPE_CURATED', 'value': 'MLL-ALL'}
{'uniqueSampleKey': 'QUxMLTAzOnBwdGNfMjAxOQ', 'uniquePatientKey': 'UDAwMDI6cHB0Y18yMDE5', 'sampleId': 'ALL-03', 'patientId': 'P0002', 'studyId': 'pptc_2019', 'clinicalAttributeId': 'CANCER_TYPE', 'value': 'Leukemia'}
{'uniqueSampleKey': 'QUxMLTAzOnBwdGNfMjAxOQ', 'uniquePatientKey': 'UDAwMDI6cHB0Y18yMDE5', 'sampleId': 'ALL-03', 'patientId': 'P0002', 'studyId': 'pptc_2019', 'clinicalAttributeId': 'CANCER_TYPE_DETAILED', 'value': 'Acute Leukemias of Ambiguous Lineage'}
{'uniqueSampleKey': 'QUxMLTAzOnBwdGNfMjAxOQ', 'uniquePatientKey': 'UDAwMDI6cHB0Y18yMDE5', 'sampleId': 'ALL-03', 'patientId': 'P0002', 'studyId': 'pptc_2019', 'clinicalAttribute

### Available molecular profiles

In [8]:
def get_molecular_profiles_for_study(study_id):
    """Fetch molecular profiles for a specific study and filter results."""
    url = f"{BASE_URL}/molecular-profiles"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        profiles = response.json()
        # Filter profiles for the specified study ID
        filtered_profiles = [profile for profile in profiles if profile['studyId'] == study_id]
        return filtered_profiles
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
profiles = get_molecular_profiles_for_study(study_id)

if profiles:
    print(f"✅ Molecular Profiles for {study_id}:")
    for profile in profiles:
        print(f"- {profile['molecularProfileId']} ({profile['molecularAlterationType']})")
else:
    print(f"❌ No molecular profiles found for {study_id}.")



✅ Molecular Profiles for pptc_2019:
- pptc_2019_gistic (COPY_NUMBER_ALTERATION)
- pptc_2019_mutations (MUTATION_EXTENDED)
- pptc_2019_structural_variants (STRUCTURAL_VARIANT)
- pptc_2019_rna_seq_mrna (MRNA_EXPRESSION)
- pptc_2019_rna_seq_mrna_median_Zscores (MRNA_EXPRESSION)


### Get mutations

In [9]:
## not returning mutations

def get_mutations(molecular_profile_id):
    """Fetch mutation data for a specific molecular profile"""
    url = f"{BASE_URL}/molecular-profiles/{molecular_profile_id}"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
molecular_profile_id = "pptc_2019_mutations" 
mutations = get_mutations(molecular_profile_id)

if mutations:
    print("✅ Mutation Data:")
    for mutation in mutations:  # Print first 10 results
        print(mutation)
else:
    print("❌ No mutation data found.")


✅ Mutation Data:
molecularAlterationType
datatype
name
description
showProfileInAnalysisTab
patientLevel
molecularProfileId
studyId
study


### Sample List ID

In [15]:
def get_sample_lists():
    """Fetch all samples lists"""
    url = f"{BASE_URL}/sample-lists"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
all_sample_lists = get_sample_lists()

# for sample_lists in all_sample_lists:
#     print(sample_lists)

{'category': 'all_cases_in_study', 'name': 'All samples', 'description': 'All samples (93 samples)', 'sampleListId': 'all_stjude_2015_all', 'studyId': 'all_stjude_2015'}
{'category': 'all_cases_with_mutation_data', 'name': 'Samples with mutation data', 'description': 'Samples with mutation data (93 samples)', 'sampleListId': 'all_stjude_2015_sequenced', 'studyId': 'all_stjude_2015'}
{'category': 'all_cases_with_sv_data', 'name': 'Samples with SV data', 'description': 'All samples (93 samples)', 'sampleListId': 'all_stjude_2015_sv', 'studyId': 'all_stjude_2015'}
{'category': 'all_cases_in_study', 'name': 'All samples', 'description': 'All samples (44 samples)', 'sampleListId': 'all_stjude_2013_all', 'studyId': 'all_stjude_2013'}
{'category': 'all_cases_with_mutation_data', 'name': 'Samples with mutation data', 'description': 'Samples with mutation data (44 samples)', 'sampleListId': 'all_stjude_2013_sequenced', 'studyId': 'all_stjude_2013'}
{'category': 'all_cases_in_study', 'name': 'Al

In [23]:
def get_study_sample_lists(study_id):
    """Fetch study list IDs for a specific study"""
    url = f"{BASE_URL}/studies/{study_id}/sample-lists"
    response = requests.get(url, headers=HEADERS)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
study_id = "pptc_2019" 
study_sample_lists = get_study_sample_lists(study_id)

# not working, says there are no sample lists found)
# if mutations:
#     print("✅ Study sample lists:")
#     for sample_lists in study_sample_lists:  # Print first 10 results
#         print(sample_lists)
# else:
#     print("❌ No sample lists found.")

study_sample_lists

[{'category': 'all_cases_with_mrna_rnaseq_data',
  'name': 'Samples with mRNA data (RNA Seq)',
  'description': 'Samples with mRNA expression data (244 samples)',
  'sampleListId': 'pptc_2019_rna_seq_mrna',
  'studyId': 'pptc_2019'},
 {'category': 'all_cases_in_study',
  'name': 'All samples',
  'description': 'All samples (261 samples)',
  'sampleListId': 'pptc_2019_all',
  'studyId': 'pptc_2019'},
 {'category': 'all_cases_with_cna_data',
  'name': 'Samples with CNA data',
  'description': 'Samples with CNA data (252 samples)',
  'sampleListId': 'pptc_2019_cna',
  'studyId': 'pptc_2019'},
 {'category': 'all_cases_with_mutation_and_cna_data',
  'name': 'Samples with mutation and CNA data',
  'description': 'Samples with mutation and CNA data (232 samples)',
  'sampleListId': 'pptc_2019_cnaseq',
  'studyId': 'pptc_2019'},
 {'category': 'all_cases_with_mutation_and_cna_and_mrna_data',
  'name': 'Complete samples',
  'description': 'Samples with mutation, CNA and expression data (222 samp

In [35]:
import requests

BASE_URL = "https://www.cbioportal.org/api"
HEADERS = {"Content-Type": "application/json"}

def get_mutations(molecular_profile_id, sample_list_id):
    """Fetch mutation data for a specific molecular profile using a sample list ID"""
    url = f"{BASE_URL}/molecular-profiles/{molecular_profile_id}/mutations/fetch"
    payload = {"sampleListId": sample_list_id}  # Include required parameter in the payload
    response = requests.post(url, headers=HEADERS, json=payload)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

# Example Usage
molecular_profile_id = "pptc_2019_mutations" 
sample_list_id = "pptc_2019_mutations"  # Replace with a valid sample list ID
mutations = get_mutations(molecular_profile_id, sample_list_id)

if mutations:
    print("✅ Mutation Data:")
    for mutation in mutations:
        print(mutation)
else:
    print("❌ No mutation data found.")



❌ No mutation data found.


In [36]:
import requests

BASE_URL = "https://www.cbioportal.org/api"
HEADERS = {"Content-Type": "application/json"}

def get_sample_ids(study_id):
    """Fetch case data for the study and extract sample IDs."""
    url = f"{BASE_URL}/studies/{study_id}/cases"
    response = requests.get(url, headers=HEADERS)
    
    if response.status_code == 200:
        cases = response.json()
        # Extract the sampleId from each case
        sample_ids = [case.get("sampleId") for case in cases if case.get("sampleId")]
        return sample_ids
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

study_id = "pptc_2019"
sample_ids = get_sample_ids(study_id)

if sample_ids:
    print("Sample IDs:")
    for sid in sample_ids:
        print(sid)
else:
    print("No sample IDs found.")


Error 404: {"timestamp":1742314035276,"status":404,"error":"Not Found","path":"/api/studies/pptc_2019/cases"}
No sample IDs found.


In [33]:
import requests

BASE_URL = "https://www.cbioportal.org/api"
HEADERS = {"Content-Type": "application/json"}

def get_case_lists(study_id):
    """Fetch case lists for a specific study."""
    url = f"{BASE_URL}/studies/{study_id}/case-lists"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

study_id = "pptc_2019"
case_lists = get_case_lists(study_id)

if case_lists:
    # Inspect all case lists
    for cl in case_lists:
        print(f"Case List ID: {cl.get('caseListId')}")
        sample_ids = cl.get("sampleIds", [])
        print("Sample IDs:")
        for sid in sample_ids:
            print(sid)
        print("-" * 40)
else:
    print("No case lists found.")


Error 404: {"timestamp":1742313215625,"status":404,"error":"Not Found","path":"/api/studies/pptc_2019/case-lists"}
No case lists found.


In [34]:
import requests

BASE_URL = "https://www.cbioportal.org/api"
HEADERS = {"Content-Type": "application/json"}

def get_molecular_profile(molecular_profile_id):
    """Fetch molecular profile details including sample IDs."""
    url = f"{BASE_URL}/molecular-profiles/{molecular_profile_id}"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

molecular_profile_id = "pptc_2019_mutations"
profile = get_molecular_profile(molecular_profile_id)

if profile and "sampleIds" in profile:
    sample_ids = profile["sampleIds"]
    print("✅ Sample IDs:")
    for sid in sample_ids:
        print(sid)
else:
    print("❌ No sample IDs found.")


❌ No sample IDs found.
