In [158]:
%pip install pandas numpy requests
import json
import os
import pprint

import pandas as pd
import requests

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [159]:
#SEER API key - this can be created by following the instructions at this link: https://api.seer.cancer.gov/usage
#The key is read from the SEER_API_KEY environment variable so that it never has to be saved inside the notebook.
#If that variable is not set, the placeholder below is used instead - replace it with your key in that case.
SEER_API_KEY = os.environ.get("SEER_API_KEY", "YOUR_API_KEY")

#Output folder location - one CSV file per tumour type is written here
outputFolder = "tumourTNMstaging/"

In [None]:
#A list of tumour types for which TNM staging is desired
#Each entry is inserted verbatim into the ".../schema/<tumour type>/tables" request URL
tumourTypes = [
    "lung",
    "colon", "rectum", "anus",
    "stomach","liver", "esophagus", "bile_ducts_distal", "bile_ducts_intrahepat", "bile_ducts_perihilar",
    "prostate", "bladder", "kidney_parenchyma", "kidney_renal_pelvis",
    "cervix", "breast", "ovary",
    "skin","melanoma_skin",
    "bone"
]

#To process a different set of tumour types, override the list above, e.g.:
#tumourTypes=['lymphoma']

#Set up the base URL and the API key header that is sent with every request
baseURL = "https://api.seer.cancer.gov/rest/staging/tnm"
headers = {
    "X-SEERAPI-Key":SEER_API_KEY
}

##First make a request to get a list of the versions of TNM staging
versionsURL = baseURL+"/versions"
#The timeout (in seconds) stops the cell from hanging forever if the API does not answer
response = requests.get(versionsURL, headers=headers, timeout=30)

#Stop here if the request failed: every later request needs mostRecentVersion, so carrying on
#would only fail further down with a NameError (or silently reuse a stale value left in the kernel)
if response.status_code != 200:
    raise RuntimeError(
        "Could not retrieve the list of TNM staging versions (HTTP "
        + str(response.status_code) + "): " + response.text
    )

#Request successful - pick the version with the greatest 'last_modified' value
TNMversions = response.json()
mostRecentVersion = max(TNMversions, key=lambda x: x['last_modified'])['version']


#Columns taken from the rows of each staging table returned by the API
#NOTE(review): assumes every row of every table has exactly four cells - TODO confirm against the API
stagingItemColumns = ["Staging item", "Staging item display", "Description", "Registrar Notes"]

#Columns of each output CSV, in the order in which they are written
outputColumns = ["Tumour type", "Staging TNM"] + stagingItemColumns

#Table titles accepted for each staging component, tried in the order listed
#NB: For the anus tumour type, there is a spelling error in the NIH TNM v2.0 schema - 'Pathologic' is misspelt as 'Pathologicl'!
#The second title for 'Pathologic T' is a manual correction for this
stagingTableTitles = {
    "Clinical T": ["Clinical T"],
    "Clinical N": ["Clinical N"],
    "Clinical M": ["Clinical M"],
    "Pathologic T": ["Pathologic T", "Pathologicl T"],
    "Pathologic N": ["Pathologic N"],
    "Pathologic M": ["Pathologic M"],
}


def _findTableID(tables, titles):
    """Return the 'id' of the first table whose 'title' matches one of the given titles.

    Parameters:
        tables: list of table dictionaries as returned by the schema tables request
        titles: candidate titles, tried in order; a later title is only used if no
                table carries an earlier one

    Returns:
        The 'id' value of the matching table, or None if no table has any of the titles.
    """
    for title in titles:
        for table in tables:
            if table.get("title") == title:
                return table['id']
    return None


#Make sure the output folder exists, otherwise writing the CSV fails for every tumour type
if outputFolder:
    os.makedirs(outputFolder, exist_ok=True)

#Loop through all tumour types for which staging is desired
for tumourType in tumourTypes:

    ##Get the list of tables for this tumour type
    tablesURL = baseURL+"/"+mostRecentVersion+"/schema/"+tumourType+"/tables"
    response = requests.get(tablesURL, headers=headers, timeout=30)
    if response.status_code != 200:
        #Report the failure and move on, rather than writing a CSV that contains only a header
        print("Skipping "+tumourType+": could not list its tables (HTTP "+str(response.status_code)+")")
        pprint.pprint(response.text)
        continue
    tables = response.json()

    #For debugging, print tables
    #pprint.pprint(tables)

    #Find the table IDs for CLINICAL and PATHOLOGIC T, N and M
    tableIDs = {}
    for stagingComponent, titles in stagingTableTitles.items():
        tableID = _findTableID(tables, titles)
        if tableID is None:
            #A missing table is reported and left out, instead of aborting the whole run with an IndexError
            print("Warning: "+tumourType+" has no table titled '"+stagingComponent+"' - component skipped")
        else:
            tableIDs[stagingComponent] = tableID

    ##Next, pull the specific table, for each of clinical and pathologic T,N,M
    stagingTables = []
    for stagingComponent, tableID in tableIDs.items():
        pullTableURL = baseURL+"/"+mostRecentVersion+"/table/"+tableID
        response = requests.get(pullTableURL, headers=headers, timeout=30)
        if response.status_code != 200:
            print("Warning: could not retrieve '"+stagingComponent+"' for "+tumourType+" (HTTP "+str(response.status_code)+")")
            pprint.pprint(response.text)
            continue
        table = response.json()

        #Create a table in the correct format, containing the return
        tableItem = pd.DataFrame(table['rows'], columns=stagingItemColumns)
        tableItem["Tumour type"] = tumourType
        tableItem["Staging TNM"] = stagingComponent

        #Keep the table; all tables are combined in a single step after the loop
        stagingTables.append(tableItem)

        #pprint.pprint(table['rows'])

    if not stagingTables:
        print("Skipping "+tumourType+": no staging tables could be retrieved")
        continue

    #Combine the tables once (cheaper than growing a dataframe inside the loop) and
    #put the columns into the output order
    tumourStaging = pd.concat(stagingTables, ignore_index=True)[outputColumns]

    #Now write the full staging to a CSV
    outputPath = os.path.join(outputFolder, tumourType.capitalize()+" NIH TNM Staging v"+mostRecentVersion+".csv")
    tumourStaging.to_csv(outputPath, index=False)