In [30]:
import requests
import json
import pandas as pd

In [31]:
# GDC REST API endpoints.
files_endpt = "https://api.gdc.cancer.gov/files"
cases_endpt = 'https://api.gdc.cancer.gov/cases'

# The API takes 'fields' as one comma-separated string, so the individual
# field names are listed here and joined below.
# Other field names that were tried and are currently switched off:
#   file_name, analysis.workflow_type, experimental_strategy,
#   cases.project.project_id
requested_fields = (
    "cases.project.primary_site",
)

fields = ','.join(requested_fields)

In [39]:
# Every criterion below becomes one 'in' clause; all clauses are combined
# under a single top-level 'and' operator.
filter_criteria = (
    ("files.data_type", ["Gene Expression Quantification"]),
    ("files.analysis.workflow_type", ["HTSeq - FPKM"]),
    ("files.data_format", ["TXT"]),
)

filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": field_name, "value": allowed_values}}
        for field_name, allowed_values in filter_criteria
    ],
}

In [40]:
# Request payload. Because the request is sent as a POST, the filters can be
# handed over as a plain dict instead of being serialised into a string.
params = dict(
    filters=filters,
    fields=fields,
    format="TSV",
    size="100000",
    return_type="manifest",
)

In [41]:
# Query the GDC files endpoint. The payload is passed to 'json' (request body)
# rather than 'params' (query string) because this is a POST.
response = requests.post(
    files_endpt,
    headers={"Content-Type": "application/json"},
    json=params,
    timeout=120,  # without a timeout the cell can hang forever on a stalled connection
)

# Stop here on an HTTP error so an error body is never mistaken for a manifest
# by the cells that follow.
response.raise_for_status()

# The full body is large (over a million characters), so only show the header
# line and the first few rows instead of dumping everything into the notebook.
manifest_preview = response.content.decode("utf-8").splitlines()[:8]
print("\n".join(manifest_preview))

id	filename	md5	size	state
33daf7a6-92d6-4193-af31-43ba0dc42f7d	e78cab9d-1b43-4416-8dbc-78d51558d895.FPKM.txt.gz	95639d4bee15a24f4c2d1547676410f4	514368	released
c469b713-b5eb-45f4-8562-aca7d9f5e6d1	eeddee84-15ec-401f-95ee-de2bb7015168.FPKM.txt.gz	6142a426bdbe7537210fc00c7b03ebbb	545236	released
bbdac6db-5362-48fc-866c-94720ce421c8	45197059-d0fc-49bb-819b-bd7621427236.FPKM.txt.gz	e51d0355e4c5e9c945f341f644cd8d75	537717	released
d386a312-190e-40f8-85b9-9022ab696d91	86cb28d4-5dcd-4e7e-a4c9-c5a5b16a9739.FPKM.txt.gz	90055bd6e2f7a959bafe74fa2ce1fad6	562147	released
5e8d1839-40ab-432b-9262-6a72540c5e18	9f223240-1020-4450-a6ec-a6168a9211e5.FPKM.txt.gz	968c18169c965ee682a01222ecffb1e6	568707	released
095a793d-715d-4c45-9f37-84f2b5439a96	d7be2882-fa19-4a0f-937e-286066dd0642.FPKM.txt.gz	9cb11dc26db38aa0b28755fa30231f8e	568460	released
151ac13e-1e9f-40c0-97cd-5afb46a5bd3a	297978f4-9907-482f-89db-3a0a2a0f19a8.FPKM.txt.gz	d66e074e7840bdd1da427f3342b2dd8c	564037	released
302f9260-5ed6-45ff-b6e7-5ac3

In [42]:
# Number of characters in the UTF-8 decoded response body.
len(str(response.content, "utf-8"))

1561804

In [43]:
# Save the manifest to disk in a single step. The context manager closes the
# file even if the write fails, so no handle is left open between cells, and
# the explicit encoding keeps the file UTF-8 regardless of the platform default.
# 'manifest' stays bound to the (closed) file object afterwards, as before.
with open("data/manifest.txt", "w", encoding="utf-8") as manifest:
    manifest.write(response.content.decode("utf-8"))