In [1]:
import json
import os

import pandas as pd
import requests

In [2]:
# URL of the GDC 'files' endpoint.
files_endpt = "https://api.gdc.cancer.gov/files"

# The 'fields' parameter is sent as a single comma-separated string of field
# names. Only the primary site is requested; the other candidates
# (file_name, analysis.workflow_type, experimental_strategy,
# cases.project.project_id) are left disabled.
fields = ",".join(("cases.project.primary_site",))

In [61]:
# All criteria are combined under one 'and' operator; each criterion is an
# 'in' test of a single field against a list of accepted values.
filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": field_name, "value": accepted}}
        for field_name, accepted in (
            ("files.data_type", ["Gene Expression Quantification"]),
            ("files.analysis.workflow_type", ["HTSeq - Counts"]),
            ("files.data_format", ["TXT"]),
            ("cases.project.project_id", ["TCGA-COAD"]),
        )
    ],
}

In [62]:
# Because the request is a POST, the filter parameters go in directly as a
# dict object.
params = dict(
    filters=filters,
    fields=fields,
    format="TSV",
    size="100000",
    return_type="manifest",
)

In [63]:
# The parameters are passed to 'json' rather than 'params' in this case.
# A timeout is set because requests otherwise waits indefinitely for the
# server to respond.
response = requests.post(
    files_endpt,
    headers={"Content-Type": "application/json"},
    json=params,
    timeout=120,
)

# Stop here on an HTTP error (4xx/5xx) so an error body is never printed or
# saved as if it were a manifest.
response.raise_for_status()

print(response.content.decode("utf-8"))

id	filename	md5	size	state
ac2e69fd-8e13-4d95-8210-595d09f616c6	d15c1117-5ccd-47da-bb0a-51e5a71535ff.htseq.counts.gz	06215c71c3b1167e4b79df9dc613c57f	245736	released
7af900ab-0a1a-471e-89a2-f887e2dcf433	0319a4cc-ee3f-4c6f-83e1-7cdd5f865dc0.htseq.counts.gz	dc9fa2748ff54309e025f053905e1373	250057	released
4571794b-22d1-49fc-9b84-c4ec3e6411b8	4ea4ed16-8597-426c-9485-0fc8a8d81973.htseq.counts.gz	442f6994a8247397e9310248e7715de6	242518	released
24c0b854-79c3-436a-87a7-4d0bbfac626a	9e189255-33ba-4802-934b-dd8a7898ef0a.htseq.counts.gz	d15e5cea960e499ef776fe2d539b81d9	237651	released
87c522d2-986f-44d3-9af2-2cec01c5d9e0	a6cd69d5-ea54-4183-aac5-2b6efb311db3.htseq.counts.gz	137e342c7e2bb0018e72deab17fc36bf	243891	released
6a750710-5ed9-4d24-b2bf-3a4e3211878f	e53e1a83-1979-4e12-bbb7-79b37d0cfe03.htseq.counts.gz	78e409955c6516c216bedbb43ce5e0c7	247315	released
7d560f54-fa24-429d-a51d-5c7d431c38bf	2d5fbcf8-4347-4787-b04d-e84c88316776.htseq.counts.gz	39798a5df25e1ad84a94d02750c12310	249506	released


In [64]:
# Number of characters in the UTF-8 decoded response body.
len(str(response.content, "utf-8"))

72446

In [65]:
# Create the output directory if it is missing so this cell also runs on a
# fresh checkout.
os.makedirs("data", exist_ok=True)

# The body is decoded as UTF-8, so write it back with the same explicit
# encoding instead of relying on the platform default.
with open("data/manifest.txt", "w", encoding="utf-8") as manifest:
    manifest.write(response.content.decode("utf-8"))

In [66]:
# The `with` statement that wrote data/manifest.txt already closed the file
# on exit, so an explicit close() is redundant; just confirm the handle is
# closed.
assert manifest.closed