In [None]:
import requests
import json
import pandas as pd

In [None]:
# GDC API 'files' endpoint queried by the request cell below.
files_endpt = "https://api.gdc.cancer.gov/files"

# Metadata fields requested for every file. Fields that are currently
# switched off: "analysis.workflow_type", "experimental_strategy",
# "cases.project.project_id".
requested_fields = (
    "file_name",
    "cases.project.primary_site",
    "cases.project.disease_type",
)

# The 'fields' parameter is passed as one comma-separated string of names.
fields = ",".join(requested_fields)

In [None]:
# miRNA filters: every (field, allowed values) pair below becomes one "in"
# clause, and all clauses are combined with "and".
# NOTE: the following "Colon filters" cell assigns `filters` again, so on a
# top-to-bottom run this selection is replaced by that one.
mirna_criteria = [
    ("files.data_type", ["miRNA Expression Quantification"]),
    ("files.experimental_strategy", ["miRNA-Seq"]),
    ("files.data_format", ["TXT"]),
    (
        "cases.project.primary_site",
        [
            "Breast",
            "Brain",
            "Kidney",
            "Bronchus and lung",
            "Thyroid gland",
            "Corpus uteri",
            "Prostate gland",
            "Ovary",
            "Stomach",
        ],
    ),
]

filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": field_name, "value": allowed_values}}
        for field_name, allowed_values in mirna_criteria
    ],
}

In [None]:
# Colon filters: every (field, allowed values) pair below becomes one "in"
# clause, and all clauses are combined with "and".
# NOTE: this assignment replaces the `filters` built by the miRNA cell above.
colon_criteria = [
    ("files.data_type", ["Gene Expression Quantification"]),
    ("files.analysis.workflow_type", ["HTSeq - Counts"]),
    ("files.data_format", ["TXT"]),
    ("cases.project.project_id", ["TCGA-COAD"]),
]

filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": field_name, "value": allowed_values}}
        for field_name, allowed_values in colon_criteria
    ],
}

In [None]:
# Request body. Because a POST is used, the filter dict can be passed
# directly as a dict object.
# "return_type": "manifest" is left out on purpose so that the response
# carries the requested file fields.
params = dict(
    filters=filters,
    fields=fields,
    format="TSV",
    size="100000",
)

In [None]:
# Query the GDC files endpoint. The parameters are passed to 'json' rather
# than 'params' because the request is a POST with a JSON body.
response = requests.post(
    files_endpt,
    headers={"Content-Type": "application/json"},
    json=params,
    timeout=300,  # without a timeout a stalled connection blocks the kernel forever
)

# Stop here on an HTTP error; otherwise the cells below would save the error
# body to disk as if it were the TSV result.
response.raise_for_status()

# Show only the header and the first rows: with "size": "100000" the full
# body is far too long for cell output.
preview_lines = response.content.decode("utf-8").splitlines()[:10]
print("\n".join(preview_lines))

In [None]:
# Number of characters in the UTF-8 decoded response body.
len(str(response.content, "utf-8"))

In [None]:
# Save the response text as manifest.txt.
# NOTE(review): presumably this is a real GDC manifest only when
# "return_type": "manifest" is enabled in `params` — confirm.
# The encoding is set explicitly so the text is written as the UTF-8 it was
# decoded from, not in the platform default encoding.
with open("manifest.txt", "w", encoding="utf-8") as manifest:
    manifest.write(response.content.decode("utf-8"))
    # No explicit close(): the with-block closes the file on exit.

## Files

In [None]:
# Save the response text as files.txt; the next cell reads it back with
# pd.read_csv.
# The encoding is set explicitly so the text is written as the UTF-8 it was
# decoded from, not in the platform default encoding.
with open("files.txt", "w", encoding="utf-8") as files:
    files.write(response.content.decode("utf-8"))
    # No explicit close(): the with-block closes the file on exit.

In [None]:
# Load the saved TSV and build a table indexed by file name, with the two
# project annotations as columns.
files_raw = pd.read_csv("files.txt", sep='\t')

# Pick each column by name rather than by position, so a different column
# order in the TSV cannot silently swap the labels.
# NOTE(review): assumes nested fields come back as dotted headers that end in
# the field name (e.g. "...project.disease_type") — confirm against files.txt.
column_map = {}
for short_name in ("file_name", "disease_type", "primary_site"):
    hits = [
        col for col in files_raw.columns
        if col == short_name or col.endswith("." + short_name)
    ]
    if len(hits) != 1:
        raise KeyError(
            "expected exactly one '{}' column in files.txt, found {}".format(
                short_name, hits
            )
        )
    column_map[hits[0]] = short_name

# Keeping only the two annotation columns also leaves out the "id" column.
df_files = (
    files_raw
    .rename(columns=column_map)
    .set_index("file_name")[["disease_type", "primary_site"]]
)
df_files.head()

In [None]:
# Write the table as CSV; the index and the header row are included
# (pandas defaults).
df_files.to_csv("files_new.dat")