# FILER API endpoints in Jupyter

Examples of the FILER data access API, following the FILER API PowerPoint presentation of 2021-04-13.


In [1]:
import urllib.parse
import json, requests, pprint

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell,
# not only the last one (IPython's default is "last_expr").
InteractiveShell.ast_node_interactivity = "all"

#### FILER web API endpoints:  

`get_data_region`  

`get_metadata`  

`get_overlapping_tracks_by_coord`  

In [2]:
# Base URLs (FILER and GADB).
# Note: the example cells below spell out the FILER base URL and endpoint
# names literally instead of referencing these constants.
request_url_FILER = 'https://tf.lisanwanglab.org/FILER'
request_url_GADB = 'https://tf.lisanwanglab.org/GADB/www'

# Endpoint script names; they are joined onto a base URL by get_query_url().
endpt_data_region = 'get_data_region.php' 
endpt_metadata = 'get_metadata.php'
endpt_overlap_track = 'get_overlapping_tracks_by_coord.php'

<br>  

### Helper functions

In [3]:
def get_query_url(requestURL, endpoint):
    '''Build the query URL "<requestURL>/<endpoint>?" for an endpoint.

    Parameters
    ----------
    requestURL : str
        Base URL of the service, with or without a trailing slash.
    endpoint : str
        Endpoint script name, with or without a leading slash.

    Returns
    -------
    str
        The base URL and endpoint joined by exactly one slash, followed
        by a trailing '?'.
    '''
    # Strip slashes at the seam so 'base/' + '/endpoint' does not yield '//'.
    base = requestURL.rstrip('/')
    path = endpoint.lstrip('/')
    return ''.join([base, '/', path, '?'])


def process_request(requestUri, payload=None, timeout=120):
    '''Send a GET request and return the decoded JSON body.

    Parameters
    ----------
    requestUri : str
        URL to query.
    payload : dict, optional
        Query parameters; passed to ``requests.get`` as ``params``.
    timeout : float or tuple, optional
        Passed to ``requests.get`` as ``timeout`` so that an unresponsive
        server cannot block the notebook indefinitely. Pass ``None`` to
        wait without limit (the previous behaviour).

    Returns
    -------
    object or None
        The decoded JSON value, or ``None`` if the response body could not
        be decoded as JSON. Callers must handle the ``None`` case.

    Notes
    -----
    Status and decode progress are reported with ``print``. Exceptions
    raised by ``requests.get`` itself (connection errors, timeouts) are
    not caught here.
    '''
    response = requests.get(requestUri, params=payload, timeout=timeout)
    rjson = None

    # A requests Response is truthy when its status code is below 400.
    if response:
        print('Success!')
    else:
        print('An error has occurred:' + str(response.status_code))

    try:
        rjson = response.json()
        print('Response JSON decoded')
    except ValueError:
        print('Error occurred: response JSON decode failed')
        # errors='replace' keeps this diagnostic print from raising
        # UnicodeDecodeError when the body is not valid UTF-8.
        print('response.content =\t', response.content.decode(errors='replace'))

    return rjson


<br>  

# Accessing individual data tracks in FILER (Web)

2021 April 13 FILER API ppt slide #5

<div>
<img src="attachment:8c033777-710d-4364-8ad4-d3916f0bc391.png" width="700"/>
</div>  



#### get_data_region - example 1:

In [4]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_data_region.php'
_params = {
    'region':'chr1:10000-1500000',
    'track':'NGEN000610'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)


Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_data_region.php?
84
[{'chr': 'chr1',
  'end': 780403,
  'name': 'Peak_1892',
  'pValue': 1405.68005,
  'peak': 1193,
  'qValue': 1402.31372,
  'score': 133,
  'signalValue': 62.055,
  'start': 777957,
  'strand': '.'},
 {'chr': 'chr1',
  'end': 827904,
  'name': 'Peak_8748',
  'pValue': 304.29565,
  'peak': 674,
  'qValue': 301.79691,
  'score': 36,
  'signalValue': 20.25406,
  'start': 826644,
  'strand': '.'}]


#### get_data_region - example 2:

In [5]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_data_region.php'
_params = {
    'region':'chr1:10000-900000',
    'track':'NGEN000610',
    'includeMetadata':1
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    ## for a dict response, len() counts its top-level keys
    print(len(rjson))
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_data_region.php?
6
{'Identifier': 'NGEN000610',
 'features': [{'chr': 'chr1',
               'end': 780403,
               'name': 'Peak_1892',
               'pValue': 1405.68005,
               'peak': 1193,
               'qValue': 1402.31372,
               'score': 133,
               'signalValue': 62.055,
               'start': 777957,
               'strand': '.'},
              {'chr': 'chr1',
               'end': 827904,
               'name': 'Peak_8748',
               'pValue': 304.29565,
               'peak': 674,
               'qValue': 301.79691,
               'score': 36,
               'signalValue': 20.25406,
               'start': 826644,
               'strand': '.'},
              {'chr': 'chr1',
               'end': 877445,
               'name': 'Peak_48086',
               'pValue': 6.17021,
               'peak': 171,
               'qValue': 4.50416,
               'score': 10,
      

<br>  

## Accessing track metadata

2021 April 13 FILER API ppt slide #6

<div>
<img src="attachment:3be30a25-ccae-45eb-ba3e-809d8e6110ae.png" width="600"/>
</div>  


#### get_metadata - example 1

In [6]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'dataSource':'DASHR2',
    'tissueCategory':'Connective Tissue'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
48
[{'Antibody': 'Not applicable',
  'Assay': 'short total RNA-Seq',
  'Biological replicate(s)': 'Not applicable',
  'Biosample type': 'primary cell',
  'Biosamples term id': 'Not applicable',
  'Data Category': 'Called peaks',
  'Data Source': 'DASHR2',
  'Date added to GADB': '7/1/2018',
  'Downloaded date': '6/30/2018',
  'ENCODE Experiment id': 'Not applicable',
  'File format': 'bed bed6+DASHR',
  'File name': 'articular-chondrocyte-of-knee-joint-rep1_ENCSR000CVB_peaks_annot.bed.gz',
  'File size': 273634,
  'Genome build': 'hg19',
  'Identifier': 'NGDS000325',
  'Link out URL': 'http://dashr2.lisanwanglab.org/index.php',
  'Number of intervals': 2599,
  'Output type': 'annotated peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/DASHRv2/short_total_RNA-seq/hg19/ENCODE_dataportal_hg19/articular-chondrocyte-of-knee-joint-rep1_ENCSR000CVB_peaks_annot.bed.gz',


#### get_metadata - example 2

In [7]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'cellType':'IMR-90'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
388
[{'Antibody': 'Not applicable',
  'Assay': 'RNA-PET',
  'Biological replicate(s)': '1, 2',
  'Biosample type': 'cell line',
  'Biosamples term id': 'EFO:0001196',
  'Data Category': 'Clusters',
  'Data Source': 'ENCODE',
  'Date added to GADB': '8/10/2018',
  'Downloaded date': '6/4/2018',
  'ENCODE Experiment id': 'ENCSR000BCT',
  'File format': 'bed bed12',
  'File name': 'ENCFF000LGM.bed.gz',
  'File size': 2625288,
  'Genome build': 'hg19',
  'Identifier': 'NGEN000048',
  'Link out URL': 'https://www.encodeproject.org',
  'Number of intervals': 78633,
  'Output type': 'clusters',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/ENCODE/data/RNA-PET/bed12/hg19/ENCFF000LGM.bed.gz',
  'Processed file md5': 'b22f5eaef445e36851369aada31ce067',
  'Raw File URL': 'https://www.encodeproject.org/files/ENCFF000LGM/@@download/ENCFF000LGM.bigBed',
  'Raw file download': 'wge

#### get_metadata - example 3

In [8]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'assayType':'DNase-seq'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
1785
[{'Antibody': 'Not applicable',
  'Assay': 'DNase-seq',
  'Biological replicate(s)': 1,
  'Biosample type': 'cell line',
  'Biosamples term id': 'EFO:0005713',
  'Data Category': 'Called peaks',
  'Data Source': 'ENCODE',
  'Date added to GADB': '8/10/2018',
  'Downloaded date': '6/4/2018',
  'ENCODE Experiment id': 'ENCSR000EID',
  'File format': 'bed narrowPeak',
  'File name': 'ENCFF237NQC.bed.gz',
  'File size': 1412420,
  'Genome build': 'hg19',
  'Identifier': 'NGEN004055',
  'Link out URL': 'https://www.encodeproject.org',
  'Number of intervals': 144749,
  'Output type': 'peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/ENCODE/data/DNase-seq/narrowpeak/hg19/ENCFF237NQC.bed.gz',
  'Processed file md5': '5691284a892227cb4d024b6b583dc934',
  'Raw File URL': 'https://www.encodeproject.org/files/ENCFF237NQC/@@download/ENCFF237NQC.bigBed',
  'Raw file dow

#### get_metadata - example 4

In [9]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'dataSource':'ROADMAP',
    'assayType':'ChIP-seq'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
4800
[{'Antibody': 'consolidated',
  'Assay': 'ChIP-seq',
  'Biological replicate(s)': 'Not applicable',
  'Biosample type': 'cell line',
  'Biosamples term id': 'Not applicable',
  'Data Category': 'Genomic annotation',
  'Data Source': 'ROADMAP',
  'Date added to GADB': '8/25/2018',
  'Downloaded date': '8/15/2018',
  'ENCODE Experiment id': 'Not applicable',
  'File format': 'bed bed4',
  'File name': 'E114_15_coreMarks_mnemonics.bed.gz',
  'File size': 2685062,
  'Genome build': 'hg19',
  'Identifier': 'NGRM005136',
  'Link out URL': 'http://www.roadmapepigenomics.org/',
  'Number of intervals': 464406,
  'Output type': 'ChromHMM',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/RoadMap/ChIP-seq/bed4/hg19/E114_15_coreMarks_mnemonics.bed.gz',
  'Processed file md5': '5420560442f55c16af53adee779335a7',
  'Raw File URL': 'https://egg2.wustl.edu/roadmap/data/byFileType

<br>  

2021 April 13 FILER API ppt slide #7

<div>
<img src="attachment:3086a2cf-1554-46f9-9bba-b5aee4461e1b.png" width="600"/>
</div>  


#### get_metadata - example 5:

In [10]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'filterString':'."Data Source"=="DASHR2"'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
632
[{'Antibody': 'Not applicable',
  'Assay': 'short total RNA-Seq',
  'Biological replicate(s)': 'Not applicable',
  'Biosample type': 'immortalized cell line',
  'Biosamples term id': 'Not applicable',
  'Data Category': 'Called peaks',
  'Data Source': 'DASHR2',
  'Date added to GADB': '7/1/2018',
  'Downloaded date': '6/30/2018',
  'ENCODE Experiment id': 'Not applicable',
  'File format': 'bed bed6+DASHR',
  'File name': 'A375-rep1_ENCSR376XXO_peaks_annot.bed.gz',
  'File size': 276558,
  'Genome build': 'hg19',
  'Identifier': 'NGDS000305',
  'Link out URL': 'http://dashr2.lisanwanglab.org/index.php',
  'Number of intervals': 2613,
  'Output type': 'annotated peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/DASHRv2/short_total_RNA-seq/hg19/ENCODE_dataportal_hg19/A375-rep1_ENCSR376XXO_peaks_annot.bed.gz',
  'Processed file md5': 'afc1f3ce10dc2487c6b256b8f8

#### get_metadata - example 6:

In [11]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'filterString':'.Identifier=="NGEN006898"'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
1
[{'Antibody': 'H3K27ac-human',
  'Assay': 'ChIP-seq',
  'Biological replicate(s)': '1, 2',
  'Biosample type': 'cell line',
  'Biosamples term id': 'EFO:0002095',
  'Data Category': 'Called peaks',
  'Data Source': 'ENCODE',
  'Date added to GADB': '8/10/2018',
  'Downloaded date': '6/4/2018',
  'ENCODE Experiment id': 'ENCSR391NPE',
  'File format': 'bed narrowPeak',
  'File name': 'ENCFF422DXB.bed.gz',
  'File size': 1292904,
  'Genome build': 'hg19',
  'Identifier': 'NGEN006898',
  'Link out URL': 'https://www.encodeproject.org',
  'Number of intervals': 45976,
  'Output type': 'replicated peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/ENCODE/data/ChIP-seq/narrowpeak/hg19/1/ENCFF422DXB.bed.gz',
  'Processed file md5': 'abe390cc298fb0ec736a15571f96df63',
  'Raw File URL': 'https://www.encodeproject.org/files/ENCFF422DXB/@@download/ENCFF422DXB.bigBed',
  'R

#### get_metadata - example 7:

In [12]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_metadata.php'
_params = {
    'genomeBuild':'hg19',
    'filterString':'."File name" | contains("ENCSR376XXO")'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_metadata.php?
4
[{'Antibody': 'Not applicable',
  'Assay': 'short total RNA-Seq',
  'Biological replicate(s)': 'Not applicable',
  'Biosample type': 'immortalized cell line',
  'Biosamples term id': 'Not applicable',
  'Data Category': 'Called peaks',
  'Data Source': 'DASHR2',
  'Date added to GADB': '7/1/2018',
  'Downloaded date': '6/30/2018',
  'ENCODE Experiment id': 'Not applicable',
  'File format': 'bed bed6+DASHR',
  'File name': 'A375-rep1_ENCSR376XXO_peaks_annot.bed.gz',
  'File size': 276558,
  'Genome build': 'hg19',
  'Identifier': 'NGDS000305',
  'Link out URL': 'http://dashr2.lisanwanglab.org/index.php',
  'Number of intervals': 2613,
  'Output type': 'annotated peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/DASHRv2/short_total_RNA-seq/hg19/ENCODE_dataportal_hg19/A375-rep1_ENCSR376XXO_peaks_annot.bed.gz',
  'Processed file md5': 'afc1f3ce10dc2487c6b256b8f8b3

<br>  

<br>  

## Accessing overlapping tracks / metadata 

<br>  

2021 April 13 FILER API ppt slide #8

<div>
<img src="attachment:d981c220-d483-4db7-9332-84d145573a25.png" width="600"/>
</div>  


#### get_overlapping_tracks_by_coord - example 1

In [13]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_overlapping_tracks_by_coord.php'
## NOTE(review): the metadata records printed elsewhere in this notebook are
## flat and use the key "Data Source" (capital S); this filter addresses
## .metadata."Data source" and the recorded run returned an empty list.
## Confirm the intended field path and casing against the API documentation.
_params = {
    'region':'chr1:1300000-1400000',
    'genomeBuild':'hg19',
    'outputFormat':'json',
    'filterString':'.metadata."Data source"=="DASHR2"'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_overlapping_tracks_by_coord.php?
0
[]


<br>  

2021 April 13 FILER API ppt slide #9

<div>
<img src="attachment:aa8eb0f1-2d5d-4e7f-af19-911be13bb360.png" width="600"/>
</div>  





#### get_overlapping_tracks_by_coord - example 2

In [14]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_overlapping_tracks_by_coord.php'
## NOTE(review): the metadata records printed elsewhere in this notebook are
## flat and use the key "Data Source" (capital S); this filter addresses
## .metadata."Data source" / .metadata."Tissue category" and the recorded run
## returned an empty list. Confirm the intended field paths and casing
## against the API documentation.
_params = {
    'region':'chr1:1400000-1500000',
    'genomeBuild':'hg19',
    'outputFormat':'json',
    'filterString':'.metadata."Data source" =="DASHR2" and .metadata."Tissue category"=="Lung"'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_overlapping_tracks_by_coord.php?
0
[]


#### get_overlapping_tracks_by_coord - example 3

In [15]:
requestURL = 'https://tf.lisanwanglab.org/FILER'
endpt = 'get_overlapping_tracks_by_coord.php'
_params = {
    'genomeBuild':'hg19',
    'region':'chr1:1103243-1103243',
    'filterString':'.',
    'outputFormat':'json'
}


## get endpoint URL
_url_endpoint = get_query_url(requestURL, endpt)

## process request
rjson = process_request(_url_endpoint, _params)


print(_url_endpoint)
## process_request() returns None when the body is not valid JSON;
## guard so that len(None) does not raise a TypeError.
if rjson is None:
    print('No JSON payload returned')
else:
    print(len(rjson))
    ## show only the first two records of a list response
    if isinstance(rjson, list):
        pprint.pprint(rjson[:2])
    else:
        pprint.pprint(rjson)

Success!
Response JSON decoded
https://tf.lisanwanglab.org/FILER/get_overlapping_tracks_by_coord.php?
1859
[{'Antibody': 'Not applicable',
  'Assay': 'DNase-seq',
  'Biological replicate(s)': 1,
  'Biosample type': 'cell line',
  'Biosamples term id': 'EFO:0005713',
  'Data Category': 'Called peaks',
  'Data Source': 'ENCODE',
  'Date added to GADB': '8/10/2018',
  'Downloaded date': '6/4/2018',
  'ENCODE Experiment id': 'ENCSR000EID',
  'File format': 'bed narrowPeak',
  'File name': 'ENCFF237NQC.bed.gz',
  'File size': 1412420,
  'Genome build': 'hg19',
  'Identifier': 'NGEN004055',
  'Link out URL': 'https://www.encodeproject.org',
  'Number of intervals': 144749,
  'Output type': 'peaks',
  'Processed File Download URL': 'https://tf.lisanwanglab.org/GADB/Annotationtracks/ENCODE/data/DNase-seq/narrowpeak/hg19/ENCFF237NQC.bed.gz',
  'Processed file md5': '5691284a892227cb4d024b6b583dc934',
  'Raw File URL': 'https://www.encodeproject.org/files/ENCFF237NQC/@@download/ENCFF237NQC.bigBe