# ArrayExpress Programmatic Access

Uses the programmatic-access method documented on the ArrayExpress website.

Ref: https://www.ebi.ac.uk/arrayexpress/help/programmatic_access.html

In [1]:
import os
import urllib
import urllib.request
from io import BytesIO
from zipfile import ZipFile

import requests

# Get a list of files, along with their URLs, for the accession number of an experiment in the ArrayExpress database

In [2]:
def _file_pairs(experiment):
    '''Return the (name, url) tuples for the files of one experiment record.'''
    entries = experiment['file']
    # The caller already has to treat a lone experiment as a bare dict rather
    # than a one-element list; presumably a lone file is delivered the same
    # way (TODO confirm against the API), so normalise it before iterating.
    if isinstance(entries, dict):
        entries = [entries]
    return [(entry['name'], entry['url']) for entry in entries]


def getResponse(accession_num, timeout=30):
    '''This function returns the list of available files and their urls given an accession_number
       Input: The Array Express accession number of the experiment, and an
              optional timeout (in seconds) applied to the HTTP request
       Output: A list of (name, url) tuples of the files available for download.
               If the response reports more than one experiment, a dict mapping
               each experiment accession to its own list of (name, url) tuples.
               If the response reports no experiment, an empty list.
               If the HTTP status is not 200, the string "Bad Response".
       Input type: str (accession_num), number (timeout)
       Output type: list of tuples, dict of lists of tuples, or str'''

    base_url = 'https://www.ebi.ac.uk/arrayexpress/json/v3/experiments/{}/files/'
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get(base_url.format(accession_num), timeout=timeout)
    if response.status_code != 200:
        # Kept as a sentinel string (not an exception) for existing callers.
        return "Bad Response"

    listing = response.json()['files']
    total_experiments = listing['total-experiments']

    if total_experiments < 1:
        # Nothing matched, so there is no experiment record to read files from.
        return []

    if total_experiments > 1:
        # Several experiments: 'experiment' is a list of records.
        return {
            experiment['accession']: _file_pairs(experiment)
            for experiment in listing['experiment']
        }

    # Exactly one experiment: 'experiment' is the record itself.
    return _file_pairs(listing['experiment'])

    
# Fetch the file listing for experiment E-TABM-337.
files = getResponse(accession_num='E-TABM-337')

# View the filenames

In [3]:
files

[('E-TABM-337.eSet.r',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.eSet.r'),
 ('E-TABM-337.idf.txt',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.idf.txt'),
 ('E-TABM-337.idf.txt_original',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.idf.txt_original'),
 ('E-TABM-337.processed.1.zip',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.processed.1.zip'),
 ('E-TABM-337.raw.1.zip',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.raw.1.zip'),
 ('E-TABM-337.raw.2.zip',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.raw.2.zip'),
 ('E-TABM-337.README.txt',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.README.txt'),
 ('E-TABM-337.sdrf.txt',
  'https://www.ebi.ac.uk/arrayexpress/files/E-TABM-337/E-TABM-337.sdrf.txt'),
 ('A-MEXP-864.adf.txt',
  'https://www.ebi.ac.uk/arrayexpress/files/A-MEXP-864/A-MEXP-864.adf.txt'),
 ('A-MEXP-864.adf.xls',
  'https://www.ebi.

Here, we are mainly interested in the E-TABM-337.processed.1.zip file. The following function shows how to download and extract a zip file from the ArrayExpress server.

# Download and Extract Zip

In [4]:
def extractZip(fileurl,output_file_path,timeout=60):
    '''Given an input fileurl and an output_file_path, this function downloads
       the zip archive at that url and extracts all of its members to that path
       Input: url of the zip file, directory to extract into, and an optional
              timeout (in seconds) applied to the HTTP request
       Output: None
       Raises: requests.HTTPError if the server answers with an error status,
               zipfile.BadZipFile if the downloaded content is not a zip archive'''

    response = requests.get(fileurl, timeout=timeout)
    # Stop on HTTP errors here; otherwise the error page body would be handed
    # to ZipFile and surface as a misleading BadZipFile.
    response.raise_for_status()

    # The whole archive is held in memory; the context manager closes it
    # once extraction has finished.
    with ZipFile(BytesIO(response.content)) as archive:
        archive.extractall(output_file_path)
    return

# files[3] is the E-TABM-337.processed.1.zip entry in the listing shown above;
# element [1] of that tuple is its download url.
extractZip(fileurl=files[3][1], output_file_path='ArrayExpressData/E-TABM-337')

# Download a text file

We are also interested in text files, which are very common and contain important information.

In [5]:
def downloadText(fileurl,output_file_path,output_file_name='myfile.txt'):
    '''Given an input fileurl and an output_file_path, this function downloads
       the file at that url and saves it under that path as output_file_name
       Input: url of the file, directory to save into (with or without a
              trailing separator), and the name to give the saved file
       Output: None'''

    # Create the target directory if needed; an empty path means the current
    # working directory, which needs no creation.
    if output_file_path:
        os.makedirs(output_file_path, exist_ok=True)

    # os.path.join inserts the separator only when it is missing, so a path
    # without a trailing slash no longer gets glued onto the file name.
    destination = os.path.join(output_file_path, output_file_name)
    urllib.request.urlretrieve(fileurl, destination)
    return

# URL of A-MEXP-864.reporters.txt on the ArrayExpress file server.
file_url = 'https://www.ebi.ac.uk/arrayexpress/files/A-MEXP-864/A-MEXP-864.reporters.txt'

# Save it next to the extracted experiment data under a new file name.
downloadText(
    file_url,
    output_file_path='ArrayExpressData/E-TABM-337/',
    output_file_name='Contig-ORF.txt',
)

*Please note that the request call often does not list all the files that are available for a particular experiment. This is not caused by pagination; the listing returned by the API is simply not kept up to date.*