In [2]:
import requests
import zipfile
import io
import json
import os

In [3]:
def get_download_metadata(timeout=30):
    """Fetch the download index published at https://api.fda.gov/download.json.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of hanging
            the notebook forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    r = requests.get('https://api.fda.gov/download.json', timeout=timeout)
    # Surface HTTP errors here rather than trying to JSON-decode an error page.
    r.raise_for_status()
    return r.json()

In [4]:
def get_file_paths(download_metadata, endpoint):
    """Collect the ``'file'`` value of every partition listed for an endpoint.

    Looks up ``download_metadata['results']['drug'][endpoint]['partitions']``
    and returns the ``'file'`` entry of each partition, in listing order.

    Args:
        download_metadata: Nested mapping with the key layout described above.
        endpoint: Key under ``['results']['drug']`` to read partitions from.

    Returns:
        A list with one ``'file'`` value per partition.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    endpoint_entry = download_metadata['results']['drug'][endpoint]
    locations = []
    for partition in endpoint_entry['partitions']:
        locations.append(partition['file'])
    return locations

In [5]:
def download_file(from_path, to_dir, endpoint, timeout=60):
    """Download a zip archive and write each member's ``'results'`` as JSON.

    Every member of the archive is parsed as JSON and the value under its
    ``'results'`` key is written to ``<to_dir>/<endpoint>/<member name>``.

    Args:
        from_path: URL of the zip archive to download.
        to_dir: Base output directory; created if it does not exist.
        endpoint: Sub-directory name under ``to_dir`` for this archive's files.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument so a stalled connection raises instead of hanging.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: If the response body is not a valid zip archive.
        ValueError: If a member name would resolve outside
            ``<to_dir>/<endpoint>``.
        KeyError: If a member's JSON has no ``'results'`` key.
    """
    r = requests.get(from_path, timeout=timeout)
    # Fail on HTTP errors instead of handing an error page to ZipFile.
    r.raise_for_status()

    target_dir = os.path.join(to_dir, endpoint)
    target_root = os.path.abspath(target_dir)

    # The context manager closes the archive even if a member fails to parse.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        for n in z.namelist():
            if n.endswith('/'):
                continue  # directory entry: nothing to parse or write

            to_path = os.path.join(target_dir, n)
            # Member names come from the downloaded archive; refuse any that
            # would escape the target directory (e.g. '../x' or absolute paths).
            if os.path.commonpath([target_root, os.path.abspath(to_path)]) != target_root:
                raise ValueError('unsafe path in archive: %r' % (n,))

            data = json.loads(z.read(n))
            # Create the directory the file actually lands in (including the
            # endpoint sub-directory), not just the base output directory.
            os.makedirs(os.path.dirname(to_path), exist_ok=True)
            with open(to_path, 'w') as f:
                json.dump(data['results'], f)

In [6]:
def download_files(endpoints, to_dir):
    """Download every partition file of each requested endpoint.

    The download metadata is fetched once via ``get_download_metadata``;
    then, endpoint by endpoint, each path returned by ``get_file_paths`` is
    handed to ``download_file``.

    Args:
        endpoints: Iterable of endpoint names to download.
        to_dir: Base output directory forwarded to ``download_file``.
    """
    metadata = get_download_metadata()
    for endpoint in endpoints:
        for source_path in get_file_paths(metadata, endpoint):
            download_file(source_path, to_dir, endpoint)

In [7]:
# Download every partition of the listed endpoints into the relative 'data' directory.
download_files(
    endpoints=['ndc', 'drugsfda', 'label'],
    to_dir='data',
)