In [1]:
import os
import requests
from pathlib import Path
import zipfile

In [2]:
# Root of the openFDA drug-label bulk download area.
BASE_URL = "https://download.open.fda.gov/drug/label/"

# Number of archive parts the dataset is split into. The same number appears
# in every filename ("...-of-0013") and bounds the part index, so both are
# derived from this single constant to keep them from drifting apart.
# NOTE(review): the part count may change when the dataset is republished —
# confirm against the current openFDA download listing before a fresh pull.
NUM_PARTS = 13

# One archive URL per part, numbered 0001 through NUM_PARTS (zero-padded to 4).
URLS = [
    f"{BASE_URL}drug-label-{part:04d}-of-{NUM_PARTS:04d}.json.zip"
    for part in range(1, NUM_PARTS + 1)
]

In [3]:
def download_openfda_labels(save_dir: str, keep_zip=False, timeout=60):
    """Download every archive listed in ``URLS`` into ``save_dir`` and unpack it.

    For each URL the archive is streamed to ``save_dir`` under its original
    file name, extracted in place, and then (by default) deleted. Progress is
    reported with ``print``.

    Args:
        save_dir: Directory that receives the archives and their extracted
            contents. Created (including parents) if it does not exist.
        keep_zip: When false (the default), each ``.zip`` is removed after it
            has been extracted.
        timeout: Passed to ``requests.get``; either a number of seconds or a
            ``(connect, read)`` tuple. Prevents a stalled server from hanging
            the notebook forever.

    Raises:
        requests.HTTPError: If a download responds with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: On connection failures or timeouts.
        zipfile.BadZipFile: If a downloaded file is not a valid zip archive.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    for url in URLS:
        zip_path = os.path.join(save_dir, os.path.basename(url))
        print(f"Downloading {url} → {zip_path}")

        # With stream=True the connection stays checked out until the body is
        # consumed or the response is closed; the context manager guarantees
        # the close even when the status check or the write loop raises.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        print(f"Saved: {zip_path}")

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(save_dir)
            print(f"Extracted: {zip_ref.namelist()}")

        if not keep_zip:
            os.remove(zip_path)
            print(f"Removed zip: {zip_path}")

In [None]:
if __name__ == "__main__":
    # Destination for the raw openFDA label files, relative to the notebook's
    # working directory.
    target_directory = "../data/openfda/original/"

    # Fetch and unpack every archive; zips are discarded after extraction
    # because keep_zip is left at its default.
    download_openfda_labels(save_dir=target_directory)