## Setup: download data from the Data Registry

In [None]:
# @title Data registry functions{ display-mode: "form" }
import requests

# Root URL from which the other Data Registry URLs in this cell are built
# (the trailing "en/" segment presumably selects the English locale -- confirm).
DATA_REGISTRY_BASE_URL = "https://data.open-contracting.org/en/"
# JSON endpoint requested by get_publications().
PUBLICATIONS_URL = f"{DATA_REGISTRY_BASE_URL}publications.json"


def get_publications():
    """Fetch the publications listed at ``PUBLICATIONS_URL``.

    Each entry of the decoded JSON payload gets an extra ``"label"`` key of
    the form ``"<country> - <title>"``, built from its ``"country"`` and
    ``"title"`` values.

    Returns:
        The decoded JSON payload, with the ``"label"`` key added to every entry.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(PUBLICATIONS_URL, timeout=10)
    # Fail loudly on an error status instead of trying to decode an error
    # page as if it were the publications list.
    response.raise_for_status()
    publications = response.json()
    for publication in publications:
        publication["label"] = f"{publication['country']} - {publication['title']}"
    return publications


def get_publication_select_box():
    """Build a dropdown offering every publication label in sorted order."""
    labels = [publication["label"] for publication in get_publications()]
    labels.sort()
    return widgets.Dropdown(options=labels, description="Publication:", disabled=False)


def get_available_years(publication):
    """List the download options for a publication.

    The first option is always the string ``"full"``. When both
    ``publication["date_from"]`` and ``publication["date_to"]`` are truthy,
    it is followed by every year (as an int) from the first four characters
    of ``date_from`` through the first four characters of ``date_to``,
    inclusive.
    """
    # Without both bounds there is no year range to offer.
    if not (publication["date_from"] and publication["date_to"]):
        return ["full"]
    first_year = int(publication["date_from"][:4])
    last_year = int(publication["date_to"][:4])
    return ["full", *range(first_year, last_year + 1)]


def get_years_select_box(publication_select_box):
    """Build the year dropdown for the publication picked in ``publication_select_box``.

    The publication is found by matching ``publication_select_box.value``
    against the ``"label"`` of each entry returned by ``get_publications()``;
    the first match wins.

    Returns:
        A ``(dropdown, publication)`` tuple: the year dropdown, whose options
        come from ``get_available_years()``, and the matching publication dict.

    Raises:
        StopIteration: if no publication carries the selected label.
    """
    matching = (
        candidate for candidate in get_publications() if candidate["label"] == publication_select_box.value
    )
    selected_publication = next(matching)
    year_box = widgets.Dropdown(
        options=get_available_years(selected_publication),
        description="Year:",
        disabled=False,
    )
    return year_box, selected_publication


def download_file(selected_publication, selected_year):
    """Download a gzip-compressed JSON Lines export and save it decompressed.

    Args:
        selected_publication: Publication dict; its ``"id"`` is used in the
            download URL and its ``"source_id"`` in the output file name.
        selected_year: Value naming the export to fetch (for example an entry
            from ``get_available_years()``); it is used in both the remote
            file name and the local one.

    Returns:
        The name of the written file, ``"<source_id>-<selected_year>.jsonl"``,
        relative to the current working directory.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    import io  # local import: only this function needs it

    file_name = f"{selected_publication['source_id']}-{selected_year}.jsonl"
    download_url = (
        f'{DATA_REGISTRY_BASE_URL}publication/{selected_publication["id"]}/download?name={selected_year}.jsonl.gz'
    )
    response = requests.get(download_url, timeout=10)
    # Stop before creating the output file if the server returned an error
    # page rather than the compressed export.
    response.raise_for_status()
    # Decompress straight from the in-memory payload. Writing it to a named
    # temporary file and reopening that file by name without flushing could
    # read a truncated (or empty) file, and reopening by name is not portable.
    with gzip.open(io.BytesIO(response.content)) as i, Path(file_name).open("wb") as o:
        shutil.copyfileobj(i, o)
    return file_name