In [2]:
import json
from pathlib import Path
import requests
import subprocess
import toml

# fetching data
____________

In [75]:
target_url = 'https://odata4.cbs.nl/CBS/83765NED/Observations'

## With Curl

In [76]:
path1 = Path('./temp/temp1.json')
path2 = Path('./temp/temp2.json')

In [77]:
subprocess.run(f"curl -fL {target_url} >> {path1}", shell=True)

CompletedProcess(args='curl -fL https://odata4.cbs.nl/CBS/83765NED/Observations >> temp/temp1.json', returncode=0)

In [78]:
with open(path1, 'r') as f:
    temp1 = json.load(f)

In [79]:
target_url2 = temp1['@odata.nextLink']

In [88]:
target_url2

'https://odata4.cbs.nl/CBS/83765NED/Observations?$skip=100000'

In [80]:
subprocess.run(f"curl -fL {target_url2} >> {path2}", shell=True)

CompletedProcess(args='curl -fL https://odata4.cbs.nl/CBS/83765NED/Observations?$skip=100000 >> temp/temp2.json', returncode=0)

## With Requests

In [83]:
r = requests.get(target_url).json()

In [84]:
r1 = r['value']

In [85]:
target_url2_r = r['@odata.nextLink']

In [87]:
target_url2_r

'https://odata4.cbs.nl/CBS/83765NED/Observations?$skip=100000'

In [92]:
r = requests.get(target_url2_r).json()

In [93]:
r2 = r['value']

## Comparing results

In [90]:
with open(path2, 'r') as f:
    temp2 = json.load(f)

In [91]:
temp1['value'][0]

{'Id': 0,
 'Measure': 'T001036',
 'ValueAttribute': 'None',
 'Value': 17081507.0,
 'WijkenEnBuurten': 'NL00'}

In [28]:
temp2['value'][0]

{'Id': 0,
 'Measure': 'T001036',
 'ValueAttribute': 'None',
 'Value': 17081507.0,
 'WijkenEnBuurten': 'NL00'}

In [94]:
r1[0]

{'Id': 0,
 'Measure': 'T001036',
 'ValueAttribute': 'None',
 'Value': 17081507.0,
 'WijkenEnBuurten': 'NL00'}

In [95]:
r2[0]

{'Id': 112045,
 'Measure': 'A019276',
 'ValueAttribute': 'None',
 'Value': 310.0,
 'WijkenEnBuurten': 'BU03633803'}

_____
# Parsing toml
____

In [1]:
with open ('../pyproject.toml') as f:
    toml_file = toml.loads(f)

NameError: name 'toml' is not defined

_____
# ODATA V3
______

In [38]:
import dask.bag as db

In [17]:
id = "83583NED"

In [18]:
base_url =  f"https://opendata.cbs.nl/ODataFeed/odata/{id}?$format=json"

In [20]:
urls = {
    item["name"]: item["url"]
for item in requests.get(base_url).json()["value"]
}

In [45]:
urls

{'TableInfos': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/TableInfos',
 'UntypedDataSet': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/UntypedDataSet',
 'TypedDataSet': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/TypedDataSet',
 'DataProperties': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/DataProperties',
 'CategoryGroups': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/CategoryGroups',
 'BedrijfstakkenBranchesSBI2008': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/BedrijfstakkenBranchesSBI2008',
 'Bedrijfsgrootte': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/Bedrijfsgrootte',
 'Perioden': 'https://opendata.cbs.nl/ODataFeed/odata/83583NED/Perioden'}

In [46]:
target_url = urls['TypedDataSet']

In [47]:
target_url = "?".join((target_url, "$format=json"))

In [48]:
r = requests.get(target_url).json()

In [73]:
# if r["value"]:
id

'83765NED'

In [50]:
def get_odata_v3(
    target_url: str,
):  # TODO -> How to define Bag for type hinting? (https://docs.python.org/3/library/typing.html#newtype)
    # TODO -> Change requests to CURL command
    """Gets a table from a specific url for CBS Odata v3.

    Args:
        - url_table_properties (str): url of the table

    Returns:
        - data (Dask bag): all data received from target url as json type, in a Dask bag
    """
    # First call target url and get json formatted response as dict
    r = requests.get(target_url).json()
    # Create Dask bag from dict (check if not empty field)
    if r["value"]:
        bag = db.from_sequence(r["value"])  # TODO -> define npartitions?

    # check if more data exists
    if "@odata.nextLink" in r:
        target_url = r["@odata.nextLink"]
    else:
        target_url = None

    # if more data exists continue to concat bag until complete
    while target_url:
        r = requests.get(target_url).json()
        if r["value"]:
            temp_bag = db.from_sequence(r["value"])
            bag = db.concat([bag, temp_bag])

        if "@odata.nextLink" in r:
            target_url = r["@odata.nextLink"]
        else:
            target_url = None

    return bag

In [51]:
bag = get_odata_v3(target_url)

In [52]:
bag.take(1)

({'ID': 0,
  'BedrijfstakkenBranchesSBI2008': 'T001081',
  'Bedrijfsgrootte': 'T001097',
  'Perioden': '2010JJ00',
  'BanenVanWerknemersInDecember_1': 7888.2},)

In [None]:
r = requests.get()