In [1]:
import requests

from pathlib import Path
import json
import time

from pyarrow import json as pa_json
import pyarrow.parquet
import pandas as pd

import dask.bag as db

In [2]:
def get_table_properties(table_url):
    r = requests.get(table_url +"/Properties").json()
    return r

In [3]:
def get_table_description(table_url):
    r = requests.get(table_url +"/Properties").json()
    return r['Description']

In [4]:
def get_odata_pd(target_url):
    data = pd.DataFrame()
    while target_url:
        r = requests.get(target_url).json()
        data = data.append(pd.DataFrame(r['value']))
        
        if '@odata.nextLink' in r:
            target_url = r['@odata.nextLink']
        else:
            target_url = None
            
    return data

In [5]:
def get_odata_list(target_url):
    data = []
    while target_url:
        r = requests.get(target_url).json()
        data.extend(r['value'])
        
        if '@odata.nextLink' in r:
            target_url = r['@odata.nextLink']
        else:
            target_url = None
            
    return data

In [13]:
def get_odata_bag(target_url):
     # First call target url and get json formatted response as dict
    r = requests.get(target_url).json()
    # Create Dask bag from dict
    bag = db.from_sequence(r['value'])  # TODO -> define npartitions?

    # check if more data exists
    if '@odata.nextLink' in r:
        target_url = r['@odata.nextLink']
    else:
        target_url = None

    # if more data exists continue to concat bag until complete
    while target_url:
        r = requests.get(target_url).json()
        temp_bag = db.from_sequence(r['value'])
        bag = db.concat([bag, temp_bag])

        if '@odata.nextLink' in r:
            target_url = r['@odata.nextLink']
        else:
            target_url = None

    return bag

In [14]:
TEMP = "test-cbs-v4-to-parquet"
TEMP_JSON = Path(TEMP + ".json")
TEMP_NDJSON = Path(TEMP + ".ndjson")
TEMP_PARQUET = Path(TEMP + ".parquet")

TESTDATA = "82807NED"

### Test conversion to parquet

In [15]:
table_url = f"https://odata4.cbs.nl/CBS/{TESTDATA}"
target_url = table_url + "/Observations"
# data_82931 = get_odata(target_url)

In [16]:
urls = {
        item["name"]: item["url"]
        for item in requests.get(table_url).json()["value"]
    }

In [12]:
r = requests.get(table_url).json()

In [32]:
pq_dir = Path("/temp/parquet")

In [41]:
gcs_folder = "/CBS/V4/"

In [42]:
pfile = os.listdir(pq_dir)[0]

In [54]:
from datetime import datetime
datetime.today().date().strftime('%Y%m%d')

'20201111'

In [58]:
datetime.now().strftime('%H:%M:%S')

'12:17:49'

In [61]:
pq_dir = Path(f"../temp/{datetime.today().date().strftime('%Y%m%d')}/parquet")

In [65]:
for pfile in os.listdir(pq_dir):
        # gcs_blob = gcs_bucket.blob(gcs_folder)
        print(f"start GCS upload {pfile} @ {datetime.now().strftime('%H:%M:%S')}")
        with open (pq_dir/pfile, "rb") as file:
            print("ok")

start GCS upload cbs.82807NED_PersoonskenmerkenCodes.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_PeriodenGroups.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_PeriodenCodes.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_BedrijfstakkenBranchesSBI2008Groups.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_BedrijfstakkenBranchesSBI2008Codes.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_PersoonskenmerkenGroups.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_Observations.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_MeasureCodes.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_Dimensions.parquet @ 12:29:41
ok
start GCS upload cbs.82807NED_GeslachtCodes.parquet @ 12:29:41
ok


In [70]:
urls = {
    item['name']: table_url + "/" +item['url']
    for item in r['value']
}

In [71]:
urls

{'MeasureCodes': 'https://odata4.cbs.nl/CBS/82807NED/MeasureCodes',
 'Dimensions': 'https://odata4.cbs.nl/CBS/82807NED/Dimensions',
 'PeriodenGroups': 'https://odata4.cbs.nl/CBS/82807NED/PeriodenGroups',
 'PeriodenCodes': 'https://odata4.cbs.nl/CBS/82807NED/PeriodenCodes',
 'BedrijfstakkenBranchesSBI2008Groups': 'https://odata4.cbs.nl/CBS/82807NED/BedrijfstakkenBranchesSBI2008Groups',
 'BedrijfstakkenBranchesSBI2008Codes': 'https://odata4.cbs.nl/CBS/82807NED/BedrijfstakkenBranchesSBI2008Codes',
 'PersoonskenmerkenGroups': 'https://odata4.cbs.nl/CBS/82807NED/PersoonskenmerkenGroups',
 'PersoonskenmerkenCodes': 'https://odata4.cbs.nl/CBS/82807NED/PersoonskenmerkenCodes',
 'GeslachtCodes': 'https://odata4.cbs.nl/CBS/82807NED/GeslachtCodes',
 'Observations': 'https://odata4.cbs.nl/CBS/82807NED/Observations',
 'Properties': 'https://odata4.cbs.nl/CBS/82807NED/Properties'}

### Take a look at the Properties table (singleton?)

In [12]:
# Dump to ndjson file
with open (TEMP_NDJSON, 'w+') as f:
    for record in r['value']:
        f.write(json.dumps(record) + "\n")

In [72]:
r = requests.get(table_url + "/Properties")

In [74]:
properties = r.json()

In [75]:
properties.keys()

dict_keys(['@odata.context', 'Identifier', 'Title', 'Description', 'Language', 'Authority', 'Modified', 'TemporalCoverage', 'Catalog', 'Publisher', 'ContactPoint', 'Version', 'VersionNotes', 'VersionReason', 'Frequency', 'Status', 'ObservationCount', 'ObservationsModified', 'DatasetType', 'DefaultPresentation', 'GraphTypes', 'SearchPriority', 'License', 'Source', 'Summary', 'LongDescription', 'Provenance', 'RelatedSources'])

In [16]:
# Create PyArrow tablef from ndjson file
table = pa_json.read_json(TEMP_NDJSON)

In [17]:
table

pyarrow.Table
Id: int64
Measure: string
ValueAttribute: string
Value: double
Perioden: string
BedrijfstakkenBranchesSBI2008: string
Persoonskenmerken: string
Geslacht: string

In [18]:
# Create Parquet file
pyarrow.parquet.write_table(table, TEMP_PARQUET)

### Explore CBS Odata V4

In [62]:
# List all tables under a dataset ID
meta = get_odata_pd(table_url)
meta

Unnamed: 0,name,kind,url
0,MeasureCodes,EntitySet,MeasureCodes
1,Dimensions,EntitySet,Dimensions
2,PeriodenGroups,EntitySet,PeriodenGroups
3,PeriodenCodes,EntitySet,PeriodenCodes
4,BedrijfstakkenBranchesSBI2008Groups,EntitySet,BedrijfstakkenBranchesSBI2008Groups
5,BedrijfstakkenBranchesSBI2008Codes,EntitySet,BedrijfstakkenBranchesSBI2008Codes
6,PersoonskenmerkenGroups,EntitySet,PersoonskenmerkenGroups
7,PersoonskenmerkenCodes,EntitySet,PersoonskenmerkenCodes
8,GeslachtCodes,EntitySet,GeslachtCodes
9,Observations,EntitySet,Observations


In [47]:
# Get all metadata table names (=url extensions).
# Exclude Observations as it is the actual data and retrieved separately
# Also include Properties as it is not relevant
meta_names = [url for url in meta.url.tolist() if url not in ['Observations', 'Properties']]
meta_names

['MeasureCodes',
 'Dimensions',
 'PeriodenGroups',
 'PeriodenCodes',
 'BedrijfstakkenBranchesSBI2008Groups',
 'BedrijfstakkenBranchesSBI2008Codes',
 'PersoonskenmerkenGroups',
 'PersoonskenmerkenCodes',
 'GeslachtCodes']

In [50]:
# Convert all metadata names to actual urls for table retrieval
meta_urls = [table_url + "/" + name for name in meta_names]
meta_urls

['https://odata4.cbs.nl/CBS/82807NED/MeasureCodes',
 'https://odata4.cbs.nl/CBS/82807NED/Dimensions',
 'https://odata4.cbs.nl/CBS/82807NED/PeriodenGroups',
 'https://odata4.cbs.nl/CBS/82807NED/PeriodenCodes',
 'https://odata4.cbs.nl/CBS/82807NED/BedrijfstakkenBranchesSBI2008Groups',
 'https://odata4.cbs.nl/CBS/82807NED/BedrijfstakkenBranchesSBI2008Codes',
 'https://odata4.cbs.nl/CBS/82807NED/PersoonskenmerkenGroups',
 'https://odata4.cbs.nl/CBS/82807NED/PersoonskenmerkenCodes',
 'https://odata4.cbs.nl/CBS/82807NED/GeslachtCodes']

In [51]:
# Get observations data, meaning the actual values of the dataset
observations = get_odata_pd(target_url)
observations

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,0,3000795_1,,7771.0,2003KW01,T001081,T009002,T001038
1,1,3000795_1,,7782.0,2003KW02,T001081,T009002,T001038
2,2,3000795_1,,7809.0,2003KW03,T001081,T009002,T001038
3,3,3000795_1,,7770.0,2003KW04,T001081,T009002,T001038
4,4,3000795_1,,7783.0,2003JJ00,T001081,T009002,T001038
...,...,...,...,...,...,...,...,...
81765,496075,3000795_1,,0.0,2018KW01,999999,2021440,4000
81766,496076,3000795_1,,0.0,2018KW02,999999,2021440,4000
81767,496077,3000795_1,,0.0,2018KW03,999999,2021440,4000
81768,496078,3000795_1,,0.0,2018KW04,999999,2021440,4000


In [53]:
meta_tables = [get_odata_pd(url) for url in meta_urls]

In [59]:
meta_tables[3]

Unnamed: 0,Identifier,Index,Title,Description,DimensionGroupId,Status
0,2003KW01,1,2003 1e kwartaal,,0,Definitief
1,2003KW02,2,2003 2e kwartaal,,0,Definitief
2,2003KW03,3,2003 3e kwartaal,,0,Definitief
3,2003KW04,4,2003 4e kwartaal,,0,Definitief
4,2003JJ00,5,2003,,1,Definitief
...,...,...,...,...,...,...
75,2018KW01,76,2018 1e kwartaal,,0,Definitief
76,2018KW02,77,2018 2e kwartaal,,0,Definitief
77,2018KW03,78,2018 3e kwartaal,,0,Definitief
78,2018KW04,79,2018 4e kwartaal,,0,Definitief


In [58]:
meta_tables[2]

Unnamed: 0,Id,Index,Title,Description,ParentId
0,1,0,Jaren,,
1,0,2,Kwartalen,,


In [26]:
# list all dataset dimensions
dimensions = get_odata_pd(table_url + "/Dimensions")
dimensions

Unnamed: 0,Identifier,Title,Description,Kind,MapYear,ReleasePolicy
0,Perioden,Perioden,,TimeDimension,,True
1,BedrijfstakkenBranchesSBI2008,Bedrijfstakken/branches SBI 2008,,Dimension,,
2,Persoonskenmerken,Persoonskenmerken,,Dimension,,
3,Geslacht,Geslacht,,Dimension,,


In [None]:
[for dim in dimensions]

In [30]:
# get Measure Codes
codes = get_odata_pd(table_url + "/MeasureCodes")
codes

Unnamed: 0,Identifier,Index,Title,Description,MeasureGroupId,DataType,Unit,Format,Decimals,PresentationType
0,3000795_1,4,Werkzame beroepsbevolking,Personen die betaald werk hebben.\r\n\r\nDeze ...,,Integer,x 1 000,,0,


In [35]:
codes.Description

0    Personen die betaald werk hebben.\r\n\r\nDeze ...
Name: Description, dtype: object

In [17]:
cbs_catalogs = get_odata_pd("https://odata4.cbs.nl/CBS/Catalogs")
cbs_datasets = get_odata_pd("https://odata4.cbs.nl/CBS/Datasets")

In [20]:
cbs_datasets

Unnamed: 0,Identifier,Description,Language,Title,Modified,Catalog,Version,VersionNotes,VersionReason,Status,ObservationsModified,ObservationCount,DatasetType
0,80784ned,\nDeze tabel bevat gegevens op regioniveau ove...,nl,Landbouw; arbeidskrachten naar regio,2020-03-03T02:00:00+01:00,CBS,202003030200,,Actualisering,Regulier,2020-03-03T02:00:00+01:00,173148,Numeric
1,81075ned,\nDeze tabel gaat over de werkloosheidsduur va...,nl,Werkloze beroepsbevolking; werkloosheidsduur e...,2020-01-23T02:00:00+01:00,CBS,202001230200,,Stopgezet,Gediscontinueerd,2019-11-14T02:00:00+01:00,2432,Numeric
2,81575NED,\nDeze tabel bevat gegevens over het aantal ve...,nl,"Vestigingen van bedrijven; bedrijfstak, gemeente",2020-04-08T02:00:00+02:00,CBS,202004080200,,Actualisering,Regulier,2020-04-08T02:00:00+02:00,142800,Numeric
3,82245NED,\nDeze tabel bevat cijfers over de Nederlandse...,nl,Bevolking en huishoudens; viercijferige postco...,2019-08-23T02:00:00+02:00,CBS,201908230200,,Stopgezet,Gediscontinueerd,2013-11-15T02:00:00+01:00,326174,Numeric
4,82807NED,\nDeze tabel bevat kwartaal- en jaarcijfers ov...,nl,Werkzame beroepsbevolking; bedrijf,2019-11-29T02:00:00+01:00,CBS,201911290200,De cijfers over het onderwijsniveau in deze ta...,Correctie,Regulier,2019-11-29T02:00:00+01:00,481770,Numeric
5,82931NED,\nOverzicht van statistische gegevens van geme...,nl,Kerncijfers wijken en buurten 2014,2018-07-27T02:00:00+02:00,CBS,201807270200,,Verbetering,Regulier,2018-07-27T02:00:00+02:00,1348572,Numeric
6,83220NED,\nOverzicht van statistische gegevens van geme...,nl,Kerncijfers wijken en buurten 2015,2018-07-27T02:00:00+02:00,CBS,201807270200,,Verbetering,Regulier,2018-07-27T02:00:00+02:00,1401105,Numeric
7,83433NED,\nDeze tabel bevat cijfers over het prijsverlo...,nl,"Consumentenprijzen; werknemers laag, alle basi...",2016-04-12T02:00:00+02:00,CBS,201604120200,,Nieuw,Gediscontinueerd,2016-04-12T02:00:00+02:00,2580,Numeric
8,83435NED,\nDeze tabel bevat cijfers over het aantal pas...,nl,"Amsterdam Airport Schiphol; passagiersvervoer,...",2020-02-14T02:00:00+01:00,CBS,202002140200,,Actualisering,Regulier,2020-02-14T02:00:00+01:00,28742,Numeric
9,83487NED,\nOverzicht van statistische gegevens van geme...,nl,Kerncijfers wijken en buurten 2016,2019-02-01T02:00:00+01:00,CBS,201902010200,,Actualisering bijzonder,Regulier,2019-02-01T02:00:00+01:00,1511607,Numeric


In [18]:
ids = cbs_datasets.Identifier.tolist()

In [19]:
all_table_urls = ["https://odata4.cbs.nl/CBS/"+id for id in ids]

_________
## Check if table from parquet file is the same as table from cbs

In [79]:
from_pq.shape, from_cbs.shape

((481770, 8), (481770, 8))

In [83]:
from_cbs.head()

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,0,3000795_1,,7771.0,2003KW01,T001081,T009002,T001038
1,1,3000795_1,,7782.0,2003KW02,T001081,T009002,T001038
2,2,3000795_1,,7809.0,2003KW03,T001081,T009002,T001038
3,3,3000795_1,,7770.0,2003KW04,T001081,T009002,T001038
4,4,3000795_1,,7783.0,2003JJ00,T001081,T009002,T001038


In [84]:
from_pq.head()

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,0,3000795_1,,7771.0,2003KW01,T001081,T009002,T001038
1,1,3000795_1,,7782.0,2003KW02,T001081,T009002,T001038
2,2,3000795_1,,7809.0,2003KW03,T001081,T009002,T001038
3,3,3000795_1,,7770.0,2003KW04,T001081,T009002,T001038
4,4,3000795_1,,7783.0,2003JJ00,T001081,T009002,T001038


In [87]:
from_pq.eq(from_cbs)

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,True,True,True,True,True,True,True,True
0,False,True,True,False,True,False,False,True
0,False,True,True,False,False,False,False,False
0,False,True,True,False,False,True,False,False
0,False,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...
481765,False,False,False,False,False,False,False,False
481766,False,False,False,False,False,False,False,False
481767,False,False,False,False,False,False,False,False
481768,False,False,False,False,False,False,False,False


### Check if dataframe loaded from Parquet file written after Dask Bag implementation is same as original
______

In [96]:
table = bag
file_name = 'test'
out_dir = Path('../temp/test/')

In [98]:
# File path to dump table as ndjson
ndjson_path = Path(f"{temp_ndjson_dir}/{file_name}.ndjson")
# File path to create as parquet file
pq_path = Path(f"{out_dir}/{file_name}.parquet")

In [97]:
# create directories to store files
out_dir = Path(out_dir)
temp_ndjson_dir = Path("./temp/ndjson")
create_dir(temp_ndjson_dir)
create_dir(out_dir)

PosixPath('../temp/test')

In [99]:
# Dump as ndjson format  # TODO -> how to lazily iterage over a Bag to generate a single .ndjson file
with open(ndjson_path, 'w+') as ndjson:
    for record in table:
        print(record)
        print(type(record))
        break
        # ndjson.write(json.dumps(record) + "\n")

{'Id': 0, 'Measure': '3000795_1', 'ValueAttribute': 'None', 'Value': 7771.0, 'Perioden': '2003KW01', 'BedrijfstakkenBranchesSBI2008': 'T001081', 'Persoonskenmerken': 'T009002', 'Geslacht': 'T001038'}
<class 'dict'>


In [104]:
delayed = table.to_delayed()

In [108]:
delayed[1], delayed[0]

(Delayed(('from_sequence-e777cdd1a8a10054bfb99adce15d8dbf', 1)),
 Delayed(('from_sequence-e777cdd1a8a10054bfb99adce15d8dbf', 0)))

In [112]:
table.map(json.dumps).to_textfiles(temp_ndjson_dir/"*.json")

['/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/00.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/01.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/02.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/03.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/04.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/05.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/06.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/07.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/08.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/09.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/10.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/11.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/12.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/ndjson/13.json',
 '/Use

In [144]:
filenames = sorted(glob(str(temp_ndjson_dir)+"/*.json"))

In [140]:
from glob import glob
filenames = sorted(glob(str(temp_ndjson_dir)+"/*.json"))
with open(temp_ndjson_dir/'outfile.json', 'w') as out:
    for fn in filenames:
        with open(fn) as f:
            out.write(f.read())

In [146]:
target_url

'https://odata4.cbs.nl/CBS/82807NED/Observations'

In [147]:
df_from_cbs = get_odata_pd(target_url)

In [149]:
df_from_parquet = pd.read_parquet("/Users/tslilstrauss/Projects/nimbletl/temp/20201111/parquet/cbs.82807NED_Observations.parquet")

In [148]:
df_from_cbs.head()

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,0,3000795_1,,7771.0,2003KW01,T001081,T009002,T001038
1,1,3000795_1,,7782.0,2003KW02,T001081,T009002,T001038
2,2,3000795_1,,7809.0,2003KW03,T001081,T009002,T001038
3,3,3000795_1,,7770.0,2003KW04,T001081,T009002,T001038
4,4,3000795_1,,7783.0,2003JJ00,T001081,T009002,T001038


In [151]:
df_from_parquet.describe()

Unnamed: 0,Id,Value
count,481770.0,481770.0
mean,246339.821782,89.966609
std,143092.089111,317.879557
min,0.0,0.0
25%,120442.25,1.0
50%,245654.5,9.0
75%,370866.75,50.0
max,496079.0,8862.0


In [153]:
df_from_cbs.eq(df_from_parquet)

Unnamed: 0,Id,Measure,ValueAttribute,Value,Perioden,BedrijfstakkenBranchesSBI2008,Persoonskenmerken,Geslacht
0,True,True,True,True,True,True,True,True
0,False,True,True,False,True,False,False,True
0,False,True,True,False,False,False,False,False
0,False,True,True,False,False,True,False,False
0,False,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...
481765,False,False,False,False,False,False,False,False
481766,False,False,False,False,False,False,False,False
481767,False,False,False,False,False,False,False,False
481768,False,False,False,False,False,False,False,False


In [154]:
df_from_parquet.dtypes

Id                                 int64
Measure                           object
ValueAttribute                    object
Value                            float64
Perioden                          object
BedrijfstakkenBranchesSBI2008     object
Persoonskenmerken                 object
Geslacht                          object
dtype: object

In [None]:
df_from_parquet.head()

## Check if /n is added when parsin bag to json file

In [17]:
url = table_url+"/Dimensions"

In [23]:
bag = get_odata_bag(target_url)

In [25]:
bag.map(json.dumps).to_textfiles("./temp/*.json")

ss/Projects/nimbletl/nimbletl/temp/193.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/194.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/195.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/196.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/197.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/198.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/199.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/200.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/201.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/202.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/203.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/204.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/205.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/206.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimbletl/temp/207.json',
 '/Users/tslilstrauss/Projects/nimbletl/nimb