In [1]:
%load_ext autoreload
%autoreload 2

# Proposal: tiny light framework for routing with mappings

In [6]:
from typing import Callable
from dol import wrap_kvs
from tabled import dflt_ext_mapping, get_ext, identity, get_protocol
from graze import graze

def mk_key_func_reader(key: Callable = identity, mapping=None):
    """Wrap ``mapping`` so that keys are passed through ``key`` before lookup.

    The result is ``wrap_kvs(mapping, id_of_key=key)``: a key handed to the
    wrapper (e.g. ``'asdf.xls'``) is first transformed by ``key`` and the
    transformed value (e.g. ``'xls'``) is what gets looked up in ``mapping``.
    The transformation is one-way only; no inverse is supplied.

    :param key: Callable applied to each incoming key. Defaults to ``identity``.
    :param mapping: The mapping to wrap. A new empty ``dict`` is used only
        when this is ``None``.
    :return: The ``wrap_kvs``-wrapped mapping.
    """
    # Test against None explicitly: ``mapping or {}`` would also discard a
    # caller-supplied mapping that merely happens to be empty (falsy), so the
    # wrapper would end up around a throwaway dict instead of the caller's object.
    if mapping is None:
        mapping = {}
    return wrap_kvs(mapping, id_of_key=key)


# Let's make two readers: one that uses the extension as its key, and another that uses the protocol of a URL

# Reader keyed by file extension: a key such as 'asdf.xls' is looked up under
# whatever get_ext returns for it.
ext = mk_key_func_reader(get_ext, dflt_ext_mapping)

# Reader keyed by URL protocol.
# The 'http'/'https' entries are required for the `route['http://google.com']`
# lookup made later in this notebook: without them that cell raises KeyError on
# a fresh kernel (its recorded output is the `graze` function).
# NOTE(review): 'csv_list' returns a lazy `map` object while 'list' returns a
# list -- confirm that this asymmetry is intended.
protocol = mk_key_func_reader(get_protocol, {
    'http': graze,
    'https': graze,
    'list': lambda x: x.decode().splitlines(), 
    'csv_list': lambda x: map(lambda line: line.split(','), x.decode().splitlines())
})


In [9]:
# Show the default extension -> reader mapping that backs `ext`.
dflt_ext_mapping

{'xls': functools.partial(<function read_excel at 0x11f48f910>, index=False),
 'xlsx': functools.partial(<function read_excel at 0x11f48f910>, index=False),
 'csv': functools.partial(<function read_csv at 0x11f48e7a0>, index_col=False),
 'tsv': functools.partial(<function read_csv at 0x11f48e7a0>, sep='\t', index_col=False),
 'json': functools.partial(<function read_json at 0x11f865090>, orient='records'),
 'html': functools.partial(<function read_html at 0x11f8335b0>, index_col=False),
 'p': <function _pickle.load(file, *, fix_imports=True, encoding='ASCII', errors='strict', buffers=())>,
 'pickle': <function _pickle.load(file, *, fix_imports=True, encoding='ASCII', errors='strict', buffers=())>}

In [11]:
# A full file name is accepted as a key; the reader is picked via get_ext.
ext.get('asdf.xls')

functools.partial(<function read_excel at 0x11f48f910>, index=False)

In [47]:
# Membership goes through the same key function as lookup.
assert 'blah.xls' in ext

But here is the ugly part, which we should perhaps do something about: the key transformation is one-way only, so anything ending with `.xls` is a key, but `xls` itself is not. 
Also, and probably worse: you can't use `.items()`, `dict(...)`, and the like, since these require a two-way (BIJECTIVE) key transformation.

In [15]:
# A bare 'xls' is NOT a key: the extension of "xls" is the empty string,
# which is not in the mapping.
assert 'xls' not in ext  # because the extension of "xls" is the empty string, which is not in the mapping

# Left commented out on purpose -- these illustrate what the one-way key
# transformation breaks:
# list(ext.items())  # would fail
# dict(ext)  # would fail
# ext.update({'md': lambda x: x.split('|')})  # would actually add a key for an empty extension!!!


You can stack mappings this way:

In [30]:
from collections import ChainMap

# ChainMap searches its maps in order: `ext` is tried first, then `protocol`.
route = ChainMap(ext, protocol)

# Iterating a ChainMap yields the union of the underlying keys.
print(*route)

http https xls xlsx csv tsv json html p pickle foo


In [40]:
# Look up a URL; the recorded output below is the `graze` function.
route['http://google.com']

<function graze.base.graze(url: str, rootdir: str = '/Users/thorwhalen/graze', source=<graze.base.Internet object at 0x15f00da20>, max_age: Union[int, float, NoneType] = None)>

In [41]:
# Look up a file name; the recorded output below is the xls reader.
route['asdf.xls']

functools.partial(<function read_excel at 0x1321eab90>, index=False)

In [43]:
# new_child puts a plain dict in front of the chain. Its keys are matched
# literally (no key function), so only the exact key 'asdf.xls' is overridden.
new_route = route.new_child({'asdf.xls': 'another_resolution'})
new_route['asdf.xls']

'another_resolution'

In [44]:
# Any other key is absent from the front dict and falls through to the rest of the chain.
new_route['blah.xls']

functools.partial(<function read_excel at 0x1321eab90>, index=False)

In [45]:
# Put an extension-keyed reader in front of the chain instead of a plain dict,
# so the override is registered under the extension 'xls' rather than under
# one literal file name.
new_route = route.new_child(
    mk_key_func_reader(get_ext, {'xls': 'my_new_way_of_reading_xls'})
)

In [38]:
from dol import Files, FilesOfZip, Pipe
import pandas as pd
from functools import partial
import io

# Rebuild the extension-keyed reader from the defaults, overriding only 'json'.
# The replacement presumably feeds the raw bytes through io.BytesIO and then
# pd.read_json(orient='index') -- TODO confirm the call order against dol.Pipe.
# Note that this rebinds the name `ext` used by earlier cells.
ext = mk_key_func_reader(
    get_ext,
    {
        **dflt_ext_mapping,
        'json': Pipe(io.BytesIO, partial(pd.read_json, orient='index')),
    },
)

# Store decorator: postget is given (key, value) and returns what the caller
# sees; here the reader registered for the key is applied to the value.
# `ext` is resolved when the lambda runs, not when it is defined.
table_trans = wrap_kvs(postget=lambda k, v: ext[k](v))

# Table-returning variants of the file-system store and the zip-file store.
TableFiles = table_trans(Files)
TableZipFiles = table_trans(FilesOfZip)

In [39]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that file exists; consider a configurable data directory.
s = TableZipFiles('/Users/thorwhalen/Dropbox/_odata/sound/induction_motor_data.zip')

In [42]:
# Number of items in the zip-backed store.
len(s)

3909

In [43]:
# head() gives one (key, value) pair; the value has already gone through the
# store's postget hook, i.e. the reader selected by the key's extension.
k, v = s.head()
print(f"{k=}")
v

k='induction_motor_data/Bearing/Bearing_1250rpm/2021_01_27_15_03_06.json'


Unnamed: 0,0
dataType,completeSample
deviceId,00000781O
flux,"[31.3639, 23.8975, 35.5119, 27.0085, 16.6385, ..."
motorId,adi#0c7dbd85-a5e7-4e6a-b37c-a49e1adaca45
tempe,"[25.875, 25.875]"
tempm,"[25.9375, 25.9375]"
tenantId,adi
timestamp,1611756186321
ts,60814
tsr,False
