In [1]:
import numpy as np
from dataclasses import dataclass, field

import sys
sys.path.append('..')
import doctable

In [None]:
# set up database objects
# Alternative scratch location, currently disabled:
#folder = '/tmp/devintest'
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine where this directory is available; consider making it configurable.
folder = '/econ/home/d/dc326/research/data/testing/tmp'
# NOTE(review): tmpf is not referenced again in the visible cells; presumably
# TempFolder prepares/manages `folder` on disk -- confirm against doctable.
tmpf = doctable.TempFolder(folder)

# Row schema bound to the 'dataobj' table: an id column plus a numpy array
# declared through a plain doctable.Col with a default of None.
# NOTE(review): presumably the array is stored inside the database row itself
# (the baseline that FileObj is compared against) -- confirm against doctable's
# type mapping for np.ndarray.
@dataclass
class DataObj(doctable.DocTableSchema):
    id: int = doctable.IDCol()
    data: np.ndarray = doctable.Col(None)

# Row schema bound to the 'fileobj' table: same fields as DataObj, but the
# 'data' column is declared with coltype='picklefile' and pointed at `folder`.
# NOTE(review): presumably each array is pickled to its own file under `folder`
# and the row only references that file -- confirm against doctable.
@dataclass
class FileObj(doctable.DocTableSchema):
    id: int = doctable.IDCol()
    data: np.ndarray = doctable.Col(None, coltype='picklefile', type_args=dict(folder=folder))

# Both tables live in the same database file, located inside `folder`.
target = f'{folder}/benchmark_fileobj.db'
# Table holding DataObj rows.
ddb = doctable.DocTable(schema=DataObj, target=target, tabname='dataobj', new_db=True)
ddb.delete()

# Table holding FileObj rows (its 'data' column uses the 'picklefile' coltype).
fdb = doctable.DocTable(schema=FileObj, target=target, tabname='fileobj', new_db=True)
fdb.delete() # empty databases
# NOTE(review): presumably removes leftover files backing the 'data' column so
# earlier runs do not leave stale files in `folder` -- confirm against doctable.
fdb.clean_col_files('data')
# Show both table objects as a quick sanity check.
print(fdb, ddb)

In [3]:
def make_payload(wrap_class, sizeGB=0.1, num=10):
    """Build a list of `num` benchmark records, each wrapping its own array of ones.

    Args:
        wrap_class: callable invoked as ``wrap_class(data=<array>)`` once per
            record (DataObj or FileObj in this notebook).
        sizeGB: nominal size knob; every array has ``int(1e8 * sizeGB)``
            elements. With np.ones' default float64 dtype that is about
            0.8e9 bytes per unit of sizeGB, so the value is approximate,
            not an exact gigabyte count.
        num: how many records to create.

    Returns:
        A list with `num` objects produced by `wrap_class`.
    """
    # Original sizing note: 5000000000 (5e9) is 45 GB max, ~38GB stable.
    n_elements = int(1e8 * sizeGB)

    records = []
    for _ in range(num):
        # A fresh array per record, so no two records share a buffer.
        records.append(wrap_class(data=np.ones(n_elements)))
    return records

def run_test(**payload_args):
    d_payload = make_payload(DataObj, **payload_args)
    f_payload = make_payload(FileObj, **payload_args)
    
    print('=== DB Delete ===')
    %time ddb.delete()
    %time fdb.delete()
    fdb.clean_col_files('data')
    print()
    
    print('=== DB Insert ===')
    %timeit ddb.insert(d_payload)
    %timeit fdb.insert(f_payload)
    print()
    
    print('=== DB Select ===')
    %timeit a = ddb.select()
    %timeit a = fdb.select()
    print()

In [None]:
# Many tiny records: 10,000 arrays of int(1e8 * 0.0001) = 10,000 elements each
# (1e8 elements in total per table).
run_test(sizeGB=0.0001, num=10000)

=== DB Delete ===
CPU times: user 2.93 ms, sys: 683 µs, total: 3.62 ms
Wall time: 29.9 ms
CPU times: user 572 µs, sys: 1.98 ms, total: 2.55 ms
Wall time: 25.8 ms

=== DB Insert ===
4.36 s ± 550 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1min 36s ± 14.7 s per loop (mean ± std. dev. of 7 runs, 1 loop each)

=== DB Select ===


In [None]:
# 1,000 arrays of int(1e8 * 0.001) = 100,000 elements each
# (1e8 elements in total per table).
run_test(sizeGB=0.001, num=1000)

In [None]:
# 100 arrays of int(1e8 * 0.01) = 1,000,000 elements each
# (1e8 elements in total per table).
run_test(sizeGB=0.01, num=100)

In [None]:
# 10 arrays of int(1e8 * 0.1) = 10,000,000 elements each
# (1e8 elements in total per table).
run_test(sizeGB=0.1, num=10)

In [None]:
# Larger total volume: 10 arrays of int(1e8 * 0.5) = 50,000,000 elements each
# (5e8 elements in total per table).
run_test(sizeGB=0.5, num=10)

In [None]:
# Fewest, largest records: 5 arrays of int(1e8 * 1) = 100,000,000 elements each
# (5e8 elements in total per table). run_test builds this payload twice, once
# per schema, and keeps both in memory.
run_test(sizeGB=1, num=5)