# Benchmark Results

For object creation and selection, using schema objects increases run time by 2-3x. This means that the object-oriented approach incurs a substantial cost compared with using raw dictionaries.

In [1]:
import sys
sys.path.append('..')
import doctable
import dataclasses
import functools
import tempfile
import random
import copy
import typing

## Basic Benchmark: Schema Object VS Raw Dictionaries

In [2]:
# Two-column schema (id, name) declared with the standard doctable.schema
# decorator. It backs the tables built in runtest_obj and runtest_raw.
@doctable.schema
class MyObjSmall:
    # Explicit empty __slots__; the columns below are declared as annotated
    # class attributes whose values come from doctable.Col().
    __slots__ = []
    id: int = doctable.Col()
    name: str = doctable.Col()

# Same two fields as MyObjSmall, but declared through the experimental schema
# decorator and its column helpers (ExpIDCol / ExpCol). Used by runtest_exp.
@doctable.schema_experimental
class ExMyObjSmall:
    id: int = doctable.ExpIDCol()
    name: str = doctable.ExpCol()

# Wider schema: id and name plus five extra string columns. None of the
# benchmark functions visible in this section reference it.
@doctable.schema
class MyObjBig:
    # Explicit empty __slots__, mirroring MyObjSmall.
    __slots__ = []
    id: int = doctable.Col()
    name: str = doctable.Col()
    extra1: str = doctable.Col()
    extra2: str = doctable.Col()
    extra3: str = doctable.Col()
    extra4: str = doctable.Col()
    extra5: str = doctable.Col()

In [5]:
def make_test_dt(SchemaClass, in_memory: bool = True, tmpdir: str = None) -> doctable.DocTable:
    """Create a new DocTable for ``SchemaClass``.

    Args:
        SchemaClass: schema class handed to ``doctable.DocTable`` as ``schema``.
        in_memory: when true, the table targets ``':memory:'``; otherwise it
            targets a database file inside ``tmpdir``.
        tmpdir: directory for the database file. Required when ``in_memory``
            is false; ignored otherwise.

    Returns:
        The newly constructed ``doctable.DocTable``.

    Raises:
        ValueError: if ``in_memory`` is false and ``tmpdir`` is not given.
    """
    if in_memory:
        target = ':memory:'
    else:
        if tmpdir is None:
            # Without this check the path would silently become 'None/..._test.db'.
            raise ValueError('tmpdir is required when in_memory is False')
        # Use the class *name*: formatting the class object itself yields its
        # repr ("<class '...'>"), which puts brackets, quotes and spaces in
        # the file name.
        target = f'{tmpdir}/{SchemaClass.__name__}_test.db'

    return doctable.DocTable(
        schema=SchemaClass,
        target=target,
        new_db=True,
    )

def runtest_exp(payload: typing.List[typing.Dict]):
    print(f'=== start EXPERIMENTAL test ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(ExMyObjSmall)
        print(f'construct  objects __________')
        %timeit [ExMyObjSmall(**p) for p in payload]
        %timeit [ExMyObjSmall(_doctable_rowdict=p) for p in payload]
        test_objs = [ExMyObjSmall(**p) for p in payload]
        print(f'insert objects __________')
        %time dt_small.q.insert_multi(test_objs)
        print(f'select objects __________')
        %timeit dt_small.q.select()
        %timeit dt_small.q.select_raw()

def runtest_obj(payload: typing.List[typing.Dict]):
    print(f'=== start OBJECT test ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(MyObjSmall)
        print(f'construct objects __________')
        %timeit [MyObjSmall(**p) for p in payload]
        #%timeit [MyObjSmall(i, f'name_{i}') for i in range(n)]
        test_objs = [MyObjSmall(**p) for p in payload]
        print(f'insert objects __________')
        %time dt_small.q.insert_multi(test_objs)
        print(f'select objects __________')
        %timeit dt_small.q.select()
        %timeit dt_small.q.select_raw()

def runtest_raw(payload: typing.List[typing.Dict]):
    print(f'=== start RAW test: {n=} ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(MyObjSmall)
        %timeit [{'id': i, 'name': f'name_{i}'} for i in range(n)]
        test_objs = [{'id': i, 'name': f'name_{i}'} for i in range(n)]
        print(f'insert objects __________')
        %time dt_small.q.insert_multi_raw(test_objs)
        print(f'select objects __________')
        %timeit dt_small.q.select_raw()


# Run the three benchmark variants at each payload size. The loop variable is
# deliberately a module-level ``n``: the banner below prints it by name.
for n in (10_000, 100_000, 1_000_000):
    print(f'================== {n=}=============================')
    # One row per id, shaped like the two-column schemas above.
    payload = [dict(id=i, name=f'name_{i}') for i in range(n)]
    runtest_exp(payload)
    runtest_obj(payload)
    runtest_raw(payload)

construct  objects __________
15.2 ms ± 57.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.06 ms ± 6.39 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
insert objects __________
CPU times: user 29.2 ms, sys: 0 ns, total: 29.2 ms
Wall time: 29.3 ms
select objects __________
48.4 ms ± 49.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
7.65 ms ± 7.67 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
construct objects __________
6.34 ms ± 864 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)
insert objects __________
CPU times: user 37.9 ms, sys: 0 ns, total: 37.9 ms
Wall time: 37.9 ms
select objects __________
55.7 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
7.86 ms ± 3.47 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.48 ms ± 739 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)
insert objects __________
CPU times: user 26 ms, sys: 0 ns, total: 26 ms
Wall time: 26 ms
select objects __________
7.8 ms ± 3

In [None]:
def make_test_dt(SchemaClass, tmpdir) -> doctable.DocTable:
    """Create a new file-backed DocTable for ``SchemaClass`` inside ``tmpdir``.

    Args:
        SchemaClass: schema class handed to ``doctable.DocTable`` as ``schema``.
        tmpdir: directory in which the ``<ClassName>_test.db`` file is placed.

    Returns:
        The newly constructed ``doctable.DocTable``.
    """
    # Use the class *name*: formatting the class object itself yields its repr
    # ("<class '...'>"), which puts brackets, quotes and spaces in the file name.
    return doctable.DocTable(
        schema=SchemaClass,
        target=f'{tmpdir}/{SchemaClass.__name__}_test.db',
        new_db=True,
    )
def row_obj_test(n: int = 1000):
    """Insert ``n`` raw rows, then print the first row in three forms.

    Prints the type of the raw result row, the row itself, and the row
    converted to a plain ``dict``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        table = make_test_dt(MyObjSmall, tmpdir)
        rows = [{'id': i, 'name': f'name_{i}'} for i in range(n)]
        table.q.insert_multi_raw(rows)

        # raw_result=True is requested so the row comes back un-wrapped.
        first = table.q.select_first(raw_result=True)
        print(type(first))
        print(first)

        as_dict = dict(first)
        print(as_dict)


row_obj_test()

<class 'sqlalchemy.engine.row.LegacyRow'>
(0, 'name_0')
{'id': 0, 'name': 'name_0'}


In [None]:
import attrs
import inspect
import dataclasses

# Base class generated by attrs.define with a single int field ``a``
# (default 5). The dataclasses variant is left disabled for comparison:
#@dataclasses.dataclass
@attrs.define
class TestBase:
    a: int = 5

#print(inspect.signature(TestBase.__init__))

class Test(TestBase):
    """Subclass of ``TestBase`` that keeps attribute ``a`` in a dict ``self.d``.

    ``a`` is exposed as a property reading and writing ``self.d['a']``.
    """

    def __init__(self, *args, __b: dict = None, **kwargs):
        # NOTE: a double-underscore name inside a class body is subject to
        # Python's private name mangling, so this keyword is really
        # ``_Test__b`` as seen from outside the class.
        self.d = {}
        if __b is not None:
            # A value was supplied: skip the generated base initialiser.
            return
        # Call the base initialiser explicitly. The recorded output shows that
        # ``d`` ends up holding ``a`` afterwards.
        TestBase.__init__(self, *args, **kwargs)

    def __repr__(self):
        fields = ', '.join(f'{key}={value}' for key, value in self.d.items())
        return f'{type(self).__name__}({fields})'

    @property
    def a(self):
        return self.d['a']

    @a.setter
    def a(self, val):
        self.d['a'] = val

# Build an instance with a positional value for ``a`` and show both the repr
# and the property read; the recorded output is ``Test(a=1)`` followed by ``1``.
o = Test(1)
print(o)
print(o.a)



Test(a=1)
1
