# Benchmark Results

For object creation and selection, using schema objects increases the time taken by a factor of 2-3 compared with raw dictionaries. This means that the object-oriented approach incurs a significant cost over using plain dictionaries.

In [1]:
import sys
sys.path.append('..')
import doctable
import dataclasses
import functools
import tempfile
import random
import copy

## Basic Benchmark: Schema Objects vs. Raw Dictionaries

In [2]:
# Minimal two-column schema (id, name). Used as the table schema for the
# object-based and raw-dictionary benchmarks in this notebook.
@doctable.schema
class MyObjSmall:
    # NOTE(review): the empty __slots__ is presumably required by
    # doctable.schema's slot handling -- TODO confirm against doctable docs.
    __slots__ = []
    id: int = doctable.Col()    # integer column, filled explicitly by the benchmarks
    name: str = doctable.Col()  # string column, filled with 'name_<i>' by the benchmarks

# Same two fields as MyObjSmall, but declared through doctable's experimental
# schema decorator and column helpers. Used by the EXPERIMENTAL benchmark.
@doctable.schema_experimental
class ExMyObjSmall:
    id: int = doctable.ExpIDCol()  # id column (experimental id-column helper)
    name: str = doctable.ExpCol()  # string column, filled with 'name_<i>' by the benchmark

# Wider variant of MyObjSmall: the same id/name columns plus five extra string
# columns. NOTE(review): not referenced by any benchmark visible in this
# notebook -- presumably kept for a wide-row comparison; confirm or remove.
@doctable.schema
class MyObjBig:
    # NOTE(review): the empty __slots__ is presumably required by
    # doctable.schema's slot handling -- TODO confirm against doctable docs.
    __slots__ = []
    id: int = doctable.Col()      # integer column
    name: str = doctable.Col()    # string column
    extra1: str = doctable.Col()  # extra1..extra5: additional string columns
    extra2: str = doctable.Col()
    extra3: str = doctable.Col()
    extra4: str = doctable.Col()
    extra5: str = doctable.Col()

In [3]:

def make_test_dt(SchemaClass, tmpdir) -> doctable.DocTable:
    """Create a DocTable for ``SchemaClass`` backed by a database file in ``tmpdir``.

    Args:
        SchemaClass: schema class passed to ``doctable.DocTable`` as ``schema``.
        tmpdir: directory in which the ``<ClassName>_test.db`` file is placed.

    Returns:
        The newly constructed ``doctable.DocTable`` (created with ``new_db=True``).
    """
    # Build the file name from the bare class name. Interpolating the class
    # object itself would insert its repr ("<class '__main__.MyObjSmall'>"),
    # putting angle brackets, quotes and a space into the path -- characters
    # that are invalid in Windows file names and awkward everywhere else.
    return doctable.DocTable(
        schema=SchemaClass,
        target=f'{tmpdir}/{SchemaClass.__name__}_test.db',
        new_db=True,
    )

def runtest_exp(n: int = 100):
    print(f'=== start EXPERIMENTAL test: {n=} ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(ExMyObjSmall, tmpdir)
        print(f'construct {n} objects')
        %timeit [ExMyObjSmall.from_dict({'id': i, 'name': f'name_{i}'}) for i in range(n)]
        test_objs = [ExMyObjSmall(i, f'name_{i}') for i in range(n)]
        print(f'insert objects')
        %time dt_small.q.insert_multi(test_objs)
        print(f'select objects')
        %timeit dt_small.q.select()

def runtest_obj(n: int = 100):
    print(f'=== start OBJECT test: {n=} ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(MyObjSmall, tmpdir)
        print(f'construct {n} objects')
        %timeit [MyObjSmall(i, f'name_{i}') for i in range(n)]
        test_objs = [MyObjSmall(i, f'name_{i}') for i in range(n)]
        print(f'insert objects')
        %time dt_small.q.insert_multi(test_objs)
        print(f'select objects')
        %timeit dt_small.q.select()

def runtest_raw(n: int = 100):
    print(f'=== start RAW test: {n=} ================')
    with tempfile.TemporaryDirectory() as tmpdir:
        dt_small = make_test_dt(MyObjSmall, tmpdir)
        print(f'construct {n} objects')
        %timeit [{'id': i, 'name': f'name_{i}'} for i in range(n)]
        test_objs = [{'id': i, 'name': f'name_{i}'} for i in range(n)]
        print(f'insert objects')
        %time dt_small.q.insert_multi_raw(test_objs)
        print(f'select objects')
        %timeit dt_small.q.select_raw()


# Sweep the row count over five orders of magnitude and run all three
# benchmark variants at each size. The loop variable must stay `n`, because
# the banner below prints it through the `{n=}` format specifier.
for n in (100, 1_000, 10_000, 100_000, 1_000_000):
    print(f'================== {n=}=============================')
    runtest_exp(n)  # experimental schema objects
    runtest_obj(n)  # regular schema objects
    runtest_raw(n)  # plain dictionaries

construct 100 objects
232 µs ± 11.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
insert objects
CPU times: user 3.11 ms, sys: 0 ns, total: 3.11 ms
Wall time: 10.1 ms
select objects
1.55 ms ± 50.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
construct 100 objects
118 µs ± 5.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
insert objects
CPU times: user 2.7 ms, sys: 0 ns, total: 2.7 ms
Wall time: 9.5 ms
select objects
1.72 ms ± 29.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
construct 100 objects
39.7 µs ± 433 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
insert objects
CPU times: user 2.94 ms, sys: 0 ns, total: 2.94 ms
Wall time: 12.4 ms
select objects
622 µs ± 5.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
construct 1000 objects
2.17 ms ± 517 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
insert objects
CPU times: user 10.1 ms, sys: 88 µs, total: 10.2 ms
Wall time: 17.9 ms
select objects
10.7 ms 

RowToObjectConversionFailedError: Conversion from <class 'sqlalchemy.engine.row.LegacyRow'> to <class '__main__.MyObjSmall'> failed. This may be caused by inserting objects of innapropriate type into the db.

In [None]:
# NOTE(review): this redefines make_test_dt, which is already defined in an
# earlier cell of this notebook; the later definition wins once this cell runs.
def make_test_dt(SchemaClass, tmpdir) -> doctable.DocTable:
    """Create a DocTable for ``SchemaClass`` backed by a database file in ``tmpdir``.

    Args:
        SchemaClass: schema class passed to ``doctable.DocTable`` as ``schema``.
        tmpdir: directory in which the ``<ClassName>_test.db`` file is placed.

    Returns:
        The newly constructed ``doctable.DocTable`` (created with ``new_db=True``).
    """
    # Build the file name from the bare class name. Interpolating the class
    # object itself would insert its repr ("<class '__main__.MyObjSmall'>"),
    # putting angle brackets, quotes and a space into the path -- characters
    # that are invalid in Windows file names and awkward everywhere else.
    return doctable.DocTable(
        schema=SchemaClass,
        target=f'{tmpdir}/{SchemaClass.__name__}_test.db',
        new_db=True,
    )
def row_obj_test(n: int = 1000):
    """Insert ``n`` raw rows into a temporary ``MyObjSmall`` table and inspect one row.

    Prints, in order: the type of the first row fetched with
    ``raw_result=True``, the row itself, and the row converted with ``dict()``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        table = make_test_dt(MyObjSmall, workdir)
        table.q.insert_multi_raw(
            [{'id': idx, 'name': f'name_{idx}'} for idx in range(n)]
        )

        first_row = table.q.select_first(raw_result=True)
        print(type(first_row))
        print(first_row)
        # Convert only after the raw row has been shown, as the last step.
        print(dict(first_row))

# Run the inspection with the default row count.
row_obj_test()

In [None]:
import attrs
import inspect
import dataclasses

#@dataclasses.dataclass
# Minimal attrs-defined base class with a single int field `a` that defaults
# to 5. It is the parent of the `Test` subclass experiment in this cell.
@attrs.define
class TestBase:
    a: int = 5

#print(inspect.signature(TestBase.__init__))

class Test(TestBase):
    """Experiment: a ``TestBase`` subclass that stores field ``a`` in a dict.

    The instance keeps a dict ``self.d``; the ``a`` property reads and writes
    ``self.d['a']``. Presumably the attrs-generated ``TestBase.__init__``
    assigns ``self.a`` and so goes through the property setter -- TODO confirm.
    """

    def __init__(self, *args, __b: dict = None, **kwargs):
        # NOTE(review): inside a class body Python name-mangles `__b` to
        # `_Test__b`, so code outside this class cannot pass it as `__b=...`
        # (that keyword would land in **kwargs instead).
        self.d = {}
        # When a `__b` value is supplied the parent initialiser is skipped and
        # `self.d` stays empty; `__b` is not used for anything else.
        if __b is not None:
            return
        TestBase.__init__(self, *args, **kwargs)

    def __repr__(self):
        # Rendered as ClassName(key=value, ...), taken from the backing dict.
        fields = ', '.join(f'{key}={value}' for key, value in self.d.items())
        return f'{self.__class__.__name__}({fields})'

    @property
    def a(self):
        # Raises KeyError if 'a' was never stored in the backing dict.
        return self.d['a']

    @a.setter
    def a(self, val):
        self.d['a'] = val

# Smoke test of the experiment: build an instance with one positional
# argument, then print its repr and the value read back through the `a`
# property. NOTE(review): this cell has no recorded output -- confirm that it runs.
o = Test(1)
print(o)
print(o.a)

