In [1]:
import random
import pandas as pd
import numpy as np
import sys
sys.path.append('..')
import doctable as dt

In [2]:
# Table schema: each entry is (type name, column name[, dict of column options]).
schema = (
    ('integer','id',dict(primary_key=True, autoincrement=True)),
    ('string','name', dict(nullable=False)),
    # 'age' receives the fractional value produced by random.random() in the
    # insert cell, so it is declared as a float column rather than an integer one.
    ('float','age'),
    ('boolean', 'is_old'),
)
db = dt.DocTable(schema)
print(db)  # shows the table summary, including the current row count

<DocTable2::_documents_ ct: 0>


In [3]:
# Populate the table with five demo users.
for idx in range(5):
    value = random.random()  # float in [0, 1)
    db.insert({
        'name': 'user_' + str(idx),
        'age': value,
        'is_old': value > 0.5,  # flag the upper half of the range
    })
print(db)

<DocTable2::_documents_ ct: 5>


In [4]:
# Print every row returned by an unfiltered select.
for row in db.select():
    print(row)

(1, 'user_0', 0.0903606780594799, False)
(2, 'user_1', 0.1936026672500093, False)
(3, 'user_2', 0.7179026106618069, True)
(4, 'user_3', 0.07015109876076597, False)
(5, 'user_4', 0.9055454699822891, True)


The `.bootstrap()` method will return a DocBootstrap object.

In [5]:
# Create the bootstrap object from the table.
bs = db.bootstrap()
# Last expression of the cell: displays the class of the returned object.
type(bs)

doctable.bootstrap.DocBootstrap

There are three ways to use this object: (1) iterating over it directly, (2) setting a stateful sample with `.set_sample()` and then iterating over it, or (3) calling `.sample()` with a size to draw a new one-off sample each time.

In [6]:
# Approach 1: iterate directly over the object returned by .bootstrap(n=3).
for row in db.bootstrap(n=3):
    print(row)

(4, 'user_3', 0.07015109876076597, False)
(1, 'user_0', 0.0903606780594799, False)
(5, 'user_4', 0.9055454699822891, True)


In [7]:
# Approach 2 (stateful): store a sample of size 3 on the object, then read it back.
bs = db.bootstrap()
bs.set_sample(3)
print(bs.ids)  # inspect the ids held by the object after set_sample
# With no size given, .sample() here is equivalent to iterating over bs itself.
for row in bs.sample():
    print(row)

[2 1 2]
(3, 'user_2', 0.7179026106618069, True)
(2, 'user_1', 0.1936026672500093, False)
(3, 'user_2', 0.7179026106618069, True)


In [8]:
# Approach 3 (non-stateful): pass the size straight to .sample().
bs = db.bootstrap()
for row in bs.sample(3):
    print(row)

(2, 'user_1', 0.1936026672500093, False)
(5, 'user_4', 0.9055454699822891, True)
(5, 'user_4', 0.9055454699822891, True)


In [9]:
bs = db.bootstrap(n=3)
# Each .sample(2) call ignores n=3 and makes a new one-off bootstrap,
# so the two passes below print two separate draws of size 2.
for _ in range(2):
    for row in bs.sample(2):
        print(row)

(2, 'user_1', 0.1936026672500093, False)
(4, 'user_3', 0.07015109876076597, False)
(3, 'user_2', 0.7179026106618069, True)
(3, 'user_2', 0.7179026106618069, True)


Because the args and kwargs passed to `.bootstrap()` are forwarded directly to `.select()` (with the exception of `n`), you can specify any other query details, such as which columns to return. Note that `limit` first reduces the number of rows queried; the bootstrap sample of the requested size is then drawn from that subset, so the same row can appear several times.

In [10]:
# Positional/keyword arguments other than n are handed on to the select:
# here only two columns are requested and the query is limited to 2 rows.
selected_cols = ['name','is_old']
for row in db.bootstrap(selected_cols, n=5, limit=2):
    print(row)

('user_0', False)
('user_1', False)
('user_1', False)
('user_1', False)
('user_1', False)
