In [1]:
import random
import pandas as pd
import numpy as np
import sys
sys.path.append('..')
import doctable as dt

In [2]:
# Column definitions, each given as (name, type[, column options]).
schema = (
    ('id','integer',dict(primary_key=True, autoincrement=True)),
    ('name','string', dict(nullable=False)),
    # 'float' rather than 'integer': this notebook stores random.random()
    # values in this column, which are fractional numbers.
    ('age','float'),
    ('is_old', 'boolean'),
)
# No database file name is passed here.
# NOTE(review): presumably this yields a temporary/in-memory table - confirm
# against the DocTable2 constructor.
db = dt.DocTable2(schema)
print(db)

<DocTable2::_documents_ ct: 0>


In [3]:
N = 5  # number of demo rows to insert
for i in range(N):
    # One random draw per row; rows whose draw exceeds 0.5 are flagged as old.
    age = random.random()
    is_old = age > 0.5
    db.insert(dict(name='user_{}'.format(i), age=age, is_old=is_old))
print(db)

<DocTable2::_documents_ ct: 5>


In [4]:
# Select with no arguments and print every returned row on its own line.
all_docs = db.select()
for record in all_docs:
    print(record)

(1, 'user_0', 0.5936955992324471, True)
(2, 'user_1', 0.31053348011189275, False)
(3, 'user_2', 0.040883152659224375, False)
(4, 'user_3', 0.6652588950640632, True)
(5, 'user_4', 0.07145453086837894, False)


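The `.get_doc_sample()` method draws a single, one-off sample of docs. As the repeated rows in the output below show, docs are sampled with replacement, so the sample can be larger than the table.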

In [5]:
# Build a bootstrap object over the table and draw one sample of 7 docs.
# Each item is an (index, doc) pair; only the doc is printed.
bs = db.get_bootstrap()
sample = bs.get_doc_sample(7)
for _idx, doc in sample:
    print(doc)

(3, 'user_2', 0.040883152659224375, False)
(3, 'user_2', 0.040883152659224375, False)
(3, 'user_2', 0.040883152659224375, False)
(3, 'user_2', 0.040883152659224375, False)
(3, 'user_2', 0.040883152659224375, False)
(5, 'user_4', 0.07145453086837894, False)
(1, 'user_0', 0.5936955992324471, True)


Use `.draw_sample()` to draw a sample and store it on the bootstrap object, then call `.get_docs()` to retrieve the docs from that stored sample. Because the sample is stateful, every call to `.get_docs()` returns the same docs until a new sample is drawn. This is useful when you want to iterate over a single sample multiple times (e.g. when training a stochastically initialized algorithm like Word2Vec or LDA).

In [7]:
# Limit the bootstrap to three columns and filter on the is_old column.
selected_cols = ['name', 'age', 'is_old']
bs = db.get_bootstrap(selected_cols, where=db['is_old'])

# Draw a sample of size 3 once; the two passes below read that stored sample.
bs.draw_sample(3)

pass_no = 0
while pass_no < 2:
    print('pulling docs')
    for _idx, doc in bs.get_docs():
        print(doc)
    pass_no += 1

pulling docs
('user_0', 0.5936955992324471, True)
('user_3', 0.6652588950640632, True)
('user_0', 0.5936955992324471, True)
pulling docs
('user_0', 0.5936955992324471, True)
('user_3', 0.6652588950640632, True)
('user_0', 0.5936955992324471, True)
