In [29]:
import random
import pandas as pd
import numpy as np
import sys
sys.path.append('..')
import doctable as dt

In [30]:
# Each schema entry is (column name, type name[, dict of column options]).
schema = (
    ('id','integer',dict(primary_key=True, autoincrement=True)),
    ('name','string', dict(nullable=False)),
    # float, not integer: the rows inserted in this notebook carry fractional ages
    ('age','float'),
)
# defaults: fname=':memory:', engine='sqlite', persistent_conn=True, new_db=True
# fname=':memory:' is special - it loads database into memory
# verbose=True makes the table print each query it issues
db = dt.DocTable2(schema, tabname='mydocuments', verbose=True)
print(db)

DocTable2 Query: SELECT count() AS count_1 
FROM mydocuments
 LIMIT :param_1
<DocTable2::mydocuments ct: 0>


In [31]:
N = 10
# one row per user; 'id' is left out of each row
for i in range(N):
    row = {'name': f'user_{i}', 'age': random.random()}
    db.insert(row, verbose=False)  # per-call verbose flag passed as False for the inserts
print(db)

DocTable2 Query: SELECT count() AS count_1 
FROM mydocuments
 LIMIT :param_1
<DocTable2::mydocuments ct: 10>


## Regular Selects
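`select()` returns a list of rows and `select_first()` returns a single row. Each row is a SQLAlchemy result row (a `RowProxy` object), so its values can be accessed using numerical indices or column-name keys. For instance, if one select output row is ```row=(1, 'user_0')``` (after selecting "id" and "name"), it can be accessed such that ```row[0]==row['id']``` and ```row[1]==row['name']```. When a single column is passed directly instead of a list of columns, the bare values are returned rather than rows.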

In [32]:
# no columns given: every column of the table is selected; limit caps the number of rows
db.select(limit=2)

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments
 LIMIT :param_1


[(1, 'user_0', 0.8781237012539392), (2, 'user_1', 0.6605216075427214)]

In [33]:
# a list of columns restricts which fields each returned row carries
columns = [db['id'], db['name']]
db.select(columns, limit=1)

DocTable2 Query: SELECT mydocuments.id, mydocuments.name 
FROM mydocuments
 LIMIT :param_1


[(1, 'user_0')]

In [34]:
# select_first() gives back one row by itself rather than a list of rows
db.select_first()

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments
 LIMIT :param_1


(1, 'user_0', 0.8781237012539392)

In [35]:
# a single column passed directly (not wrapped in a list) yields a flat list of values
db.select(db['name'],limit=5)

DocTable2 Query: SELECT mydocuments.name 
FROM mydocuments
 LIMIT :param_1


['user_0', 'user_1', 'user_2', 'user_3', 'user_4']

In [44]:
# with a single column, select_first() gives back the bare value from the first row
db.select_first(db['age'])

DocTable2 Query: SELECT mydocuments.age 
FROM mydocuments
 LIMIT :param_1


0.8781237012539392

## Conditional Selects

In [37]:
# comparing a column with == builds the WHERE clause; the value is sent as a bound parameter
db.select(where=db['id']==2)

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.id = :id_1


[(2, 'user_1', 0.6605216075427214)]

In [38]:
# inequality comparison: only rows whose id is below 3
db.select(where=db['id']<3)

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.id < :id_1


[(1, 'user_0', 0.8781237012539392), (2, 'user_1', 0.6605216075427214)]

In [39]:
# note the parentheses around every comparison: & is an overloaded bitwise operator
# and binds more tightly than >=, <= and !=
id_in_range = (db['id'] >= 2) & (db['id'] <= 4)
db.select(where=id_in_range & (db['name'] != 'user_2'))

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.id >= :id_1 AND mydocuments.id <= :id_2 AND mydocuments.name != :name_1


[(2, 'user_1', 0.6605216075427214), (4, 'user_3', 0.10187579205918385)]

In [40]:
# in_() matches rows whose column value is one of the given values
wanted_names = ('user_2', 'user_3')
db.select(where=db['name'].in_(wanted_names))

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.name IN (:name_1, :name_2)


[(3, 'user_2', 0.23484160000276344), (4, 'user_3', 0.10187579205918385)]

In [52]:
db.select(where=db['id'].between_(2,4))

AttributeError: Neither 'Column' object nor 'Comparator' object has an attribute 'between_'

In [45]:
# "~" is the logical not operator: here it turns the IN test into NOT IN
excluded_names = ('user_2', 'user_3')
name_not_excluded = ~(db['name'].in_(excluded_names))
db.select(where=name_not_excluded & (db['id'] < 4))

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.name NOT IN (:name_1, :name_2) AND mydocuments.id < :id_1


[(1, 'user_0', 0.8781237012539392), (2, 'user_1', 0.6605216075427214)]

In [47]:
# DocTable2 also binds the spelled-out operators .and_, .or_ and .not_
# (or_ receives a single condition here, so no OR appears in the query)
id_is_not_4 = db.not_(db['id'] == 4)
id_at_most_2 = db['id'] <= 2
db.select(where=db.or_(id_is_not_4) & id_at_most_2)

DocTable2 Query: SELECT mydocuments.id, mydocuments.name, mydocuments.age 
FROM mydocuments 
WHERE mydocuments.id != :id_1 AND mydocuments.id <= :id_2


[(1, 'user_0', 0.8781237012539392), (2, 'user_1', 0.6605216075427214)]

In [51]:
# now with simple computation: fetch all ages, average them in Python,
# then use that average as the threshold of a second query
ages = db.select(db['age'])
mean_age = sum(ages) / len(ages)
above_mean_age = db['age'] > mean_age
db.select(db['name'], where=above_mean_age, limit=2)

DocTable2 Query: SELECT mydocuments.age 
FROM mydocuments
DocTable2 Query: SELECT mydocuments.name 
FROM mydocuments 
WHERE mydocuments.age > :age_1
 LIMIT :param_1


['user_0', 'user_1']

In [None]:
## Column Operators