In [1]:
import pprint
import pandas as pd

import sys
sys.path.append('../')
import doctable

The iris dataset is simply a list of flowers with information about the sepal, petal, and species.

In [2]:
iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
core = doctable.ConnectCore.open(
    target=':memory:', # use a filename for a sqlite to write to disk
    dialect='sqlite',
    echo=True,
)
core

ConnectCore(target=':memory:', dialect='sqlite', engine=Engine(sqlite:///:memory:), metadata=MetaData())

In [4]:
import datetime

@doctable.table_schema(table_name='iris', slots=True)
class Iris:
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float
    species: str
    
    id: int = doctable.Column(
        column_args=doctable.ColumnArgs(order=0, primary_key=True, autoincrement=True),
    )
    updated: datetime.datetime = doctable.Column(
        column_args=doctable.ColumnArgs(default=datetime.datetime.utcnow),
    )
    added: datetime.datetime = doctable.Column(
        column_args=doctable.ColumnArgs(
            default=datetime.datetime.utcnow, 
            onupdate=datetime.datetime.utcnow
        )
    )
    
    @classmethod
    def from_row(cls, row: pd.Series):
        return cls(**row)

Iris(sepal_length=1, sepal_width=2, petal_length=3, petal_width=4, species='setosa')

Iris(sepal_length=1, sepal_width=2, petal_length=3, petal_width=4, species='setosa', id=MISSING, updated=MISSING, added=MISSING)

In [5]:
irises = [Iris.from_row(row) for _, row in iris_df.iterrows()]
print(irises[0])

Iris(sepal_length=5.1, sepal_width=3.5, petal_length=1.4, petal_width=0.2, species='setosa', id=MISSING, updated=MISSING, added=MISSING)


In [12]:
with core.begin_ddl() as emitter:
    itab = emitter.create_table_if_not_exists(Iris)
itab.name
#itab.inspect().get_columns()
import sqlalchemy
sqlalchemy.inspect(itab.table)

2023-11-03 11:40:29,761 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:40:29,763 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("iris")
2023-11-03 11:40:29,764 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-03 11:40:29,767 INFO sqlalchemy.engine.Engine COMMIT


Table('iris', MetaData(), Column('id', Integer(), table=<iris>, primary_key=True), Column('added', DateTime(), table=<iris>, onupdate=CallableColumnDefault(<function datetime.utcnow at 0x7f516f11d8a0>), default=CallableColumnDefault(<function datetime.utcnow at 0x7f516f11d940>)), Column('petal_length', Float(), table=<iris>), Column('petal_width', Float(), table=<iris>), Column('sepal_length', Float(), table=<iris>), Column('sepal_width', Float(), table=<iris>), Column('species', String(), table=<iris>), Column('updated', DateTime(), table=<iris>, default=CallableColumnDefault(<function datetime.utcnow at 0x7f516f11d800>)), schema=None)

## Running Queries

### General Queries

In [None]:
with core.query() as q:
    q.delete(itab, all=True)
    q.insert_single(itab, {
        'sepal_length': 1,'sepal_width': 2,'petal_length': 3,'petal_width': 4,'species': 'setosa'
    })
    print(q.insert_multi.__doc__)
    q.insert_multi(itab, [
        {'sepal_length': 1, 'sepal_width': 2, 'petal_length': 3, 'petal_width': 4, 'species': 'setosa'},
        {'sepal_length': 1, 'sepal_width': 2, 'petal_length': 3, 'petal_width': 4, 'species': 'setosa'},
    ])
    print(q.select(itab).fetchall())

2023-11-03 11:28:48,807 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:28:48,808 INFO sqlalchemy.engine.Engine DELETE FROM iris
2023-11-03 11:28:48,809 INFO sqlalchemy.engine.Engine [generated in 0.00141s] ()
2023-11-03 11:28:48,811 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-03 11:28:48,812 INFO sqlalchemy.engine.Engine [generated in 0.00060s] ('2023-11-03 15:28:48.811654', 3.0, 4.0, 1.0, 2.0, 'setosa', '2023-11-03 15:28:48.811656')
Insert multiple rows into the database using executemany-style 
            parameter binding.
        
2023-11-03 11:28:48,813 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-03 11:28:48,813 INFO sqlalchemy.engine.Engine [generated in 0.00045s] [('2023-11-03 15:28:48.813420', 3.0, 4.0, 1.0, 2.0

In [None]:
with core.query() as q:
    # use table.cols to specify which columns to select
    columns = itab.cols('sepal_length', 'sepal_width')
    pprint.pprint(q.select(columns).fetchall())
    
    # use subscript to specify table for each column. use for table joins
    columns = [itab['sepal_length'], itab['sepal_width']]
    results = q.select(columns).fetchall()
    pprint.pprint(results)
    
    # use .sum(), .min(), .max(), .count(), .sum(), and .unique() to specify aggregate functions
    columns = [itab['species'].distinct()]
    result = q.select(columns).scalars()
    pprint.pprint(f'{result=}')
    
    # use in conjunction with group_by to specify groupings
    columns = [itab['sepal_length'].sum()]
    result = q.select(columns, group_by=[itab['species']]).scalar_one()
    pprint.pprint(f'{result=}')

2023-11-03 11:28:48,860 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:28:48,861 INFO sqlalchemy.engine.Engine SELECT iris.sepal_length, iris.sepal_width 
FROM iris
2023-11-03 11:28:48,862 INFO sqlalchemy.engine.Engine [generated in 0.00175s] ()
[(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)]
2023-11-03 11:28:48,863 INFO sqlalchemy.engine.Engine SELECT iris.sepal_length, iris.sepal_width 
FROM iris
2023-11-03 11:28:48,864 INFO sqlalchemy.engine.Engine [cached since 0.004163s ago] ()
[(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)]
2023-11-03 11:28:48,866 INFO sqlalchemy.engine.Engine SELECT distinct(iris.species) AS distinct_1 
FROM iris
2023-11-03 11:28:48,867 INFO sqlalchemy.engine.Engine [generated in 0.00071s] ()
'result=<sqlalchemy.engine.result.ScalarResult object at 0x7fed4c6d8b90>'
2023-11-03 11:28:48,869 INFO sqlalchemy.engine.Engine SELECT sum(iris.sepal_length) AS sum_1 
FROM iris GROUP BY iris.species
2023-11-03 11:28:48,870 INFO sqlalchemy.engine.Engine [generated in 0.00064s] ()
'

#### Table-specific Queries

In [None]:
with itab.query() as q:
    pprint.pprint(q.select())

2023-11-03 11:28:48,914 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:28:48,914 INFO sqlalchemy.engine.Engine SELECT iris.id, iris.added, iris.petal_length, iris.petal_width, iris.sepal_length, iris.sepal_width, iris.species, iris.updated 
FROM iris
2023-11-03 11:28:48,915 INFO sqlalchemy.engine.Engine [cached since 0.09978s ago] ()
[Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=1,
      updated=datetime.datetime(2023, 11, 3, 15, 28, 48, 811656),
      added=datetime.datetime(2023, 11, 3, 15, 28, 48, 811654)),
 Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=2,
      updated=datetime.datetime(2023, 11, 3, 15, 28, 48, 813422),
      added=datetime.datetime(2023, 11, 3, 15, 28, 48, 813420)),
 Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=3

In [None]:
with itab.query() as q:
    q.delete(all=True)
    
    q.insert_multi(irises)
    
    db_irises = q.select()
    print(len(db_irises))
    pprint.pprint(db_irises[:2])

2023-11-03 11:28:48,965 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:28:48,966 INFO sqlalchemy.engine.Engine DELETE FROM iris
2023-11-03 11:28:48,966 INFO sqlalchemy.engine.Engine [cached since 0.1589s ago] ()
2023-11-03 11:28:48,973 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-03 11:28:48,973 INFO sqlalchemy.engine.Engine [cached since 0.1602s ago] [('2023-11-03 15:28:48.971869', 1.4, 0.2, 5.1, 3.5, 'setosa', '2023-11-03 15:28:48.971872'), ('2023-11-03 15:28:48.971872', 1.4, 0.2, 4.9, 3.0, 'setosa', '2023-11-03 15:28:48.971873'), ('2023-11-03 15:28:48.971873', 1.3, 0.2, 4.7, 3.2, 'setosa', '2023-11-03 15:28:48.971874'), ('2023-11-03 15:28:48.971874', 1.5, 0.2, 4.6, 3.1, 'setosa', '2023-11-03 15:28:48.971874'), ('2023-11-03 15:28:48.971875', 1.4, 0.2, 5.0, 3.6, 'setosa', '2023-11-03 15:28:48.971875'), ('2023-11-03 15:28:48.971876', 1.7, 0.4, 5.4

In [None]:
with itab.query() as q:
    db_irises = q.select(['id', 'sepal_width', 'sepal_length'])
    pprint.pprint(db_irises[:2])

2023-11-03 11:29:34,898 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-03 11:29:34,901 INFO sqlalchemy.engine.Engine SELECT iris.id, iris.sepal_width, iris.sepal_length 
FROM iris
2023-11-03 11:29:34,903 INFO sqlalchemy.engine.Engine [cached since 19.96s ago] ()
[Iris(sepal_length=5.1,
      sepal_width=3.5,
      petal_length=MISSING,
      petal_width=MISSING,
      species=MISSING,
      id=1,
      updated=MISSING,
      added=MISSING),
 Iris(sepal_length=4.9,
      sepal_width=3.0,
      petal_length=MISSING,
      petal_width=MISSING,
      species=MISSING,
      id=2,
      updated=MISSING,
      added=MISSING)]
2023-11-03 11:29:34,909 INFO sqlalchemy.engine.Engine COMMIT


## Working with Multple Tables