In [1]:
from __future__ import annotations
import pprint
import pandas as pd

import sys
sys.path.append('../')
import doctable

The iris dataset is simply a list of flowers with information about the sepal, petal, and species.

In [2]:
iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
core = doctable.ConnectCore.open(
    target=':memory:', # use a filename for a sqlite to write to disk
    dialect='sqlite',
    echo=True,
)
core

ConnectCore(target=':memory:', dialect='sqlite', engine=Engine(sqlite:///:memory:), metadata=MetaData())

In [4]:
import datetime

@doctable.table_schema(table_name='iris', slots=True)
class Iris:
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float
    species: str
    
    id: int = doctable.Column(
        column_args=doctable.ColumnArgs(order=0, primary_key=True, autoincrement=True),
    )
    updated: datetime.datetime = doctable.Column(
        column_args=doctable.ColumnArgs(default=datetime.datetime.utcnow),
    )
    added: datetime.datetime = doctable.Column(
        column_args=doctable.ColumnArgs(
            default=datetime.datetime.utcnow, 
            onupdate=datetime.datetime.utcnow
        )
    )
    
    @classmethod
    def from_row(cls, row: pd.Series):
        return cls(**row)

Iris(sepal_length=1, sepal_width=2, petal_length=3, petal_width=4, species='setosa')

Iris(sepal_length=1, sepal_width=2, petal_length=3, petal_width=4, species='setosa', id=MISSING, updated=MISSING, added=MISSING)

In [5]:
irises = [Iris.from_row(row) for _, row in iris_df.iterrows()]
print(irises[0])

Iris(sepal_length=5.1, sepal_width=3.5, petal_length=1.4, petal_width=0.2, species='setosa', id=MISSING, updated=MISSING, added=MISSING)


In [6]:
with core.begin_ddl() as emitter:
    itab = emitter.create_table_if_not_exists(Iris)
itab.name
#itab.inspect().get_columns()
import sqlalchemy
itab.inspect_columns()

2023-11-05 10:57:30,496 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,497 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("iris")
2023-11-05 10:57:30,498 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:57:30,499 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("iris")
2023-11-05 10:57:30,500 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:57:30,502 INFO sqlalchemy.engine.Engine 
CREATE TABLE iris (
	id INTEGER, 
	added DATETIME, 
	petal_length FLOAT, 
	petal_width FLOAT, 
	sepal_length FLOAT, 
	sepal_width FLOAT, 
	species VARCHAR, 
	updated DATETIME, 
	PRIMARY KEY (id)
)


2023-11-05 10:57:30,503 INFO sqlalchemy.engine.Engine [no key 0.00084s] ()
2023-11-05 10:57:30,504 INFO sqlalchemy.engine.Engine COMMIT
2023-11-05 10:57:30,505 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,505 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("iris")
2023-11-05 10:57:30,506 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 

[{'name': 'id',
  'type': INTEGER(),
  'nullable': True,
  'default': None,
  'primary_key': 1},
 {'name': 'added',
  'type': DATETIME(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'petal_length',
  'type': FLOAT(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'petal_width',
  'type': FLOAT(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'sepal_length',
  'type': FLOAT(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'sepal_width',
  'type': FLOAT(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'species',
  'type': VARCHAR(),
  'nullable': True,
  'default': None,
  'primary_key': 0},
 {'name': 'updated',
  'type': DATETIME(),
  'nullable': True,
  'default': None,
  'primary_key': 0}]

## Running Queries

### General Queries

In [7]:
with core.query() as q:
    q.delete(itab, all=True)
    q.insert_single(itab, {
        'sepal_length': 1,'sepal_width': 2,'petal_length': 3,'petal_width': 4,'species': 'setosa'
    })
    print(q.insert_multi.__doc__)
    q.insert_multi(itab, [
        {'sepal_length': 1, 'sepal_width': 2, 'petal_length': 3, 'petal_width': 4, 'species': 'setosa'},
        {'sepal_length': 1, 'sepal_width': 2, 'petal_length': 3, 'petal_width': 4, 'species': 'setosa'},
    ])
    print(q.select(itab).fetchall())

2023-11-05 10:57:30,553 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,555 INFO sqlalchemy.engine.Engine DELETE FROM iris
2023-11-05 10:57:30,556 INFO sqlalchemy.engine.Engine [generated in 0.00275s] ()
2023-11-05 10:57:30,559 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-05 10:57:30,560 INFO sqlalchemy.engine.Engine [generated in 0.00120s] ('2023-11-05 15:57:30.559574', 3.0, 4.0, 1.0, 2.0, 'setosa', '2023-11-05 15:57:30.559576')
Insert multiple rows into the database using executemany-style 
            parameter binding.
        
2023-11-05 10:57:30,562 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-05 10:57:30,563 INFO sqlalchemy.engine.Engine [generated in 0.00095s] [('2023-11-05 15:57:30.562511', 3.0, 4.0, 1.0, 2.0

In [8]:
with core.query() as q:
    # use table.cols to specify which columns to select
    columns = itab.cols('sepal_length', 'sepal_width')
    pprint.pprint(q.select(columns).fetchall())
    
    # use subscript to specify table for each column. use for table joins
    columns = [itab['sepal_length'], itab['sepal_width']]
    results = q.select(columns).fetchall()
    pprint.pprint(results)
    
    # use .sum(), .min(), .max(), .count(), .sum(), and .unique() to specify aggregate functions
    columns = [itab['species'].distinct()]
    result = q.select(columns).scalars()
    pprint.pprint(f'{result=}')
    
    # use in conjunction with group_by to specify groupings
    columns = [itab['sepal_length'].sum()]
    result = q.select(columns, group_by=[itab['species']]).scalar_one()
    pprint.pprint(f'{result=}')

2023-11-05 10:57:30,610 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,612 INFO sqlalchemy.engine.Engine SELECT iris.sepal_length, iris.sepal_width 
FROM iris
2023-11-05 10:57:30,613 INFO sqlalchemy.engine.Engine [generated in 0.00239s] ()
[(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)]
2023-11-05 10:57:30,614 INFO sqlalchemy.engine.Engine SELECT iris.sepal_length, iris.sepal_width 
FROM iris
2023-11-05 10:57:30,615 INFO sqlalchemy.engine.Engine [cached since 0.004547s ago] ()
[(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)]
2023-11-05 10:57:30,617 INFO sqlalchemy.engine.Engine SELECT distinct(iris.species) AS distinct_1 
FROM iris
2023-11-05 10:57:30,618 INFO sqlalchemy.engine.Engine [generated in 0.00076s] ()
'result=<sqlalchemy.engine.result.ScalarResult object at 0x7f87a44468a0>'
2023-11-05 10:57:30,625 INFO sqlalchemy.engine.Engine SELECT sum(iris.sepal_length) AS sum_1 
FROM iris GROUP BY iris.species
2023-11-05 10:57:30,626 INFO sqlalchemy.engine.Engine [generated in 0.00099s] ()
'

#### Table-specific Queries

In [9]:
with itab.query() as q:
    pprint.pprint(q.select())

2023-11-05 10:57:30,660 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,661 INFO sqlalchemy.engine.Engine SELECT iris.id, iris.added, iris.petal_length, iris.petal_width, iris.sepal_length, iris.sepal_width, iris.species, iris.updated 
FROM iris
2023-11-05 10:57:30,662 INFO sqlalchemy.engine.Engine [cached since 0.09616s ago] ()
[Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=1,
      updated=datetime.datetime(2023, 11, 5, 15, 57, 30, 559576),
      added=datetime.datetime(2023, 11, 5, 15, 57, 30, 559574)),
 Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=2,
      updated=datetime.datetime(2023, 11, 5, 15, 57, 30, 562513),
      added=datetime.datetime(2023, 11, 5, 15, 57, 30, 562511)),
 Iris(sepal_length=1.0,
      sepal_width=2.0,
      petal_length=3.0,
      petal_width=4.0,
      species='setosa',
      id=3

In [10]:
with itab.query() as q:
    q.delete(all=True)
    
    q.insert_multi(irises)
    
    db_irises = q.select()
    print(len(db_irises))
    pprint.pprint(db_irises[:2])

2023-11-05 10:57:30,713 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,714 INFO sqlalchemy.engine.Engine DELETE FROM iris
2023-11-05 10:57:30,715 INFO sqlalchemy.engine.Engine [cached since 0.1618s ago] ()
2023-11-05 10:57:30,727 INFO sqlalchemy.engine.Engine INSERT OR FAIL INTO iris (added, petal_length, petal_width, sepal_length, sepal_width, species, updated) VALUES (?, ?, ?, ?, ?, ?, ?)
2023-11-05 10:57:30,728 INFO sqlalchemy.engine.Engine [cached since 0.1662s ago] [('2023-11-05 15:57:30.723621', 1.4, 0.2, 5.1, 3.5, 'setosa', '2023-11-05 15:57:30.723625'), ('2023-11-05 15:57:30.723626', 1.4, 0.2, 4.9, 3.0, 'setosa', '2023-11-05 15:57:30.723627'), ('2023-11-05 15:57:30.723628', 1.3, 0.2, 4.7, 3.2, 'setosa', '2023-11-05 15:57:30.723629'), ('2023-11-05 15:57:30.723630', 1.5, 0.2, 4.6, 3.1, 'setosa', '2023-11-05 15:57:30.723631'), ('2023-11-05 15:57:30.723632', 1.4, 0.2, 5.0, 3.6, 'setosa', '2023-11-05 15:57:30.723632'), ('2023-11-05 15:57:30.723633', 1.7, 0.4, 5.4

In [11]:
with itab.query() as q:
    db_irises = q.select(['id', 'sepal_width', 'sepal_length'])
    pprint.pprint(db_irises[:2])

2023-11-05 10:57:30,766 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:57:30,767 INFO sqlalchemy.engine.Engine SELECT iris.id, iris.sepal_width, iris.sepal_length 
FROM iris
2023-11-05 10:57:30,768 INFO sqlalchemy.engine.Engine [generated in 0.00224s] ()
[Iris(sepal_length=5.1,
      sepal_width=3.5,
      petal_length=MISSING,
      petal_width=MISSING,
      species=MISSING,
      id=1,
      updated=MISSING,
      added=MISSING),
 Iris(sepal_length=4.9,
      sepal_width=3.0,
      petal_length=MISSING,
      petal_width=MISSING,
      species=MISSING,
      id=2,
      updated=MISSING,
      added=MISSING)]
2023-11-05 10:57:30,771 INFO sqlalchemy.engine.Engine COMMIT


## Working with Multple Tables

In [12]:
print(iris_df['species'].unique())

species_data = {
    'setosa':'bristle-pointed iris',
    'versicolor':'Southern blue flag',
    'virginica':'Northern blue flag',
}

['setosa' 'versicolor' 'virginica']


In [15]:
import typing

@doctable.table_schema(table_name='species')
class Species:
    name: str
    common_name: str = doctable.Column(
        column_args=doctable.ColumnArgs(nullable=True),
    )
    
    id: int = doctable.Column(# will appear as the first column in the table
        column_args=doctable.ColumnArgs(order=0, primary_key=True, autoincrement=True),
    )
    
    @classmethod
    def from_dict(cls, data: typing.Dict[str, str]) -> typing.List[Species]:
        return [cls(name=n, common_name=cn) for n,cn in data.items()]

@doctable.table_schema(table_name='iris')
class IrisEntry:
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float
    species: str = doctable.Column(
        column_args=doctable.ColumnArgs(foreign_key='species.name', nullable=False),
    )
    
    id: int = doctable.Column(# will appear as the first column in the table
        column_args=doctable.ColumnArgs(order=0, primary_key=True, autoincrement=True),
    )
    
    @classmethod
    def from_dataframe(cls, df: pd.DataFrame) -> typing.List[IrisEntry]:
        return [cls(**row) for _,row in df.iterrows()]


core = doctable.ConnectCore.open(
    target=':memory:', # use a filename for a sqlite to write to disk
    dialect='sqlite',
    echo=True,
)

with core.begin_ddl() as emitter:
    spec_tab = emitter.create_table_if_not_exists(Species)
    iris_tab = emitter.create_table_if_not_exists(IrisEntry)
print(spec_tab.inspect_columns())

2023-11-05 10:59:54,101 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-05 10:59:54,102 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("species")
2023-11-05 10:59:54,103 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:59:54,104 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("species")
2023-11-05 10:59:54,105 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:59:54,106 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("iris")
2023-11-05 10:59:54,107 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:59:54,108 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("iris")
2023-11-05 10:59:54,109 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-05 10:59:54,110 INFO sqlalchemy.engine.Engine 
CREATE TABLE species (
	id INTEGER, 
	common_name VARCHAR, 
	name VARCHAR, 
	PRIMARY KEY (id)
)


2023-11-05 10:59:54,111 INFO sqlalchemy.engine.Engine [no key 0.00086s] ()
2023-11-05 10:59:54,112 INFO sqlalchemy.engine.Engine 
CREATE TABLE iris (
	id INTEGER,