# DocTable Schemas
There are two ways to define schemas for a DocTable:

1. **dataclass schema**: a dataclass object which inherits from `DocTableSchema`.
2. **list schema**: sequence of column specifications according to a custom doctable format.

The doctable package builds on sqlalchemy, so both types of schema specifications ultimately result in a sequence of [`sqlalchemy` column types](https://docs.sqlalchemy.org/en/13/core/type_basics.html) that will be used to construct (or interface with) the database.

In [1]:
from datetime import datetime
from pprint import pprint
import pandas as pd

import sys
sys.path.append('..')
import doctable

## Schema Type Maps
There are two lookup tables used to relate to sqlalchemy column types. The first is a map from Python datatypes to the sqlalchemy types. This is sufficient for the simplest possible dataclass schema specification.

In [2]:
doctable.python_to_slqlchemy_type

{int: sqlalchemy.sql.sqltypes.Integer,
 float: sqlalchemy.sql.sqltypes.Float,
 str: sqlalchemy.sql.sqltypes.String,
 bool: sqlalchemy.sql.sqltypes.Boolean,
 datetime.datetime: sqlalchemy.sql.sqltypes.DateTime,
 datetime.time: sqlalchemy.sql.sqltypes.Time,
 datetime.date: sqlalchemy.sql.sqltypes.Date}

The second is a string lookup that is provided for the list schema format. You can see that this offers a larger number of types compared to the Python type conversion.

In [3]:
doctable.string_to_sqlalchemy_type

{'biginteger': sqlalchemy.sql.sqltypes.BigInteger,
 'boolean': sqlalchemy.sql.sqltypes.Boolean,
 'date': sqlalchemy.sql.sqltypes.Date,
 'datetime': sqlalchemy.sql.sqltypes.DateTime,
 'enum': sqlalchemy.sql.sqltypes.Enum,
 'float': sqlalchemy.sql.sqltypes.Float,
 'integer': sqlalchemy.sql.sqltypes.Integer,
 'interval': sqlalchemy.sql.sqltypes.Interval,
 'largebinary': sqlalchemy.sql.sqltypes.LargeBinary,
 'numeric': sqlalchemy.sql.sqltypes.Numeric,
 'smallinteger': sqlalchemy.sql.sqltypes.SmallInteger,
 'string': sqlalchemy.sql.sqltypes.String,
 'text': sqlalchemy.sql.sqltypes.Text,
 'time': sqlalchemy.sql.sqltypes.Time,
 'unicode': sqlalchemy.sql.sqltypes.Unicode,
 'unicodetext': sqlalchemy.sql.sqltypes.UnicodeText,
 'json': doctable.schemas.custom_coltypes.JSONType,
 'pickle': doctable.schemas.custom_coltypes.CpickleType,
 'picklefile': doctable.schemas.custom_coltypes.PickleFileType,
 'textfile': doctable.schemas.custom_coltypes.TextFileType}

## Dataclass Schemas
The simplest way to specify a schema for doctable is to give a dataclass object which inherits from `DocTableSchema`. This object will represent a single row in the table, and 

In [4]:
from dataclasses import dataclass
from datetime import datetime

@dataclass
class Record(doctable.DocTableSchema):
    
    # custom doctable column types
    id: int = doctable.IDCol() # auto-increment primary key
    added: datetime = doctable. AddedCol() # record when row was added
    updated: datetime = doctable.UpdatedCol() # record when row was updated
    
    # generic column object.
    # Keyword arguments are passed directly to sqlalchemy Column constructor
    name: str = doctable.Col(nullable=False)
    
    # first argument is default value or factory (automatically determined)
    num_siblings: int = doctable.Col(0)
        
    # this will be stored as a binary type in sql
    friends: list = doctable.Col(list)
    
    # can also use regular scalar default values
    age: int = 6
    is_old: bool = None
        
    # indices and constraints - these are used by DocTableSchema objects
    _indices_ = {
        # SQLAlchemy: Index('name_index', 'name')
        'name_index': ('name',),
        
        # SQLAlchemy: Index('name_age_index', 'name', 'age', unique=True)
        'name_age_index': ('name', 'age', {'unique':True}),
    }
    
    # add constraints to table
    _constraints_ = (
        
        #SQLAlchemy:  UniqueConstraint('name', 'age')
        ('unique', 'name', 'age'),
        
        #SQLAlchemy: CheckConstraint('age > 0', name='check_age')
        ('check', 'age > 0', {'name':'check_age'}), 
        
        #('foreignkey', ('a','b'), ('c','d')),
    )
        
        
    # doctable method to execute after constructor is created
    def __post_init__(self):
        self.is_old = age > 28
        
    # any custom method the user would like to add
    @property
    def num_friends(self):
        return len(self.friends)
    
    
db = doctable.DocTable(target=':memory:', schema=Record)
db.schema_table()

Unnamed: 0,name,type,nullable,default,autoincrement,primary_key
0,id,INTEGER,False,,auto,1
1,added,DATETIME,True,,auto,0
2,updated,DATETIME,True,,auto,0
3,name,VARCHAR,False,,auto,0
4,num_siblings,INTEGER,True,,auto,0
5,friends,BLOB,True,,auto,0
6,age,INTEGER,True,,auto,0
7,is_old,BOOLEAN,True,,auto,0


## List Schemas
And this is another example showing the list schema format.

In [5]:
schema = (
    # standard id column
    #SQLAlchemy: Column('id', Integer, primary_key = True, autoincrement=True), 
    ('integer', 'id', dict(primary_key=True, autoincrement=True)),
    # short form (can't provide any additional args though): ('idcol', 'id')

    # make a category column with two options: "FICTION" and "NONFICTION"
    #SQLAlchemy: Column('title', String,)
    ('string', 'category', dict(nullable=False)),

    # make a non-null title column
    #SQLAlchemy: Column('title', String,)
    ('string', 'title', dict(nullable=False)),

    # make an abstract where the default is an empty string instead of null
    #SQLAlchemy: Column('abstract', String, default='')
    ('string', 'abstract',dict(default='')),

    # make an age column where age must be greater than zero
    #SQLAlchemy: Column('abstract', Integer)
    ('integer', 'age'),

    # make a column that keeps track of column updates
    #SQLAlchemy: Column('updated_on', DateTime(), default=datetime.now, onupdate=datetime.now)
    ('datetime', 'updated_on',  dict(default=datetime.now, onupdate=datetime.now)),
    # short form to auto-record update date: ('date_updated', 'updated_on')
    
    #SQLAlchemy: Column('updated_on', DateTime(), default=datetime.now)
    ('datetime', 'updated_on',  dict(default=datetime.now)),
    # short form to auto-record insertion date: ('date_added', 'added_on')

    # make a string column with max of 500 characters
    #SQLAlchemy: Column('abstract', String, default='')
    ('string', 'text',dict(),dict(length=500)),

    
    ##### Custom DocTable Column Types #####
    
    # uses json.dump to convert python object to json when storing and
    # json.load to convert json back to python when querying
    ('json','json_data'),
    
    # stores pickled python object directly in table as BLOB
    # TokensType and ParagraphsType are defined in doctable/coltypes.py
    # SQLAlchemy: Column('tokenized', TokensType), Column('sentencized', ParagraphsType)
    ('pickle','tokenized'),
    
    # store pickled data into a separate file, recording only filename directly in table
    # the 'fpath' argument can specify where the files should be placed, but by
    # default they are stored in <dbname>_<tablename>_<columnname>
    #('picklefile', 'pickle_obj', dict(), dict(fpath='folder_for_picklefiles')),
    
    # very similar to above, but use only when storing text data
    #('textfile', 'text_file'), # similar to above
    
    
    ##### Constraints #####
    
    #SQLAlchemy: CheckConstraint('category in ("FICTION","NONFICTION")', name='salary_check')
    ('check_constraint', 'category in ("FICTION","NONFICTION")', dict(name='salary_check')),
    
    #SQLAlchemy: CheckConstraint('age > 0')
    ('check_constraint', 'age > 0'),
    
    # make sure each category/title entry is unique
    #SQLAlchemy:  UniqueConstraint('category', 'title', name='work_key')
    ('unique_constraint', ['category','title'], dict(name='work_key')),
    
    # makes a foreign key from the 'subkey' column of this table to the 'id'
    # column of ANOTHERDOCTABLE, setting the SQL onupdate and ondelete foreign key constraints
    #('foreignkey_constraint', [['subkey'], [ANOTHERDOCTABLE['id']]], {}, dict(onupdate="CASCADE", ondelete="CASCADE")),
    #NOTE: Can't show here because we didn't make ANOTHERDOCTABLE
    
    ##### Indexes ######
    
    # make index table
    # SQLAlchemy: Index('ind0', 'category', 'title', unique=True)
    ('index', 'ind0', ('category','title'),dict(unique=True)),
    
)
md = doctable.DocTable(target=':memory:', schema=schema, verbose=True)
md.schema_table()

Unnamed: 0,name,type,nullable,default,autoincrement,primary_key
0,id,INTEGER,False,,auto,1
1,category,VARCHAR,False,,auto,0
2,title,VARCHAR,False,,auto,0
3,abstract,VARCHAR,True,,auto,0
4,age,INTEGER,True,,auto,0
5,updated_on,DATETIME,True,,auto,0
6,text,VARCHAR(500),True,,auto,0
7,json_data,VARCHAR,True,,auto,0
8,tokenized,BLOB,True,,auto,0
