In [1]:
# did/example/Core_API
#   v0.0.0
#   Purpose: to demonstrate the key features of the DID API.

In [2]:
# This adds the path to import the development version (git repo) of DID Python.
import os
import sys
# Resolve the parent of the current working directory (the repo root when the
#   notebook is run from its own folder).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    # Prepend rather than append: entries earlier on sys.path win, so this makes
    #   the development checkout take precedence over any installed `did` package.
    sys.path.insert(0, module_path)

In [3]:
# Basic DID imports.
from did import DID, DIDDocument, Query as Q
from did.database import SQL

from did.exception import IntegrityError

In [4]:
# Libraries used in the binary data storage demo (`numpy` is third-party; `struct` is in the standard library).
import numpy as np
import struct

In [5]:
# The DID instance is initialized with
#   a DID database, which inherits from the DID_Database abstract class,
#   a path to a directory where the binary data lives,
#   and the optional argument `auto_save`, which defaults to False.
#
# The connection string is read from the DID_TEST_DATABASE_URL environment variable
#   so that real credentials never have to be committed with the notebook.
#   The fallback is the local test database this example was written against.
did = DID(
    driver = SQL(
        os.environ.get(
            'DID_TEST_DATABASE_URL',
            'postgres://postgres:password@localhost:5432/did_tests',
        ),

        # NOTE(review): presumably wipes existing database contents on start-up,
        #   so only point this at a disposable database — confirm.
        hard_reset_on_init = True,
        debug_mode = False,
        verbose_feedback = True,
    ),
    binary_directory = './test_sql_crud',

    auto_save = True,
)

In [6]:
# `DIDDocument` objects are wrappers for DID documents,
#   which are JSON data structured by NDI schema.
# `DIDDocument`s are initialized by passing in that JSON structure.

# Here, we set up three DID documents and initialize them.
def _mock_document(doc_id, name, app_a, app_b):
    """Build the JSON structure for one mock `ndi_document_app` document.

    The example documents differ only in their id, name and `app` flags;
    everything else is shared boilerplate. A fresh dict (with fresh nested
    lists) is created on every call so the documents share no mutable state.

    Args:
        doc_id: value stored under `base.id` (a string such as '0').
        name: value stored under `base.name`.
        app_a: boolean stored under `app.a`.
        app_b: boolean stored under `app.b`.

    Returns:
        A dict with the keys `base`, `depends_on`, `dependencies`,
        `binary_files`, `document_class` and `app`, in that order.
    """
    return {
        'base': {
            'id': doc_id,
            'session_id': '2387492',
            'name': name,
            'datestamp': '2020-10-28T08:12:20+0000',
            'snapshots': [],
            'records': [],
        },
        'depends_on': [],
        'dependencies': [],
        'binary_files': [],
        'document_class': {
            'definition': '$NDIDOCUMENTPATH/ndi_document_app.json',
            'validation': '$NDISCHEMAPATH/ndi_document_app_schema.json',
            'class_name': 'ndi_document_app',
            'property_list_name': 'app',
            'class_version': 1,
            'superclasses': [{
                'definition': '$NDIDOCUMENTPATH/base_document.json'
            }],
        },
        'app': {
            'a': app_a,
            'b': app_b,
        },
    }


# Documents A, B and C: same session and class, different `app` flags.
mock_document_data = [
    _mock_document('0', 'A', True, True),
    _mock_document('1', 'B', True, False),
    _mock_document('2', 'C', False, False),
]
# Wrap each raw JSON structure in a `DIDDocument`, in the same order as `mock_document_data`.
moc_docs = [DIDDocument(data) for data in mock_document_data]

In [7]:
# We'll start by adding our three `DIDDocument`s to the DID instance.

#   Note: Since `auto_save` has been set to True, they are saved to the database individually.
#         When `auto_save` is False, changes may be saved by calling DID.save().
#         Alternatively, all methods that modify the database have the `save` keyword argument,
#           which can be set to true or false to override `auto_save`.
#         Saving and snapshots will be covered in further detail below (see ## Transaction Management ##).

for doc in moc_docs:
    did.add(doc)

saving...
Changes saved.
saving...
Changes saved.
saving...
Changes saved.


In [8]:
# Attempts to add duplicate `DIDDocument`s will throw an IntegrityError with useful information.

try:
    # moc_docs[0] was already added by the loop in the previous cell, so this add is the duplicate.
    did.add(moc_docs[0])
except IntegrityError as error:
    # Only IntegrityError is caught here; any other exception still propagates.
    print(error)

Duplicate Key error for document id=0.


In [9]:
# `DIDDocument`s can be retrieved by ID...

doc = did.find_by_id(moc_docs[0].id)

# Display only the `base` section of the retrieved document.
doc.data['base']

{'id': '0',
 'name': 'A',
 'records': ['94e60c8e6c11bea644d8714a59f031b6282f66430025b2652b03d075377b19f9'],
 'datestamp': '2020-10-28T08:12:20+0000',
 'snapshots': [1],
 'session_id': '2387492'}

In [10]:
# or by DID Query.

# `Q(...) == value` is used to build a query condition (the result is handed to `did.find`),
#   so the explicit `== True` is intentional and should not be "simplified" away.
by_app_a = Q('app.a') == True
docs = did.find(by_app_a)

# Display the `app` section of every matching document.
[doc.data['app'] for doc in docs]

<did.database.sql.SQL object at 0x10eca4d90>


[{'a': True, 'b': True}, {'a': True, 'b': False}]

In [11]:
# More complex DID Queries may be composed with and (&) and or (|) operators,
#   with order of operations left-to-right or as structured by parentheses.

# Each name states the condition it actually encodes (both compare against False).
app_a_is_false = Q('app.a') == False
app_b_is_false = Q('app.b') == False
is_not_zero_version = Q('base.version') > '0'
is_ndi_doc_class = Q('document_class.class_name').contains('ndi_')

# NOTE(review): none of the mock documents defines `base.version`, which is presumably
#   why this query matches no documents — confirm.
by_complex_query = (app_a_is_false | app_b_is_false) & is_not_zero_version & is_ndi_doc_class
docs = did.find(by_complex_query)

[doc.data for doc in docs]

<did.database.sql.SQL object at 0x10eca4d90>


[]

In [12]:
# All documents can be retrieved very simply.

# `find` is called without a query here.
did.find()

<did.database.sql.SQL object at 0x10eca4d90>


[<did.document.DIDDocument at 0x10eca4160>,
 <did.document.DIDDocument at 0x10eca4cd0>,
 <did.document.DIDDocument at 0x10eca4a60>]

In [13]:
# `DIDDocument`s can be updated with instances at hand...

# Use-cases:
# - `DIDDocument` instance is available.
# - Avoids need for dict `payload`.

# Change the in-memory instance first, then push it to the database.
doc = moc_docs[0]
doc.data['app']['c'] = True
did.update(doc)

# Read the document back from the database to show the stored `app` section.
doc_from_db = did.find_by_id(moc_docs[0].id)
doc_from_db.data['app']

Changes saved.


{'a': True, 'b': True, 'c': True}

In [14]:
# , by ID and payload...

# Use-cases:
# - `DIDDocument` instance is not available.
# - Allows reuse of `payload`s.

# The payload holds only the fields to change, nested the same way as the document.
payload = { 'app': { 'c': False } }
did.update_by_id(moc_docs[0].id, payload)

# Read the document back from the database to show the stored `app` section.
doc_from_db = did.find_by_id(moc_docs[0].id)
doc_from_db.data['app']

Changes saved.


{'a': True, 'b': True, 'c': False}

In [15]:
#, or by Query and payload.

# Use-cases:
# - Multiple documents require same updates.
# - Allows reuse of `payload`s.

# As the output shows, matching documents keep their existing `app.a`
#   and gain/overwrite `b`, `c` and `d` from the payload.
payload = {
    'app': {
        'b': False,
        'c': True,
        'd': True,
    },
}
# Select every document whose `app.a` is True.
by_app_a = Q('app.a') == True
did.update_many(by_app_a, payload)

# List the `app` section of all documents to show which ones changed.
docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': True, 'b': False, 'c': True, 'd': True}]

In [16]:
# If you are not sure if a DID document is in the database or not,
#   but want to update it if it is, use DID.upsert()
# This will add the document to the database if it does not already exist...

# NOTE(review): unlike the three mock documents defined earlier, this one has no
#   top-level 'dependencies' key — confirm whether that omission is intentional.
new_moc_doc = DIDDocument({
    'base': {
        'id': '3',
        'session_id': '2387492',
        'name': 'D',
        'datestamp': '2020-10-28T08:12:20+0000',
        'snapshots': [],
        'records': [],
    },
    'depends_on': [],
    'binary_files': [],
    'document_class': {
        'definition': '$NDIDOCUMENTPATH/ndi_document_app.json',
        'validation': '$NDISCHEMAPATH/ndi_document_app_schema.json',
        'class_name': 'ndi_document_app',
        'property_list_name': 'app',
        'class_version': 1,
        'superclasses': [{
            'definition': '$NDIDOCUMENTPATH/base_document.json'
        }],
    },
    'app': {
        'a': False,
        'b': True
    },
})
# Document id '3' is not in the database yet, so this upsert adds it.
did.upsert(new_moc_doc)

# List the `app` section of all documents; the new one appears last in the output.
docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': False, 'b': True}]

In [17]:
# If the document does exist in the database, it will be updated.

# Rebuild `app` from its existing keys plus the new `c` and `d` flags.
new_moc_doc.data['app'] = {
    **new_moc_doc.data['app'],
    'c': False,
    'd': False,
}
# Same document as in the previous cell, so this upsert is an update.
did.upsert(new_moc_doc)

docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': False, 'b': True, 'c': False, 'd': False}]

In [18]:
# Similarly to updates, `DIDDocument`s can be deleted directly...

# Pass the `DIDDocument` instance itself.
did.delete(new_moc_doc)

# List the `app` section of the documents that remain.
docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False},
 {'a': True, 'b': False, 'c': True, 'd': True},
 {'a': True, 'b': False, 'c': True, 'd': True}]

In [19]:
#, by ID...

# Print the name of the document about to be deleted, so the output shows which one it was.
print(f'deleting {moc_docs[0].data["base"]["name"]}')
did.delete_by_id(moc_docs[0].id)

# Show name and `app` section of each remaining document.
docs_from_db = did.find()
[
    {
        'name': doc.data['base']['name'],
        'app': doc.data['app']
    }
    for doc in docs_from_db
]

deleting A
Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'name': 'C', 'app': {'a': False, 'b': False}},
 {'name': 'B', 'app': {'a': True, 'b': False, 'c': True, 'd': True}}]

In [20]:
#, or by Query.

# Delete every document whose `app.a` is True.
by_app_a = Q('app.a') == True
did.delete_many(by_app_a)

# List the `app` section of the documents that remain.
docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

Changes saved.
<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False}]

In [21]:
## Transaction Management ##

# All CRUD operations so far have been saved by default,
#   because the DID instance was instantiated with `auto_save = True`.

# To manage calls to the database, operations can be bundled into transactions (working_snapshots).

# CRUD methods that modify the database have the `save: bool` keyword parameter;
#   operations that are not saved will open a new transaction or be added to the current one.

# All operations under a current transaction are passed to the database when DID.save() is called,
#   or when a CRUD method is called with `save = True`. At this point, the working snapshot
#   is committed to the database.

# All operations under a current transaction can be discarded by calling DID.revert().

In [22]:
# `DIDDocument`s can be upserted, deleted, or otherwise modified,
#   but until they are saved, they can be reverted...

# `save = False` overrides the instance-level `auto_save = True` for these calls,
#   so the upserts stay unsaved.
for doc in moc_docs:
    did.upsert(doc, save = False)
# Discard the unsaved upserts.
did.revert()

Changes reverted.


In [23]:
# and the documents in the database will remain unchanged.

# Same listing as before the reverted upserts, for comparison.
docs_from_db = did.find()
[doc.data['app'] for doc in docs_from_db]

<did.database.sql.SQL object at 0x10eca4d90>


[{'a': False, 'b': False}]

In [24]:
# `DIDDocument`s may have associated binary data,
#   which is accessible through the DID instance's bin property.

# The list_files utility can be used to check what binary data exists for a given `DIDDocument`.

# Use the first document returned by the previous `find()`; the cells below reuse `doc`.
doc = docs_from_db[0]
did.bin.list_files(doc)

[]

In [25]:
# New binary data is given a name or identifier, and passed in as bytes.
# Names should be in snake_case,
#   and should not contain spaces or special characters.

# The stream is used as a context manager, so it is closed when the block exits.
with did.bin.open_write_stream(doc, 'new_data_name') as write_stream:
    write_stream.write(b'This is test data.')

# The new name now shows up in the document's file listing.
did.bin.list_files(doc)

['new_data_name']

In [26]:
# The new file's path is accessible, relative to the working directory.
#   Note that the filename is composed of the document id hyphenated with the data's name,
#     followed by '--on_' and the document's latest record.
#     <document_id>-<name>--on_<previous_hash>.bin
#     On documents that haven't yet been recorded, <previous_hash> defaults to "NEW".

# Look the file up by document and data name.
did.bin.get_filepath(doc, 'new_data_name')

PosixPath('test_sql_crud/2-new_data_name--on_ad762ac8fc53b4b030db11dd6fc3e21ec3bba6ed67414adbaa0f5cc73a7d77b5.bin')

In [27]:
# Binary data is accessed as a read stream.

with did.bin.open_read_stream(doc, 'new_data_name') as read_stream:
    # `read()` with no argument returns everything that was written.
    print(read_stream.read())

b'This is test data.'


In [28]:
# Separate binary files can be opened by simply writing to a new name.

# One million random float64 samples, kept in `random_data` so the read-back
#   cell can compare against them.
random_data = np.random.random(1_000_000)

with did.bin.open_write_stream(doc, 'numpy_data') as write_stream:
    # Write the whole buffer in a single call. For a float64 array, `tobytes()`
    #   yields the same native-order 8-byte doubles as calling
    #   struct.pack('d', x) per element, without a million Python-level writes.
    write_stream.write(random_data.tobytes())

did.bin.list_files(doc)

['new_data_name', 'numpy_data']

In [29]:
# More complex read operations are supported through standard python libraries.

with did.bin.open_read_stream(doc, 'numpy_data') as read_stream:

    # reading a chunk of ten bytes (iterating over bytes yields ints 0-255)
    data = read_stream.read(10)
    parsed_data = ', '.join(str(x).rjust(3, ' ') for x in data)
    print(f'First 10 items:  {parsed_data}')

    # moving the pointer
    read_stream.seek(5)
    print(f'Pointer has moved to index {read_stream.tell()}.')

    # reading chunks
    data = read_stream.read(10)
    parsed_data = ', '.join(str(x).rjust(3, ' ') for x in data)
    print(f'Ten items from index 5:                   {parsed_data}')

    # converting back to numpy array
    read_stream.seek(0)
    data = read_stream.read()
    # The file holds raw 8-byte doubles, so decode it as float64.
    retrieved_data = np.frombuffer(data, dtype=np.float64)
    # np.array_equal compares shape and contents in one vectorised call; the builtin
    #   all() would iterate a million elements in Python and fail on a shape mismatch.
    print(f'All readable data matches written data: {np.array_equal(retrieved_data, random_data)}')

First 10 items:  224,  88, 208,  42, 130, 163, 160,  63, 130,  69
Pointer has moved to index 5.
Ten items from index 5:                   163, 160,  63, 130,  69, 106, 252, 102, 219, 214
All readable data matches written data: True


In [30]:
# Binary files can be removed as well.

# Remove one file by document and data name.
did.bin.remove_file(doc, 'new_data_name')

# List the document's remaining files.
did.bin.list_files(doc)

['numpy_data']