In [1]:
import logging
import sys
import copy
import collections.abc
import json

# Configure the root logger once for the whole notebook: records at DEBUG and
# above are written to stdout using a compact "LEVEL | message" layout (the
# same layout that appears in the recorded cell outputs further down).
logging.basicConfig(format='%(levelname)s | %(message)s',
                    level=logging.DEBUG,
                    stream=sys.stdout)
# Logger used by the code defined in this notebook (e.g. DataStore._save_state).
log = logging.getLogger(__name__)

In [2]:
import datalink

In [3]:
from pandas.util import hash_pandas_object
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so both random draws below are repeatable
# after a kernel restart. The second draw continues from the state left by the
# first, so statement order matters here.
np.random.seed(42)
# 3x4 array sampled from a mixed str/int pool.
# NOTE(review): NumPy presumably coerces the mixed list to a string dtype, so
# the integer 42 would be stored as '42' -- confirm with arr.dtype.
arr = np.random.choice(['foo', 'bar', 42], size=(3,4))
df = pd.DataFrame(arr)

# NOTE(review): the recorded output shows every bare expression of this cell,
# which presumably relies on a non-default IPython display setting -- confirm.
df
# Per the recorded output, hashing the frame yields a pandas Series of dtype
# uint64 with one value per row.
h = hash_pandas_object(df)
type(h)
h

# Second, independent draw: `arr` and `df` are rebound, so the first frame is
# only reachable through its hash Series `h` from here on.
arr = np.random.choice(['foo', 'bar', 42], size=(3,4))
df = pd.DataFrame(arr)
df
h2 = hash_pandas_object(df)
type(h2)
h2

# Copy of the second hash Series, used as the "identical content" reference.
h3 = h2.copy()

# Hashes of the two different frames: recorded result is False.
h.equals(h2)

# A hash Series compared with its own copy: recorded result is True.
h2.equals(h3)

Unnamed: 0,0,1,2,3
0,42,foo,42,42
1,foo,foo,42,bar
2,42,42,42,42


pandas.core.series.Series

0     5559921529589760079
1    16825627446701693880
2     7171023939017372657
dtype: uint64

Unnamed: 0,0,1,2,3
0,foo,42,bar,foo
1,bar,bar,bar,bar
2,foo,foo,bar,bar


pandas.core.series.Series

0    13395664528142679341
1     4844743719075605538
2    13663166965737423489
dtype: uint64

False

True

In [4]:
# Smoke test of the datalink package's logging: the recorded output shows an
# INFO record from datalink rendered in the notebook's "LEVEL | message" layout.
datalink.test_output()

INFO | logging from datalink


In [5]:
# Build a datalink SQLInterface for a database file under the home directory.
# The recorded log lines show the '~' being expanded and a database being
# created at that path. The instance is not bound to a name, so it is only
# displayed as the cell result.
datalink.SQLInterface(db_path='~/test_dl.db')

DEBUG | Creating database: /home/sogilvy/test_dl.db
INFO | - db created at path: ~/test_dl.db


<datalink.SQLInterface at 0x7f4e60c07048>

In [6]:
class DataStore:
    """
    Common parent for datastore implementations; Link classes are expected to
    subclass it.

    Instances start out empty: both the current payload (``_data``) and the
    slot for the previous payload (``_data_last``) are ``None``.

    NOTE: a later cell of this notebook defines another class that is also
    named ``DataStore``; once that cell runs, the name refers to the newer
    class.
    """

    def __init__(self, **kwargs):
        # Arbitrary keyword arguments are accepted but not consumed here.
        self._data = self._data_last = None

    @property
    def data(self):
        """Return the currently held payload (``None`` until one is assigned)."""
        return self._data
    
class StructStore(DataStore):
    """
    Class to handle a data store with a basic mapping.

    Currently a placeholder: it declares no attributes or methods of its own,
    so it behaves exactly like the DataStore base class it inherits from.
    """
    
    
    

In [7]:
from collections import namedtuple

# Record type with two fields, plus two separately constructed instances that
# carry the same values (one field itself holds a tuple).
p = namedtuple('Point', ['x', 'y'])
n = p((1, 2, 3), 2)
n2 = p(y=2, x=(1, 2, 3))

In [8]:
# Two distinct namedtuple instances with equal field values: the recorded
# outputs show that they produce the same hash.
hash(n)
hash(n2)

5701640225614034726

5701640225614034726

In [9]:
class DataStoreDescriptor(object):
    """
    Descriptor exposing one key of the owner's ``_data`` mapping as an attribute.

    Reading the attribute on an instance returns ``instance._data[key]``;
    assigning to it writes ``instance._data[key]`` and then lets the owner
    persist itself if its content changed.

    The owning instance must provide ``_data`` (a mapping),
    ``_has_data_updated`` (truthy when the content differs from the last
    recorded state), ``_save_state()`` and ``_set_data_hash()``.
    """

    def __init__(self, key):
        """
        Args:
            key: Key inside the owner's ``_data`` mapping that this
                descriptor reads and writes.
        """
        self.key = key

    def __get__(self, instance, owner=None):
        """
        Return the stored value, or the descriptor itself on class access.

        Raises:
            KeyError: If the key is missing from ``instance._data``.
        """
        # Accessing the attribute on the class (``Owner.attr``) passes
        # ``instance=None``. Returning the descriptor is the standard protocol
        # and keeps ``hasattr(Owner, attr)`` / ``getattr(Owner, attr)`` working
        # instead of failing on ``None._data``.
        if instance is None:
            return self
        return instance._data[self.key]

    def __set__(self, instance, value):
        """Store ``value`` and trigger the owner's save logic if data changed."""
        instance._data[self.key] = value
        # Persist only when the owner reports a real content change; the new
        # hash is recorded after saving so the next comparison starts from it.
        if instance._has_data_updated:
            instance._save_state()
            instance._set_data_hash()

class DataStore:
    """
    Class for a basic mapping datastore.

    The content lives in the ``_data`` dict. Every key is additionally exposed
    as an attribute through a ``DataStoreDescriptor`` installed on the class,
    so ``store.a = 1`` writes ``_data['a']``. A hash of the content is kept in
    ``_hash_previous``; ``_save_state`` is only invoked when the content hash
    differs from that recorded value.
    """

    def __init__(self):
        self._hash_previous = None
        self._data = {'a': 10, 'b': [2,3], 'c': 'a string'}
        for key in self._data:
            self._install_descriptor(key)
        # Record the baseline hash so that re-assigning an identical value
        # right after construction is not mistaken for a change.
        self._set_data_hash()

    @classmethod
    def _install_descriptor(cls, key):
        """Expose ``key`` as a class-level descriptor unless the name is taken."""
        # Already installed on this class or a parent: nothing to do.
        for klass in cls.__mro__:
            if isinstance(vars(klass).get(key), DataStoreDescriptor):
                return
        # Never shadow a real attribute (e.g. a method such as ``update``).
        if not hasattr(cls, key):
            setattr(cls, key, DataStoreDescriptor(key))

    def _save_state(self):
        """Hook called when the content changed; currently only logs the call."""
        log.debug('Call to _save_state.')

    @property
    def data(self):
        """The internal mapping itself (not a copy)."""
        return self._data

    def update(self, config):
        """
        Update multiple properties at once.

        All values are written straight into the internal mapping and the
        change check runs a single time afterwards, so at most one save call
        is made per ``update`` regardless of how many keys are passed.
        Keys that are new to the store are stored as well and exposed as
        attributes.

        Args:
            config: Mapping of key -> new value. An empty mapping is a no-op.
        """
        for key, value in config.items():
            self._install_descriptor(key)
            self._data[key] = value
        self._commit_if_changed()

    def _commit_if_changed(self):
        """Save and record the new hash, but only if the content changed."""
        if self._has_data_updated:
            self._save_state()
            self._set_data_hash()

    @staticmethod
    def _to_jsonable(value):
        """
        ``json.dumps`` fallback for values it cannot serialise natively.

        Raises:
            TypeError: If ``value`` is not iterable and therefore cannot be
                represented as a JSON array.
        """
        if isinstance(value, (set, frozenset)):
            # Sets have no defined order; sort by repr for a stable rendering.
            return sorted(value, key=repr)
        if isinstance(value, collections.abc.Iterable):
            # Iterate over a copy so that hashing never drains a one-shot
            # iterator that is held in the store.
            return list(copy.deepcopy(value))
        raise TypeError(
            f'Object of type {type(value).__name__} is not JSON serializable')

    def _get_data_hash(self):
        """
        Creates a hash of the internal datastore.

        The content is serialised to JSON with sorted keys, so nested lists
        and dicts contribute their full content (including nested values).
        Other iterables are converted via ``_to_jsonable``.

        Note: this is the built-in ``hash`` of a string, which Python salts
        per interpreter process. The value is only meaningful for comparisons
        within the running session and must not be persisted.

        Returns:
            int: Hash of the serialised content.

        Raises:
            TypeError: If a value cannot be serialised.
        """
        serialised = json.dumps(self._data, sort_keys=True,
                                default=self._to_jsonable)
        return hash(serialised)

    def _set_data_hash(self):
        """Record the hash of the current content as the reference state."""
        self._hash_previous = self._get_data_hash()

    @property
    def _has_data_updated(self):
        """True when the content hash differs from the recorded reference."""
        return self._get_data_hash() != self._hash_previous
    
# Walk through the change detection: create a store and show its initial content.
d = DataStore()
d.data
# First bulk update changes 'a' and 'c'; the recorded output shows exactly one
# "Call to _save_state." log line for it.
d.update({'a': 12, 'c': 'a new string'})
d.data
# Repeating the identical update leaves the content unchanged; no further
# _save_state log line appears in the recorded output.
d.update({'a': 12, 'c': 'a new string'})


{'a': 10, 'b': [2, 3], 'c': 'a string'}

DEBUG | Call to _save_state.


{'a': 12, 'b': [2, 3], 'c': 'a new string'}