In [33]:
import bson
from mongoengine import query
import functools

In [34]:
class Stub(object):
    """Empty attribute container; instances are used as ad-hoc namespaces."""

In [35]:
import collections

In [36]:
def make_nodes(*keys):
    """Return a factory that wraps a value in nested single-key dicts.

    ``make_nodes('a', 'b', 'c')(1)`` returns ``{'a': {'b': {'c': 1}}}``.
    Every call of the returned function builds a brand-new structure.

    :param keys: path of keys, outermost first.
    :returns: ``setter(val=None)`` producing the nested dict.
    :raises IndexError: when ``setter`` is called and no keys were given.
    """
    def setter(val=None):
        if not keys:
            # Keep the exception type callers got from ``list.pop()`` before.
            raise IndexError('make_nodes requires at least one key')
        node = val
        # Wrap from the innermost key outwards.
        for key in reversed(keys):
            node = {key: node}
        return node
    return setter

In [37]:
def make_node_setter(*keys, **kwargs):
    """Return a function that assigns a value at a nested key path in a dict.

    :param keys: path of keys, outermost first; missing intermediate dicts
        are created on demand.
    :param dct: optional keyword; a dict bound as the default target.
    :returns: ``setter(val=None, dct=None)``. It mutates and returns ``dct``
        when given, otherwise the bound dict, otherwise a new dict.
    :raises IndexError: when ``setter`` is called and no keys were given.
    """
    bound_dct = kwargs.get('dct', None)
    path = list(keys)

    def setter(val=None, dct=None):
        if dct is None:
            # Compare against None explicitly: a bound *empty* dict is falsy,
            # and ``bound or {}`` would silently swap it for a throwaway dict.
            dct = bound_dct if bound_dct is not None else {}
        if not path:
            # Keep the exception type callers got from ``list.pop()`` before.
            raise IndexError('make_node_setter requires at least one key')
        node = dct
        for key in path[:-1]:
            if key not in node:
                node[key] = {}
            node = node[key]
        node[path[-1]] = val
        return dct
    return setter

In [38]:
DEFAULT_DELIM = '.'

def _object_to_rows(obj, prefix=None, delim=DEFAULT_DELIM):
    rows = []
    dot_prefix = (prefix and (prefix + delim) or '')
    if isinstance(obj, dict):
        for key, item in obj.iteritems():
            rows.extend(_object_to_rows(item, prefix=dot_prefix + key, delim=delim))
    elif isinstance(obj, (list, tuple)):
        for i, item in enumerate(obj):
            rows.extend(_object_to_rows(item, prefix=dot_prefix + str(i), delim=delim))
    else:
        rows.append(((prefix or ''), obj))
    return rows

def flatten(obj, delim=DEFAULT_DELIM):
    """Collapse a nested structure into a one-level ``{path: leaf}`` dict.

    Paths come from ``_object_to_rows`` with segments joined by ``delim``.
    """
    flat = {}
    for path, leaf in _object_to_rows(obj, delim=delim):
        flat[path] = leaf
    return flat

class DotMapper(unicode):
    """A dotted path string (e.g. ``'status.state'``) that can resolve itself.

    Calling an instance with an object walks the path one segment at a time
    through mappings (via ``get``), lists/tuples (by integer index) and plain
    objects (via attribute lookup). Resolution yields ``None`` as soon as a
    segment is missing.
    """
    sep = '.'

    @property
    def parts(self):
        """Path segments as a fresh list, split on ``sep``."""
        return self.split(self.sep)

    @staticmethod
    def _select(container, part):
        """Resolve one path segment against ``container``; ``None`` if absent."""
        if isinstance(container, (list, tuple)):
            # ``flatten`` writes list positions as strings ('0', '1', ...).
            try:
                return container[int(part)]
            except (ValueError, IndexError):
                return None
        if hasattr(container, '__getitem__'):
            getter = getattr(container, 'get', None)
            if getter is not None:
                return getter(part, None)
            # Subscriptable but not a mapping (a string, for instance).
            try:
                return container[part]
            except (KeyError, IndexError, TypeError):
                return None
        return getattr(container, part, None)

    def get(self, obj, parts=None):
        """Walk ``parts`` (default: this path's segments) starting at ``obj``.

        :returns: the value found at the end of the path, or ``None`` when
            any step along the way is missing or is itself ``None``.
        """
        parts = parts or self.parts
        next_obj = obj
        for part in parts:
            if next_obj is None:
                return None
            next_obj = self._select(next_obj, part)
        return next_obj

    def __call__(self, thing):
        """Resolve this path against ``thing``; an empty path returns ``thing``."""
        if not self:
            return thing
        return self.get(thing)

    def dict_setter(self, *keys):
        """Return a ``make_node_setter`` for ``keys`` (default: this path)."""
        keys = keys or self.parts
        return make_node_setter(*keys)
    
    
def test_flatten(doc):
    """Check that every path produced by ``flatten(doc)`` leads to its value.

    Each flattened key is split on '.' and followed through ``doc``; segments
    that land on a list or tuple are converted to integer indexes, because
    ``flatten`` writes positions as strings. Keys that themselves contain a
    '.' cannot be followed this way.

    :raises AssertionError: when a flattened value differs from the value
        found by walking ``doc``.
    """
    # A bare leaf flattens to the single path '' and has nothing to walk.
    is_container = isinstance(doc, (dict, list, tuple))
    for k, v in flatten(doc).items():
        node = doc
        for key in (k.split('.') if is_container else []):
            if isinstance(node, (list, tuple)):
                key = int(key)
            node = node[key]
        assert v == node
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('test successful')
def test_dotmapper(doc):
    for bigkey, v in flatten(doc).iteritems():
         assert DotMapper(bigkey)(doc) == v
    print 'test successful'

In [39]:
def _mq(cls, **kwargs):
    """Translate keyword filters into one Mongo query dict for ``cls``.

    A keyword may end in ``__<operator>`` where the operator is listed in
    ``MongoProperty.operators``. When the remaining name is a class attribute
    that offers ``expression`` (a ``MongoProperty``), that property builds
    the condition from its own document path. Otherwise the keyword is passed
    to ``query`` unchanged, so raw document paths still work.

    NOTE(review): the merge below assumes ``query`` returns a dict shaped like
    ``{field_path: condition}`` - confirm against the installed mongoengine.

    :param cls: model class whose attributes are consulted.
    :param kwargs: filters such as ``error__exists=True``.
    :returns: dict combining all conditions.
    """
    dct = {}
    for k, v in kwargs.items():
        parts = k.split('__')
        op = None
        # Only strip an operator when a field name is left over; a field that
        # is merely *named* like an operator ('type', 'size', ...) stays whole.
        if len(parts) > 1 and parts[-1] in MongoProperty.operators:
            op = parts.pop()
            k = '__'.join(parts)
        prop = getattr(cls, k, None)
        # Any class attribute can match the name (methods, ``q``, ``Meta``);
        # only objects that can build an expression are treated as properties.
        build_expression = getattr(prop, 'expression', None)
        if callable(build_expression):
            expr = build_expression(v, op=op)
        else:
            kop = k + '__' + op if op is not None else k
            expr = query(**{kop: v})
        for field, cond in expr.items():
            current = dct.get(field)
            if isinstance(current, dict) and isinstance(cond, dict):
                # Two conditions on one field (e.g. ``__gt`` and ``__lt``)
                # must combine; a plain update() would keep only the last.
                current.update(cond)
            else:
                dct[field] = cond
    return dct

def mq(cls, **kwargs):
    """Build a Mongo query dict for ``cls`` with optional logical groups.

    Plain keywords are translated by ``_mq``. Three reserved keywords each
    take a dict of further filters:

    * ``and_`` - conditions placed under ``$and``
    * ``or_``  - conditions placed under ``$or``
    * ``not_`` - conditions negated field by field

    :returns: the combined query dict.
    """
    and_ = kwargs.pop('and_', None)
    or_ = kwargs.pop('or_', None)
    not_ = kwargs.pop('not_', None)
    dct = {}
    dct.update(_mq(cls, **kwargs))
    # MongoDB requires ``$and``/``$or`` to hold an *array* of expressions, so
    # every field condition becomes its own single-key document.
    if and_:
        dct['$and'] = [{field: cond} for field, cond in _mq(cls, **and_).items()]
    if or_:
        dct['$or'] = [{field: cond} for field, cond in _mq(cls, **or_).items()]
    if not_:
        for field, cond in _mq(cls, **not_).items():
            if isinstance(cond, dict) or hasattr(cond, 'pattern'):
                # ``$not`` only accepts an operator document or a regex
                # (regex objects are recognised by their ``pattern`` attribute).
                dct[field] = {'$not': cond}
            else:
                # Negating a plain equality match is spelled ``$ne``.
                dct[field] = {'$ne': cond}
    return dct

# Shortcuts hung on the function object itself:
# ``mq.and_(Model, a=1)`` is the same call as ``mq(Model, and_={'a': 1})``.
mq.and_ = lambda c, **kw: mq(c, and_=kw)
mq.or_ = lambda c, **kw: mq(c, or_=kw)
mq.not_ = lambda c, **kw: mq(c, not_=kw)



In [40]:

class MongoProperty(object):
    """Descriptor mapping a model attribute onto a dotted path in its document.

    Reading the attribute on an instance resolves the path against the
    instance's content (found through ``content_attr_name`` or the instance's
    ``_content_attr_name``). Assigning to it forwards to the instance's
    ``update_fields``. On the class, the descriptor itself is returned so it
    can build query expressions.
    """
    # Suffixes accepted after a double underscore, e.g. ``error__exists``.
    operators = ('eq', 'ne', 'lt', 'lte', 'gt', 'gte', 'in', 'nin', 'exists',
                 'type', 'mod', 'where', 'text', 'regex', 'size', 'elemMatch',
                 'all', 'comment', 'slice', 'meta')

    def __init__(self, path_to_property, content_attr_name=None):
        """
        :param path_to_property: dotted path inside the document.
        :param content_attr_name: name of the instance attribute holding the
            document; falls back to the instance's ``_content_attr_name``.
        """
        super(MongoProperty, self).__init__()
        self.owner = None
        self.content_attr_name = content_attr_name
        self.dotmapper = DotMapper(path_to_property)
        # ``self.e.<op>(value)`` builds the expression for that operator.
        self.e = Stub()
        self.createops()
        self._name = None

    def __unicode__(self):
        return unicode(self.dotmapper)

    def __get__(self, instance, owner):
        # Remember the class most recently used to reach this descriptor.
        self.owner = owner
        if instance is None:
            return self
        else:
            content_attr_name = self.content_attr_name or getattr(instance, '_content_attr_name', None)
            assert content_attr_name, 'Missing content attr name. Please define _content_attr_name in class or content_attr_name in property constructor.'
            content = getattr(instance, content_attr_name)
            return self.dotmapper(content)

    def __set__(self, instance, value):
        instance.update_fields(**{self.dotmapper: value})

    def name(self):
        """Return the attribute name this property is bound to on its owner.

        The name is looked up once by scanning the owner class and then
        cached. ``owner`` is only known after the property has been accessed
        through a class or an instance.

        :raises Exception: when no attribute of the owner is this property.
        """
        if self._name is None:
            for attr in dir(self.owner):
                # Look on the *owner*, and compare by identity: the property
                # object itself has none of the owner's attribute names.
                if getattr(self.owner, attr, None) is self:
                    self._name = attr
                    break
            else:
                raise Exception('Cannot find name of mongo property in owner.')
        return self._name

    def expression(self, val, op=None):
        """Build the query dict for ``<path> <op> val`` via ``query``.

        ``op`` of ``None`` or ``'eq'`` means a plain match on the path.
        """
        parts = self.dotmapper.parts
        if op not in (None, 'eq'):
            parts = parts + [op]
        label = '__'.join(parts)
        return query(**{label: val})

    def createops(self):
        """Attach one pre-bound ``expression`` helper per operator to ``self.e``."""
        for op in self.operators:
            func = functools.partial(self.expression, op=op)
            setattr(self.e, op, func)
            


In [41]:
class Options(object):
    """Per-model settings object stored on a model class as ``_meta``.

    Only the keywords named in ``_fields`` are copied from the constructor
    arguments (missing ones become ``None``); everything else is ignored.
    """
    _fields = 'collection_name', 'database_name'

    def __init__(self, **kwargs):
        super(Options, self).__init__()
        for option_name in self._fields:
            option_value = kwargs.get(option_name)
            setattr(self, option_name, option_value)
        # Both start empty; this constructor never fills them from kwargs.
        self.dirty_fields = {}
        self.field_names = []

In [42]:
def create_options(target_cls, *sorted_meta_options_classes):
    """Instantiate ``target_cls`` from the merged attributes of several classes.

    The class dictionaries are overlaid in the order given, so attributes of
    later classes win. The merged mapping is passed as keyword arguments.
    """
    merged = {}
    for meta_cls in sorted_meta_options_classes:
        for attr_name, attr_value in meta_cls.__dict__.items():
            merged[attr_name] = attr_value
    return target_cls(**merged)

In [43]:
class MetaOptionsMixin(object):
    """Gives a class a ``_meta`` options object assembled from ``Meta`` classes.

    ``create_options`` gathers the ``Meta`` visible on each class of the MRO
    and feeds them to the first ``options_class`` found.
    """
    _meta = None

    class Meta:
        options_class = Options

    @classmethod
    def get_meta_classes(cls, reverse=False):
        """List the ``Meta`` found on each MRO class (base-first if ``reverse``)."""
        lineage = cls.mro()
        if reverse:
            lineage = reversed(lineage)
        found = []
        for klass in lineage:
            # getattr follows inheritance, so a class without its own Meta
            # contributes the one it inherits.
            meta = getattr(klass, 'Meta', None)
            if meta:
                found.append(meta)
        return found

    @classmethod
    def get_options_class(cls):
        """Return the first truthy ``options_class`` declared by any ``Meta``.

        :raises Exception: when no ``Meta`` provides one.
        """
        for klass in cls.mro():
            if hasattr(klass, 'get_meta_classes'):
                for meta in klass.get_meta_classes(reverse=False):
                    candidate = getattr(meta, 'options_class', None)
                    if candidate:
                        return candidate
        raise Exception('Missing options class on class %s' % cls)

    @classmethod
    def create_options(cls):
        """Build the options object and store it on the class as ``_meta``."""
        # Inside this method the bare name resolves to the module-level
        # ``create_options`` function, not to this classmethod.
        cls._meta = create_options(cls.get_options_class(),
                                   *cls.get_meta_classes(reverse=True))

In [44]:
class ModelCursor(object):
    """Wraps a cursor so that every document it yields becomes a model instance."""

    def __init__(self, model, cursor):
        super(ModelCursor, self).__init__()
        self.cursor = cursor
        self.model = model

    def __iter__(self):
        # Lazily wrap each raw document as the underlying cursor is consumed.
        for raw_doc in self.cursor:
            wrapped = self.model(data=raw_doc)
            yield wrapped

    def next(self):
        """Advance the underlying cursor by one and wrap the result."""
        raw_doc = self.cursor.next()
        return self.model(data=raw_doc)

In [45]:
def modelcursor(func):
    ''' Decorator wraps cursor in a model cursor

    The decorated method must return a cursor; the wrapper returns a
    ``ModelCursor`` pairing it with ``self.mongo_model``.
    '''
    # wraps() keeps the method's __name__ and __doc__ on the wrapper.
    @functools.wraps(func)
    def decorator(self, *args, **kwargs):
        cur = func(self, *args, **kwargs)
        return ModelCursor(self.mongo_model, cur)
    return decorator
        
        
class MongoModelQuery(object):
    """Query helper tying a model class to one Mongo collection.

    The client is either passed to the constructor or installed for every
    instance at once with ``bind_client``.
    """
    # Class-wide fallback client; see ``bind_client``.
    client = None

    def __init__(self,
                 mongo_model,
                 collection_name=None,
                 database_name=None,
                 client=None):
        """
        :param mongo_model: model class used to wrap returned documents.
        :param collection_name: name of the collection inside the database.
        :param database_name: name of the database on the client.
        :param client: client object indexed by database name; when omitted,
            the class-level client is used.
        """
        super(MongoModelQuery, self).__init__()
        self.mongo_model = mongo_model
        # Assign only a real client: storing None on the instance would hide
        # a client installed through ``bind_client``.
        if client is not None:
            self.client = client
        self.database_name = database_name
        self.collection_name = collection_name

    @property
    def database(self):
        return self.client[self.database_name]

    @property
    def collection(self):
        return self.database[self.collection_name]

    @classmethod
    def bind_client(cls, client):
        """Install ``client`` as the default for instances without their own."""
        cls.client = client

    @modelcursor
    def find(self, *args):
        """Find documents; returns a ``ModelCursor``.

        :param args: optional ``(filters, projection)``. ``filters`` is a
            dict of keyword-style filters handed to the model's
            ``create_expression``; ``projection`` goes to the collection's
            ``find``.
        """
        kwargs = (args[0] if len(args) > 0 else None) or {}
        # Send None, not {}, when no projection was requested: PyMongo treats
        # an empty projection as "return only _id".
        projection = (args[1] if len(args) > 1 else None) or None
        exp = self.mongo_model.create_expression(**kwargs)
        return self.collection.find(exp, projection)

    def find_one(self):
        """Return one document wrapped in the model, or None if there is none."""
        data = self.collection.find_one()
        if data is None:
            return None
        return self.mongo_model(data)

    @modelcursor
    def query(self, **kwargs):
        """Find documents matching keyword filters; returns a ``ModelCursor``."""
        exp = self.mongo_model.create_expression(**kwargs)
        data = self.collection.find(exp)
        return data

In [46]:
class mongo_content(object):
    """Descriptor that loads the owning instance's document from its collection.

    Every read on an instance issues a ``find_one`` by ``_id``; nothing is
    cached here.
    """

    def __init__(self,
                 collection_path=None,
                 collection_attr='get_mongo_collection',
                 filter_mongo_collection_attr='filter_mongo_collection',
                 document_id_attr='document_id'):
        super(mongo_content, self).__init__()
        self.collection_path = collection_path
        self.collection_attr = collection_attr
        self.document_id_attr = document_id_attr
        # Stored but not read anywhere in this class.
        self._cur = None
        self.filter_mongo_collection_attr = filter_mongo_collection_attr

    def __get__(self, instance, owner):
        # Class-level access hands back the descriptor itself.
        if instance is None:
            return self

        raw_id = getattr(instance, self.document_id_attr)
        docid = bson.ObjectId(raw_id)

        # The collection attribute may be the collection or a callable
        # returning it.
        coll = getattr(instance, self.collection_attr)
        if callable(coll):
            coll = coll()

        data = coll.find_one({'_id': docid})
        if self.collection_path is None:
            return data
        # Optionally narrow the result to a sub-path of the document.
        return DotMapper(self.collection_path)(data)

In [47]:
class MongoCollectionMixin(object):
    """Document caching, field updates and saving for a Mongo-backed model.

    Expects the concrete class to provide ``_content`` (the source the cache
    is filled from) and ``q`` (an object exposing ``collection``).
    """

    _content_attr_name = 'content_cache'

    def __init__(self, data):
        """
        :param data: mapping for the document; its ``_id`` becomes
            ``document_id``.
        """
        super(MongoCollectionMixin, self).__init__()
        self._data_cache = None
        self._content_cache = data
        # Pending changes belong to this instance only.
        self._dirty_fields = {}
        self.document_id = data.get('_id')

    @property
    def content_cache(self):
        """The cached document, loaded from ``_content`` when the cache is empty."""
        if self._data_cache is None:
            self._data_cache = self._content  # this is where it really lives
        return self._data_cache

    def clear_cache(self):
        """Drop cached content so the next read reloads it from ``_content``."""
        # ``_data_cache`` is what ``content_cache`` actually serves; resetting
        # only ``_content_cache`` left stale data in place forever.
        self._data_cache = None
        self._content_cache = None

    @classmethod
    def get_mongo_collection(cls):
        return cls.q.collection

    def _pending_changes(self):
        """Return this instance's dirty-field dict, creating it if needed."""
        dirty = self.__dict__.get('_dirty_fields')
        if dirty is None:
            # Covers subclasses whose __init__ does not call this one.
            dirty = self._dirty_fields = {}
        return dirty

    def update_fields(self, **kwargs):
        """Set dotted-path fields in the cached document and mark them dirty.

        :param target: optional keyword; dict to write into instead of the
            cached content. It is an option, not a field.
        :param kwargs: ``{dotted_path: value}`` pairs to apply.
        """
        # Pop the option so it is not recorded as a document field, and test
        # for None so that an explicitly passed empty dict is honoured.
        target = kwargs.pop('target', None)
        if target is None:
            target = self.content_cache
        dirty = self._pending_changes()
        for k, v in kwargs.items():
            dirty[k] = v
            dict_setter = DotMapper(k).dict_setter()
            dict_setter(val=v, dct=target)

    def save(self):
        """Write pending changes with one ``$set`` update, then reset state.

        Does nothing when no field has been changed.
        """
        coll = self.get_mongo_collection()
        dirty = self._pending_changes()
        if dirty:
            coll.update({'_id': self.document_id}, {'$set': dirty})
            # Start a fresh dict so already-saved changes are not re-sent.
            self._dirty_fields = {}
            self.clear_cache()

In [48]:
class MongoModel(MetaOptionsMixin, MongoCollectionMixin):
    """Base class for models whose attributes are ``MongoProperty`` descriptors."""

    class Meta:
        database_name = None
        collection_name = None
        dirty_fields = None

    @classmethod
    def create_expression(cls, **kwargs):
        """Turn keyword filters into a Mongo query dict for this model."""
        return mq(cls, **kwargs)

    @classmethod
    def _get_mongo_property_names(cls):
        """Yield the name of every class attribute that is a ``MongoProperty``."""
        for attr_name in dir(cls):
            candidate = getattr(cls, attr_name, None)
            if isinstance(candidate, MongoProperty):
                yield attr_name

    def as_dict(self):
        """Return ``{property_name: current_value}`` for all mongo properties."""
        snapshot = {}
        for prop_name in self._get_mongo_property_names():
            snapshot[prop_name] = getattr(self, prop_name)
        return snapshot

In [49]:
def register_models(*MongoModelClasses, **kwargs):
    """Prepare model classes for use: build ``_meta`` and attach ``q``.

    :param MongoModelClasses: model classes to register.
    :param client: optional keyword; client handed to each ``MongoModelQuery``.
        Other keywords are ignored.
    """
    client = kwargs.pop('client', None)
    for model_cls in MongoModelClasses:
        model_cls.create_options()
        meta = model_cls._meta
        meta.field_names = list(model_cls._get_mongo_property_names())
        model_cls.q = MongoModelQuery(
            model_cls,
            database_name=meta.database_name,
            collection_name=meta.collection_name,
            client=client,
        )

In [50]:
class BigqueryJob(MongoModel, MongoCollectionMixin):
    """Model over documents of the ``logs.bigquery_jobs`` collection."""

    class Meta:
        collection_name = 'bigquery_jobs'
        database_name = 'logs'

    # Source of the document content, read through the descriptor.
    _content = mongo_content()

    # Top-level field.
    kind = MongoProperty('kind')

    # ``jobReference`` sub-document and two of its members.
    output = MongoProperty('jobReference')
    job_id = MongoProperty('jobReference.jobId')
    job_project_id = MongoProperty('jobReference.projectId')

    # ``status`` sub-document.
    state = MongoProperty('status.state')
    error = MongoProperty('status.errorResult.message')

    # ``statistics`` sub-document.
    created = MongoProperty('statistics.creationTime')
    started = MongoProperty('statistics.startTime')
    ended = MongoProperty('statistics.endTime')

In [51]:
# Testing expressions 
# mq.and_(BigqueryJob, prop='test', oooo__lt=9)
# mq.or_(BigqueryJob, prop='test', oooo__lt=9)
# mq.not_(BigqueryJob, prop='test', oooo__lt=9)
# BigqueryJob.create_expression(**{'not_':dict(prop='test', oooo__lt=9)})
# BigqueryJob.create_expression(**{'not_':dict(prop='test', oooo__lt=9), 'tee__eest.Ting__lte':9})

In [52]:
import pymongo
# Connect with pymongo's default client settings (no host or port is given),
# then take handles on the database and collection named in BigqueryJob.Meta.
cli = pymongo.MongoClient()
db = cli['logs']
collection = db['bigquery_jobs']

In [53]:
# Builds BigqueryJob._meta and attaches the BigqueryJob.q query helper,
# handing it the client created above.
register_models(BigqueryJob, client=cli)

In [54]:
# Fetch one document wrapped as a BigqueryJob and show every MongoProperty
# value by attribute name.
bqj = BigqueryJob.q.find_one()
bqj.as_dict()

{'created': u'1444309984342',
 'ended': u'1444310017418',
 'error': None,
 'job_id': u'job_2308320498MASDAD',
 'job_project_id': u'pretend-project',
 'kind': u'bigquery#job',
 'output': {u'jobId': u'job_2308320498MASDAD',
  u'projectId': u'pretend-project'},
 'started': u'1444310001693',
 'state': u'DONE'}

In [55]:
# Filter through the `error` property with the `exists` operator suffix;
# the result is a ModelCursor.
with_errors = BigqueryJob.q.query(error__exists=True)

In [56]:
# `.cursor` is the underlying cursor held by the ModelCursor.
with_errors.cursor.count()

4

In [57]:
# You can also filter on a raw document path ('__'-separated) instead of a
# property name:
BigqueryJob.q.query(status__errorResult__exists=True).cursor.count()

4

In [58]:
# Pull the next match from the cursor as a model and show its properties.
with_errors.next().as_dict()

{'created': u'1444240356405',
 'ended': u'1444240385987',
 'error': u'Resources exceeded during query execution.',
 'job_id': u'job_2308320498MASDAD',
 'job_project_id': u'pretend-project',
 'kind': u'bigquery#job',
 'output': {u'jobId': u'job_2308320498MASDAD',
  u'projectId': u'pretend-project'},
 'started': u'1444240356860',
 'state': u'DONE'}

In [59]:
# find_one() takes no filter here; the variable name notwithstanding, this is
# whichever document the collection returns for an unfiltered find_one.
random_job = BigqueryJob.q.find_one()

In [60]:
# Reads 'status.state' from the job's cached content via the MongoProperty.
random_job.state

u'DONE'

In [61]:
# Assigning through the property goes to update_fields: the cached content
# changes and the field is recorded as dirty, but nothing is sent to MongoDB.
# `_content` queries the collection on each access, so it still shows the
# stored value.
print 'lets make the job silly, but dont save it'
random_job.state = 'SILLY'
print random_job.state, 'from dirty cache random_job.state'
print random_job._content['status']['state'], "from original random_job._content['status']['state']"

lets make the job silly, but dont save it
SILLY from dirty cache random_job.state
DONE from original random_job._content['status']['state']


In [62]:
# save() sends the recorded dirty fields in a `$set` update, so `_content`
# (a fresh query) now reports the new value as well.
print 'ok save it'
random_job.save()
print random_job.state, 'from dirty cache random_job.state'
print random_job._content['status']['state'], "from original random_job._content['status']['state']"

ok save it
SILLY from dirty cache random_job.state
SILLY from original random_job._content['status']['state']


In [63]:
# Put the original value back and save again so the stored document is
# left as it was found.
print 'and then revert because changing logs is not silly.'
random_job.state = "DONE"
random_job.save()
print random_job._content['status']['state'], "from original random_job._content['status']['state']"

and then revert because changing logs is not silly.
DONE from original random_job._content['status']['state']
