In [1]:
from os import walk, path
from os.path import join, getsize
from sys import getsizeof
from decimal import *
from math import log10, floor, pow
import time, random, pprint, uuid 
import traceback
import json
import operator

import pandas as pd
import frontmatter
from addict import Dict
import boto3
from boto3.dynamodb.conditions import Key, Attr
import mistune

# Shared pretty-printer; later cells use it to display table attribute definitions.
pretty = pprint.PrettyPrinter(indent=1)
# Sets the precision of the current decimal context to 3 significant digits.
# This is global to the thread: every Decimal arithmetic result computed after
# this line is rounded to 3 digits.
# NOTE(review): the original note says this is "for decimal conversion" --
# confirm which conversion relies on it.
getcontext().prec = 3 # for decimal conversion

In [2]:
%matplotlib inline

In [3]:
# Credentials are taken from the "personal_default" named profile.
session = boto3.Session(profile_name="personal_default")

# Resource-style service handles; reference:
# https://boto3.readthedocs.io/en/latest/reference/services/dynamodb.html?highlight=dynamodb#service-resource
s3 = session.resource("s3")
# DynamoDB is addressed through an explicit endpoint on localhost:8000.
dydbr = session.resource(
    "dynamodb",
    region_name="us-west-2",
    endpoint_url="http://localhost:8000",
)

In [4]:
# Enumerate every table reachable through the DynamoDB resource configured above.
list(dydbr.tables.all())

[dynamodb.Table(name='MonthIdx'),
 dynamodb.Table(name='Posts'),
 dynamodb.Table(name='YearIdx')]

In [6]:
# Handles to the three tables used by the rest of the notebook.
Posts, MonthIdx, YearIdx = (
    dydbr.Table(table_name) for table_name in ('Posts', 'MonthIdx', 'YearIdx')
)

# Show key schema attributes, item count and size for each table,
# in the order: month index, year index, posts.
for _tbl in (MonthIdx, YearIdx, Posts):
    pretty.pprint(_tbl.attribute_definitions)
    print("items: ", _tbl.item_count)
    print("size:  ", _tbl.table_size_bytes)
del _tbl  # keep the loop variable out of the notebook namespace

[{'AttributeName': 'monthIdx', 'AttributeType': 'S'}]
items:  24
size:   5710
[{'AttributeName': 'yearIdx', 'AttributeType': 'S'}]
items:  2
size:   4115
[{'AttributeName': '_id', 'AttributeType': 'S'},
 {'AttributeName': '_loc', 'AttributeType': 'S'}]
items:  123
size:   351602


In [8]:
# YearIdx.query( KeyConditionExpression = Key('yearIdx').eq('2017'))['Items']

In [9]:
def GetPosts(dydbr, by, value):
    """Fetch the posts listed under one entry of the year or month index.

    The index table is chosen by substring match on ``by`` (case-insensitive):
    anything containing "year" uses ``YearIdx`` (key ``yearIdx``), anything
    containing "month" uses ``MonthIdx`` (key ``monthIdx``). If ``by`` matches
    both, the year index is tried first and the month index is only consulted
    when the year index has no entry.

    @param dydbr: DynamoDB service resource; its tables and its low-level
        client (``dydbr.meta.client``) are used.
    @param by: index selector, e.g. "Year" or "Month". Falsy values mean "Year".
    @param value: index key to look up; converted with ``str()``.
    @return: a dict shaped like a ``batch_get_item`` response: the posts are in
        ``result['Responses']['Posts']`` and any keys DynamoDB did not process
        are in ``result['UnprocessedKeys']``. When the lookup needs more than
        one request the per-request results are merged and the remaining
        top-level fields come from the last request. When the index has no
        entry (or the entry lists no posts) the same shape is returned with an
        empty post list, so callers can always index ``['Responses']['Posts']``.
    @raise ValueError: if ``by`` names neither the year nor the month index.
    """
    # (substring looked for in `by`, index table name, key attribute of that table)
    index_tables = (
        ("year", "YearIdx", "yearIdx"),
        ("month", "MonthIdx", "monthIdx"),
    )
    # BatchGetItem rejects requests with more than 100 keys, and the Posts
    # table already holds more items than that, so requests are chunked.
    batch_limit = 100

    selector = (by or "Year").lower()
    candidates = [entry for entry in index_tables if entry[0] in selector]
    if not candidates:
        raise ValueError(
            "GetPosts can only look up posts by 'year' or 'month', got: %r" % (by,)
        )

    # Go through the resource's own client instead of a module-level `Posts`
    # table object: that global name is rebound elsewhere in this notebook.
    client = dydbr.meta.client

    for _token, table_name, key_name in candidates:
        index_items = dydbr.Table(table_name).query(
            KeyConditionExpression=Key(key_name).eq(str(value))
        )['Items']
        if not index_items:
            continue

        # Only the first index item is used.
        # Assumes its 'posts' attribute is a map keyed by post id -- TODO confirm.
        listed = index_items[0].get('posts') or {}
        # Put each id into a dict with "_id" as its key.
        keys = [{"_id": str(post_id)} for post_id in listed]

        posts, unprocessed, response = [], [], {}
        for start in range(0, len(keys), batch_limit):
            response = client.batch_get_item(
                RequestItems={'Posts': {'Keys': keys[start:start + batch_limit]}}
            )
            posts.extend(response.get('Responses', {}).get('Posts', []))
            unprocessed.extend(
                response.get('UnprocessedKeys', {}).get('Posts', {}).get('Keys', [])
            )

        merged = dict(response)
        merged['Responses'] = {'Posts': posts}
        merged['UnprocessedKeys'] = (
            {'Posts': {'Keys': unprocessed}} if unprocessed else {}
        )
        return merged

    # No index entry for this value.
    return {'Responses': {'Posts': []}, 'UnprocessedKeys': {}}

In [10]:
%time
posts = GetPosts(dydbr, by="Month", value="2017-04")['Responses']['Posts']
print(len(posts))
print("done")

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 7.15 µs
1
done


In [11]:
# Spot-check: display the post at index 3 of the 2017 year lookup to inspect
# the shape of a stored post document.
GetPosts(dydbr, by="Year", value=2017)["Responses"]["Posts"][3]

{'_id': '6212d117-75fd-4277-b745-8aa36ed79013',
 '_loc': '/2017/Quantity-Before-Quality',
 '_meta': {'@ref': 's3://BUCKET//2017/Quantity-Before-Quality',
  '_id': '6212d117-75fd-4277-b745-8aa36ed79013',
  'bytes': Decimal('3941'),
  'epochCreate': Decimal('1513459233'),
  'epochUpdate': Decimal('1492361221'),
  'isDraft': True,
  'layout': 'single_column',
  'sizeB': Decimal('2983')},
 '_plugins': {'imageGrid': {'_config': {'1': True},
   '_meta': {'1': True},
   'input': {'1': True},
   'rendered': {'1': True}}},
 'loc': {'full': '/2017/Quantity-Before-Quality',
  'root': '/',
  'slug': 'Quantity-Before-Quality'},
 'related': {'Next': {'1': True}, 'Prev': {'1': True}, 'Related': {'1': True}},
 'type': 'post',
 'written': {'Authors': [{'_id': '046fef4e-587a-46c2-94af-189a4cebff53',
    'name': 'Eric D Moore'}],
  'Content': {'processed': {'engine': '1:True',
    'html': '1:True',
    'md': '1:True'},
   'raw': 'Macaroons and tires have more in common than you might think. They are both

In [12]:
def foo(*positional, raw=False, **keywords):
    """Print how the call's arguments were captured.

    Scratch helper for exploring ``*args`` / ``**kwargs`` behaviour: writes the
    positional tuple and the keyword dict to stdout. ``raw`` is accepted but
    not used. Returns None.
    """
    for label, captured in (("Positional:", positional), ("Keywords:", keywords)):
        print(label, captured)

In [13]:
# A single list argument is captured as a one-element positional tuple,
# i.e. the list itself is positional[0].
foo([{"_id":"ABC"}])

Positional: ([{'_id': 'ABC'}],)
Keywords: {}


In [86]:
# NOTE(review): this definition rebinds the module-level name `Posts`, which an
# earlier cell binds to the DynamoDB table handle (dydbr.Table('Posts')).
def Posts(*positional, raw=False, **keywords):
    """ Normalize the supported calling styles into one query-parameter dict.

        ::positional
        @param (_id) = Post IDs to look for. Either one collection of IDs
            (``Posts(["A", "B"])``) or the IDs as separate arguments
            (``Posts("A", "B")``). A single string is one ID, not a sequence
            of characters.

        ::keywords
        @param by: "year | month | date | author | title | slug | content | <list[str]> | <None>"
            todo: " meta:tags | meta:categories "
        @param value: the value to match for ``by``; must be a list/tuple when
            ``by`` is a list/tuple (paired up position by position).
        @param query: { #param("by") : #param("value") } -- used as-is.
        @param raw (False): Return the meta responses from Dynamo. It is only
            passed through here.
        Any other accepted keyword (``month="2017-01"``, ``author="Eric"``, ...)
        is treated as a field/value pair.

        Conflicts & Hierarchy:
            ``query`` wins over positional IDs, which win over ``by``/``value``,
            which win over the remaining keywords. ``by`` without ``value``
            (or the reverse) is left in the result untouched.

        Overview:
            1. Marshal input options into standardized input
            2. Build out query and submit   (not implemented yet)
            3. Get Response, perhaps filter out meta and return (not implemented yet)

        @return: tuple ``(params, raw)`` where ``params`` is the normalized dict.
        @raise NotImplementedError: for a keyword outside the accepted set.
        @raise AssertionError: if ``by`` is a list/tuple but ``value`` is not.
    """
    # todo: validate inputs
    valid = {'by', 'value', 'query', '_id', '_loc',
             'title', 'author', 'year', 'month', 'date', 'slug'}
    if set(keywords) - valid:
        raise NotImplementedError("This arg word has not been implemented. (And may never be)")

    if 'query' in keywords:
        params = keywords['query']
    elif positional:
        if len(positional) == 1:
            only = positional[0]
            if isinstance(only, (str, bytes, dict)):
                # One ID (or one key dict): wrap it instead of iterating over it.
                ids = [only]
            else:
                try:
                    ids = list(only)
                except TypeError:
                    # A single non-iterable ID.
                    ids = [only]
        else:
            # IDs passed as separate arguments.
            ids = list(positional)
        params = {"_id": ids}
    elif 'by' in keywords and 'value' in keywords:
        by, value = keywords['by'], keywords['value']
        if isinstance(by, (list, tuple)):
            assert isinstance(value, (list, tuple)), \
                "'value' must be a list when 'by' is a list"
            # zip stops at the shorter of the two sequences.
            params = dict(zip(by, value))
        else:
            params = {by: value}
    else:
        params = keywords
    # params is normalized
    return params, raw

In [88]:
# Tests
# Keyword shorthand must normalize to the same result as the by/value form.
assert Posts(month="2017-01") == Posts(by="month", value="2017-01")
assert Posts(year="2017") == Posts(by="year", value="2017")
assert Posts(author="Eric") == Posts(by="author", value="Eric")
assert Posts(title="Amazing") == Posts(by="title", value="Amazing")

# Multi-field lookups: keywords, explicit query dict and list-style by/value agree.
assert (
    Posts(author="Eric", year="2017")
    == Posts(query={"year": "2017", "author": "Eric"})
    == Posts(by=["year", "author"], value=["2017", "Eric"])
)

# ID lookups: positional list, explicit query dict and by="_id" agree.
assert (
    Posts(["ABCD", "EFGH", "HIJK"])
    == Posts(query={"_id": ["ABCD", "EFGH", "HIJK"]})
    == Posts(by="_id", value=["ABCD", "EFGH", "HIJK"])
)

# An unknown keyword must be rejected with the documented message.
foundException = False
try:
    Posts(invalid="Should Error")
except NotImplementedError as e:
    assert str(e) == "This arg word has not been implemented. (And may never be)"
    foundException = True
finally:
    assert foundException

    Dict = mg.Posts(by="month",           value="2017-01",          raw=False) # indexed :)
    Dict = mg.Posts(month="2017-01",                                raw=False) # indexed :)
    Dict = mg.Posts(by="year",            value="2017",             raw=False) # indexed :)
    Dict = mg.Posts(by="author",          value="Eric",             raw=False) # No index :(
    Dict = mg.Posts(by="title",           value="How I fought",     raw=False) # No index :(
    Dict = mg.Posts(by=["year","author"], value=["2017","Eric"],    raw=False) # index? then filter :)
    Dict = mg.Posts(by=[ids],             value=[ids],              raw=False) # indexed :)
    Dict = mg.Posts([{"_id": "ABCDEF-EF-GH-HIJKLM"}])                          # indexed :)
    Dict = mg.Posts(by="content",         value="Some written txt", raw=False) # No index :(

    Dict = mg.Posts(content="Some written txt", raw=False)
    Dict = mg.Posts(_loc="2017/some-location",  raw=False)
    Dict = mg.Posts(slug="2017/some-location",  raw=False)
    Dict = mg.Posts(title="A Great Title",      raw=False)

    dict = mg.Pages([{id:"ABCDEF"}], raw=False)
    dict = mg.Pages(ids=[ids], 		 raw=False)
    dict = mg.Pages(slug="", 		 raw=False)
    dict = mg.Pages(title="", 		 raw=False)

    dict = mg.Authors([{id:"ABCDEF"}], 	raw=False)
    dict = mg.Authors(ids=[ids], 		raw=False)
    dict = mg.Authors(name="Eric", 		raw=False)
    dict = mg.Authors(of=[{Post()}], 	raw=False)

    Dict = mg.Scaffolds("post", 			raw=False)
    Dict = mg.Plugins("post", 				raw=False)
    Dict = mg.InstallPlugin(Store, id="", 	raw=False)

    Dict = mg.UploadDirectory(dir="abs/path/to/dir", raw=False)
    Dict = mg.UpdateMetadata(ids=[ids], meta="tags", 	   find="this", replace="that", add=False, raw=False)
    Dict = mg.UpdateMetadata(ids=[ids], meta="categories", find="this", replace="that", add=False, raw=False)

    Dict = mg.PostSave(type="", absFilepath="/", 	raw=False) #Changes State
    ~~Dict = mg.Change(type="", absFilepath="/", 	raw=False)~~