# AWS SDK
Data Processing 


## DynamoDB Local and Remote
This notebook connects to a local DynamoDB instance.  
```
# you should build the docker container at least once.
docker-compose build
```
Local mode requires DynamoDB to be running locally; launch it with:
```
docker-compose up
```


# The gateway is handled in node

## Issues

* need to make typifyItem go deep into JSON
* need to create version that creates, reads, writes, updates REMOTE aws tables
* need to convert the keys from a # separator to a . separator (d#1 becomes d.1)
* 2019-06-01 need to set up (development, test, prod) versions of tables
* 2019-05-31 add access_key to .env
* 2019-05-31 add secret_key to .env


In [12]:
from dotenv import load_dotenv
load_dotenv(verbose=True)
import os
from os.path import isfile, join
from pprint import pprint
import json
# from interface import implements, Interface 
import interface 
from util import Util

#from buffered_writer import BufferedWriter

#from parameters import Parameters
from data_process_interface import DataProcessInterface
from stubdb import StubDB

import boto3
from boto3.dynamodb.conditions import Key
# data processing
from document_process import DocumentProcess
from movie_process import MovieProcess

# Shared Util helper instance; the next cell uses it to read the process configuration.
util = Util()

In [13]:
# process_config is written to process_config.json by 01-process-data.ipynb.
# Load it here; later cells read its 'key', 'region', 'suffix' and 'target' entries.

process_config = util.readProcessConfig()

# Echo the loaded configuration so the run records which table/environment is targeted.
pprint(process_config)

{'key': 'documents', 'region': 'us-east-2', 'suffix': 'dev', 'target': 'remote'}


In [14]:

API_URL = os.getenv("API_URL")
API_USER = os.getenv("API_USER")
AWS_ACCESS_KEY_ID=os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY_ID=os.getenv("AWS_SECRET_ACCESS_KEY_ID")
REGION_NAME=os.getenv("REGION_NAME")

print('API_URL: ',API_URL)
print('API_USER: ',API_USER)
print('AWS_ACCESS_KEY_ID: ',AWS_ACCESS_KEY_ID)
print('AWS_SECRET_ACCESS_KEY_ID: ',AWS_SECRET_ACCESS_KEY_ID)
print('REGION_NAME: ',REGION_NAME)

API_URL:  None
API_USER:  None
AWS_ACCESS_KEY_ID:  None
AWS_SECRET_ACCESS_KEY_ID:  None
REGION_NAME:  None


## Connect and Load Table Definitions

In [15]:
#########
# CREATE STUBDB
##
db_name = 'dynamodb'
endpoint_url = 'http://localhost:8000'
histories = 'table.histories.json'  # split create table and gsi

stubDB = None

# Fluent setup: construct the stub, load the table histories file, then
# connect to the target named in process_config.
stubDB = (
    StubDB(
        db_name=db_name,
        endpoint_url=endpoint_url,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY_ID,
        region_name=process_config['region'],
    )
    .loadTableHistories(histories, env_suffix=process_config['suffix'])
    .connect(process_config['target'])
)

print('--------')

# Append the environment suffix (dev, test, or prod) to the TableName of
# every entry in every table history.
for key in stubDB.table_histories:
    keyname = key
    for item in stubDB.table_histories[key]:
        item['TableName'] = '{}_{}'.format(item['TableName'], process_config['suffix'])

#stubDB.getTableDescription(process_config['key'])
#r = stubDB.client.DescribeTable(process_config['key'])
#r = stubDB.client.describe_table(TableName=process_config['key'])

db_name:  dynamodb
region_name:  us-east-2
endpoint_url:  http://localhost:8000
aws_access_key_id:  None
aws_secret_access_key:  None
todo: add .env aws_access_key_id
todo: add .env aws_secret_access_key
------------
tb:  documents
tb:  music
tb:  movies
---------
dbName:  dynamodb
aws_access_key_id:  None
aws_secret_access_key :  None
region_name:  us-east-2
remote db connecting...
self.db:  dynamodb.ServiceResource()
remote client connecting...
list tables:  {'TableNames': ['documents_dev'], 'ResponseMetadata': {'RequestId': 'KV9ORSAC0JCDHGDRQ8V644JDJRVV4KQNSO5AEMVJF66Q9ASUAAJG', 'HTTPStatusCode': 200, 'HTTPHeaders': {'server': 'Server', 'date': 'Sun, 30 Jun 2019 11:33:11 GMT', 'content-type': 'application/x-amz-json-1.0', 'content-length': '32', 'connection': 'keep-alive', 'x-amzn-requestid': 'KV9ORSAC0JCDHGDRQ8V644JDJRVV4KQNSO5AEMVJF66Q9ASUAAJG', 'x-amz-crc32': '310421954'}, 'RetryAttempts': 0}}
connecting...out
--------


In [16]:
# Print the table list returned by the stub after connecting.
pprint(stubDB.getTableList())

['documents_dev']


In [17]:
import ipywidgets as widgets
from IPython.display import display

class SystemButtons:
    """Database-wide controls: a single button that prints the table list."""

    def __init__(self, stubDB):
        """Keep a reference to ``stubDB`` and build the (not yet displayed) button."""
        self.stubDB = stubDB
        self.show_button = widgets.Button(description="Show Tables ")

    def show_on_button_clicked(self, b):
        """Click handler: print whatever ``stubDB.getTableList()`` returns."""
        table_list = self.stubDB.getTableList()
        print(table_list)

    def display(self):
        """Attach the click handler, render the button, and return ``self``."""
        button = self.show_button
        button.on_click(self.show_on_button_clicked)
        display(button)
        return self
        
class TableButtons:
    """Buttons that create, delete, count and batch-load one DynamoDB table.

    The handlers run as ipywidgets callbacks, so every failure is caught and
    printed instead of being left to escape the callback.

    Args:
        stubDB: Connected stub exposing ``client``, ``db``, ``table_histories``,
            ``getTableName`` and ``loadBatchTableData``.
        table_name_key: Key into ``stubDB.table_histories`` (e.g. ``'documents'``).
        env_suffix: Environment suffix forwarded to ``loadBatchTableData``.
    """

    def __init__(self, stubDB, table_name_key, env_suffix):
        self.stubDB = stubDB
        self.env_suffix = env_suffix
        self.table_name = stubDB.getTableName(table_name_key)
        self.table_name_key = table_name_key
        # List of request dicts: entries with a 'KeySchema' are passed to
        # create_table, all others to update_table (see create handler).
        self.table_history = self.stubDB.table_histories[self.table_name_key]

        self.create_button = widgets.Button(description="Create {}".format(self.table_name))
        self.del_button = widgets.Button(description="Delete {}".format(self.table_name))
        self.count_button = widgets.Button(description="Count Local {} Data".format(self.table_name))
        self.batch_load_button = widgets.Button(description="Batch Load {}".format(self.table_name))

    @staticmethod
    def _error_code(error):
        """Return the AWS error code carried by ``error``, or ``None``.

        botocore's ``ClientError`` stores the service reply in ``.response``
        (``{'Error': {'Code': ...}}``). The attribute is read by duck-typing
        so no extra import is needed and other exceptions simply yield ``None``.
        """
        response = getattr(error, 'response', None)
        if not isinstance(response, dict):
            return None
        details = response.get('Error')
        if not isinstance(details, dict):
            return None
        return details.get('Code')

    def create_on_button_clicked(self, b):
        """Replay the table history: create the table, then apply each update.

        A failing step is reported and the remaining steps are still attempted.
        """
        # NOTE(review): on remote AWS, create_table returns before the table is
        # ACTIVE, so a following update_table may be rejected; consider waiting
        # with the client's 'table_exists' waiter between the steps.
        for tdef in self.table_history:
            is_create = 'KeySchema' in tdef
            try:
                if is_create:
                    print('A:')
                    pprint(tdef)
                    self.stubDB.client.create_table(**tdef)
                else:
                    print('B:')
                    pprint(tdef)
                    self.stubDB.client.update_table(**tdef)
            except Exception as error:
                # boto3 signals service failures with ClientError, not NameError.
                print(error)
                if is_create and self._error_code(error) == 'ResourceInUseException':
                    print('table {} already created.'.format(self.table_name))
                elif is_create:
                    print('failed to create {}'.format(self.table_name))
                else:
                    print('failed to update {}'.format(self.table_name))

    def count_on_button_clicked(self, b):
        """Print the table's ``item_count`` or explain why it is unavailable."""
        try:
            table = self.stubDB.db.Table(self.table_name)
            # NOTE: DynamoDB documents ItemCount as an approximate value that
            # is refreshed periodically, not a live count.
            print('table: ', table.item_count)
        except Exception as error:
            if self._error_code(error) == 'ResourceNotFoundException':
                print('Table {} doesnt exist'.format(self.table_name))
            else:
                # Do not mislabel credential/network problems as a missing table.
                print(error)
                print('failed to count {}'.format(self.table_name))

    def del_on_button_clicked(self, b):
        """Delete the table; report (rather than propagate) any failure."""
        try:
            table = self.stubDB.db.Table(self.table_name)
            table.delete()
        except Exception as error:
            print(error)
            print('failed to delete {}'.format(self.table_name))

    def batch_load_on_button_clicked(self, b):
        """Delegate to ``stubDB.loadBatchTableData`` for this table."""
        # NOTE(review): the third argument is hard-coded to 'local' even when
        # process_config['target'] is 'remote' -- confirm this is intended.
        self.stubDB.loadBatchTableData(self.table_name_key, self.env_suffix, 'local')

    def display(self):
        """Attach all click handlers, render the four buttons, return ``self``."""
        self.create_button.on_click(self.create_on_button_clicked)
        self.del_button.on_click(self.del_on_button_clicked)
        self.count_button.on_click(self.count_on_button_clicked)
        self.batch_load_button.on_click(self.batch_load_on_button_clicked)
        display(self.create_button)
        display(self.del_button)
        display(self.count_button)
        display(self.batch_load_button)
        return self

'''
class QueryButtons:
    
    def __init__(self, stubDB, table_name_key, env_suffix):
      
        self.stubDB = stubDB
        self.env_suffix = env_suffix
        self.table_name = stubDB.getTableName(table_name_key)
        self.table_name_key = table_name_key
          
        self.query_10_button =widgets.Button(description="Query {} top 10".format(self.table_name))
        self.query_GSI_button =widgets.Button(description="GSI {} ".format(self.table_name))
        
    def query_GSI_on_button_clicked(self,b):   
        try:
            response = {'Items': 'Undefined'}
            #if self.table_name == 'documents':
            db = boto3.resource('dynamodb',
                 endpoint_url='http://localhost:8000',
                 aws_access_key_id=AWS_ACCESS_KEY_ID,
                 aws_secret_access_key=AWS_SECRET_ACCESS_KEY_ID
                )

            document_table = db.Table(self.table_name)

            if self.table_name_key == 'documents':
                response = document_table.query(
                    IndexName='gsi_1',
                    KeyConditionExpression=Key('sk').eq('michigan.1')
                )
            else:    
                print('Sorry no GSI for {}'.format(self.table_name))    
                
            if self.table_name_key == 'movies':
                print('Sorry no GSI for {}'.format(self.table_name)) 
                
            pprint(response['Items'])
        except NameError as nameerror:
            print(nameerror)
            print('Bad query ')
            
    def query_10_on_button_clicked(self,b):
    
        try:
            response = {}
            #if self.table_name == 'documents':
            db = boto3.resource('dynamodb',
                 endpoint_url='http://localhost:8000',
                 aws_access_key_id=AWS_ACCESS_KEY_ID,
                 aws_secret_access_key=AWS_SECRET_ACCESS_KEY_ID
                )
            print('table_name: ', self.table_name)
            print('table_name_key: ', self.table_name_key)
            
            document_table = db.Table(self.table_name)
            
            if self.table_name_key == 'documents':
                response = document_table.query(
                    KeyConditionExpression=Key('pk').eq('d.2'),
                )
                
            if self.table_name_key == 'movies':
                
                response = document_table.query(
                    KeyConditionExpression=Key('year').eq('1984')
                )    
            
            pprint(response['Items'])
            
        except NameError as nameerror:
            print(nameerror)
            print('Bad query ')
        
    def display(self):
       
        self.query_10_button.on_click(self.query_10_on_button_clicked)
        self.query_GSI_button.on_click(self.query_GSI_on_button_clicked)
        display(self.query_10_button)
        display(self.query_GSI_button)
        return self
'''

'\nclass QueryButtons:\n    \n    def __init__(self, stubDB, table_name_key, env_suffix):\n      \n        self.stubDB = stubDB\n        self.env_suffix = env_suffix\n        self.table_name = stubDB.getTableName(table_name_key)\n        self.table_name_key = table_name_key\n          \n        self.query_10_button =widgets.Button(description="Query {} top 10".format(self.table_name))\n        self.query_GSI_button =widgets.Button(description="GSI {} ".format(self.table_name))\n        \n    def query_GSI_on_button_clicked(self,b):   \n        try:\n            response = {\'Items\': \'Undefined\'}\n            #if self.table_name == \'documents\':\n            db = boto3.resource(\'dynamodb\',\n                 endpoint_url=\'http://localhost:8000\',\n                 aws_access_key_id=AWS_ACCESS_KEY_ID,\n                 aws_secret_access_key=AWS_SECRET_ACCESS_KEY_ID\n                )\n\n            document_table = db.Table(self.table_name)\n\n            if self.table_name_key == 

In [18]:
# Dry-run switch on the stub: per the original note, True means "dont write
# to aws". It used to be set to True and then immediately overwritten with
# False, so the effective value was always False; state that once.
# Set to True before clicking "Batch Load" to rehearse without writing.
stubDB.dryrun = False

systemButton = SystemButtons(stubDB).display()
tableButtons = TableButtons(
    stubDB,
    process_config['key'],
    process_config['suffix'],
).display()

Button(description='Show Tables ', style=ButtonStyle())

Button(description='Create documents_dev', style=ButtonStyle())

Button(description='Delete documents_dev', style=ButtonStyle())

Button(description='Count Local documents_dev Data', style=ButtonStyle())

Button(description='Batch Load documents_dev', style=ButtonStyle())

source_file:  ../../../data/documents/output/0.documents.json
wait...
Dryrun is ON:  ../../../data/documents/output/0.documents.json
Dryrun is ON:  ../../../data/documents/output/1.documents.json
Dryrun is ON:  ../../../data/documents/output/2.documents.json
Dryrun is ON:  ../../../data/documents/output/3.documents.json
Dryrun is ON:  ../../../data/documents/output/4.documents.json
Dryrun is ON:  ../../../data/documents/output/5.documents.json
Dryrun is ON:  ../../../data/documents/output/6.documents.json
Dryrun is ON:  ../../../data/documents/output/7.documents.json
Dryrun is ON:  ../../../data/documents/output/8.documents.json
Dryrun is ON:  ../../../data/documents/output/9.documents.json
Dryrun is ON:  ../../../data/documents/output/10.documents.json
Dryrun is ON:  ../../../data/documents/output/11.documents.json
Dryrun is ON:  ../../../data/documents/output/12.documents.json
Dryrun is ON:  ../../../data/documents/output/13.documents.json
Dryrun is ON:  ../../../data/documents/outpu

Dryrun is ON:  ../../../data/documents/output/170.documents.json
Dryrun is ON:  ../../../data/documents/output/171.documents.json
Dryrun is ON:  ../../../data/documents/output/172.documents.json
Dryrun is ON:  ../../../data/documents/output/173.documents.json
Dryrun is ON:  ../../../data/documents/output/174.documents.json
Dryrun is ON:  ../../../data/documents/output/175.documents.json
Dryrun is ON:  ../../../data/documents/output/176.documents.json
Dryrun is ON:  ../../../data/documents/output/177.documents.json
Dryrun is ON:  ../../../data/documents/output/178.documents.json
Dryrun is ON:  ../../../data/documents/output/179.documents.json
Dryrun is ON:  ../../../data/documents/output/180.documents.json
Dryrun is ON:  ../../../data/documents/output/181.documents.json
Dryrun is ON:  ../../../data/documents/output/182.documents.json
Dryrun is ON:  ../../../data/documents/output/183.documents.json
Dryrun is ON:  ../../../data/documents/output/184.documents.json
Dryrun is ON:  ../../../d

Dryrun is ON:  ../../../data/documents/output/345.documents.json
Dryrun is ON:  ../../../data/documents/output/346.documents.json
Dryrun is ON:  ../../../data/documents/output/347.documents.json
Dryrun is ON:  ../../../data/documents/output/348.documents.json
Dryrun is ON:  ../../../data/documents/output/349.documents.json
Dryrun is ON:  ../../../data/documents/output/350.documents.json
Dryrun is ON:  ../../../data/documents/output/351.documents.json
Dryrun is ON:  ../../../data/documents/output/352.documents.json
Dryrun is ON:  ../../../data/documents/output/353.documents.json
Dryrun is ON:  ../../../data/documents/output/354.documents.json
Dryrun is ON:  ../../../data/documents/output/355.documents.json
Dryrun is ON:  ../../../data/documents/output/356.documents.json
Dryrun is ON:  ../../../data/documents/output/357.documents.json
Dryrun is ON:  ../../../data/documents/output/358.documents.json
Dryrun is ON:  ../../../data/documents/output/359.documents.json
Dryrun is ON:  ../../../d

Dryrun is ON:  ../../../data/documents/output/486.documents.json
Dryrun is ON:  ../../../data/documents/output/487.documents.json
Dryrun is ON:  ../../../data/documents/output/488.documents.json
Dryrun is ON:  ../../../data/documents/output/489.documents.json
Dryrun is ON:  ../../../data/documents/output/490.documents.json
Dryrun is ON:  ../../../data/documents/output/491.documents.json
Dryrun is ON:  ../../../data/documents/output/492.documents.json
Dryrun is ON:  ../../../data/documents/output/493.documents.json
Dryrun is ON:  ../../../data/documents/output/494.documents.json
Dryrun is ON:  ../../../data/documents/output/495.documents.json
Dryrun is ON:  ../../../data/documents/output/496.documents.json
Dryrun is ON:  ../../../data/documents/output/497.documents.json
Dryrun is ON:  ../../../data/documents/output/498.documents.json
Dryrun is ON:  ../../../data/documents/output/499.documents.json
Dryrun is ON:  ../../../data/documents/output/500.documents.json
Dryrun is ON:  ../../../d

Dryrun is ON:  ../../../data/documents/output/641.documents.json
Dryrun is ON:  ../../../data/documents/output/642.documents.json
Dryrun is ON:  ../../../data/documents/output/643.documents.json
Dryrun is ON:  ../../../data/documents/output/644.documents.json
Dryrun is ON:  ../../../data/documents/output/645.documents.json
Dryrun is ON:  ../../../data/documents/output/646.documents.json
Dryrun is ON:  ../../../data/documents/output/647.documents.json
Dryrun is ON:  ../../../data/documents/output/648.documents.json
Dryrun is ON:  ../../../data/documents/output/649.documents.json
Dryrun is ON:  ../../../data/documents/output/650.documents.json
Dryrun is ON:  ../../../data/documents/output/651.documents.json
Dryrun is ON:  ../../../data/documents/output/652.documents.json
Dryrun is ON:  ../../../data/documents/output/653.documents.json
Dryrun is ON:  ../../../data/documents/output/654.documents.json
Dryrun is ON:  ../../../data/documents/output/655.documents.json
Dryrun is ON:  ../../../d

Dryrun is ON:  ../../../data/documents/output/860.documents.json
Dryrun is ON:  ../../../data/documents/output/861.documents.json
Dryrun is ON:  ../../../data/documents/output/862.documents.json
Dryrun is ON:  ../../../data/documents/output/863.documents.json
Dryrun is ON:  ../../../data/documents/output/864.documents.json
Dryrun is ON:  ../../../data/documents/output/865.documents.json
Dryrun is ON:  ../../../data/documents/output/866.documents.json
Dryrun is ON:  ../../../data/documents/output/867.documents.json
Dryrun is ON:  ../../../data/documents/output/868.documents.json
Dryrun is ON:  ../../../data/documents/output/869.documents.json
Dryrun is ON:  ../../../data/documents/output/870.documents.json
Dryrun is ON:  ../../../data/documents/output/871.documents.json
Dryrun is ON:  ../../../data/documents/output/872.documents.json
Dryrun is ON:  ../../../data/documents/output/873.documents.json
Dryrun is ON:  ../../../data/documents/output/874.documents.json
Dryrun is ON:  ../../../d

### print('table_Name_key: ' , table_name_key)
#print('table_name: ', stubDB.getTableName(table_name_key))
tableButtons = TableButtons(stubDB, process_config['key'], process_config['suffix']).display()
#print('button table_name: ', stubDB.getTableName(table_name_key))

In [19]:
'''
if process_config['target'] == 'local':
    queryButtons = QueryButtons(stubDB, process_config['key'], process_config['suffix']).display()
else:
    print('Queries are not configured to run remotely.')
'''

Queries are not configured to run remotely.


# GATEWAY LAMBDA


In [20]:
#import requests 
#response = requests.get('https://vaf8njip53.execute-api.us-east-2.amazonaws.com/dev/document-item/d.1/the.1')
#response = requests.get('https://vaf8njip53.execute-api.us-east-2.amazonaws.com/dev/document-item/d.1/the.1')


#print(response)
#pprint(response.json())


# Separate API Installs for Prod and Dev 
Achieve independence by separating everything.
* prod_app > prod_url > prod_gateway > prod_lambda > prod_database
* test_app > test_url > test_gateway > test_lambda > test_database
* dev_app > dev_url > dev_gateway > dev_lambda > dev_database

# Overview
functions
distribution
collection
assimilation


**UserFunctions** 

    (Function keyword-search (param keyword, keyword, ...))

**UserImplementation** 
    
    (Framework nuxtjs 
                        (Framework vuejs 
                            (Runtime nodejs
                                (Language javascript)
                            )
                        )
                   )
                   
**UserFlow** 
    
    (Interface textbox (Button "search" (Interface keyword-search)))

**Admin-Functions**
    
    (Process plain-text-data-assimilation)
    
**Admin-Implementation**
    
    (Language python)

**Webservices**
    
    (Service aws-gateway (Function lambda (Interface dataservice )))
    (aws(Gateway ""gateway(Function lambda (data-service))))
    
**Web-implementation** 
    
    (Framework nodejs (Language javascript ))

**Dataservices** 
    
    (table "table-name" (create object)) 
    (table "table-name" (read object))
    (table "table-name" (update object)) 
    (table "table-name" (delete object))
    
**Data-implementation** 
    
    (database dynamodb (index GSI))

**Data-Interface** 
    
    (object JSON)

Software-Architecture (
    
    (website (function (webservice (callback))))
    )  

Data-Architecture ()

## Design Goals 
    * static website
    * single data table design








## GATEWAY Dev
Keyword search

In [21]:
# https://r922ib1m5m.execute-api.us-east-2.amazonaws.com/dev/document/d.1

#import requests 
#from pprint import pprint

#keys = 'michigan opioid maps'
#aws_dom='85z6m8l3q8'
#stage='dev'
#gateway_url='https://{}.execute-api.us-east-2.amazonaws.com/dev'.format(aws_dom)
#lambda_function='documents/keywords?keywords={}'.format(keys)
## pk='d.1'

#url = 'https://{}.execute-api.us-east-2.amazonaws.com/{}/{}'\
#    .format(aws_dom,
#           stage,
#           lambda_function)

#print(url)

#response = requests.get(url)
#pprint(response.json())


## GATEWAY Prod

In [22]:
#import requests 
#gateway_url = 'https://xg4pgsqx2m.execute-api.us-east-2.amazonaws.com/prod'
## gateway_url='https://tdo6fo54aa.execute-api.us-east-2.amazonaws.com/prod'
#lambda_function='document'
#path_param_d_id='d.1'


#response = requests.get('{}/{}/{}'.format(gateway_url, 
#                                         lambda_function, 
#                                         path_param_d_id ))
#pprint(response.json())
