# AWS SDK
Data Processing 


## DynamoDB Local and Remote
This notebook connects to a local DynamoDB instance.  
```
# you should build the docker container at least once.
docker-compose build
```
Local mode requires you to launch DynamoDB locally:
```
docker-compose up
```


# The gateway is handled in node

## Issues

* need to make typifyItem go deep into JSON
* need to create version that creates, reads, writes, updates REMOTE aws tables
* need to convert the keys from a `#` separator to a `.` separator (e.g. `d#1` becomes `d.1`)
* 2019-06-01 need to set up (development, test, prod) versions of tables
* 2019-05-31 add access_key to .env
* 2019-05-31 add secret_key to .env


In [1]:
from dotenv import load_dotenv
load_dotenv(verbose=True)
import os
from os.path import isfile, join
from pprint import pprint
import json
# from interface import implements, Interface 
import interface 
from util import Util

#from buffered_writer import BufferedWriter

#from parameters import Parameters
from data_process_interface import DataProcessInterface
from stubdb import StubDB

import boto3
from boto3.dynamodb.conditions import Key
# data processing
from document_process import DocumentProcess
from movie_process import MovieProcess

# Shared Util helper instance (the disabled readProcessConfig() call in the
# next cell would use it).
util = Util()

In [2]:
# 01-process-data.ipynb writes process_config.json. Loading it is disabled
# here; the configuration is set inline instead.
# process_config = util.readProcessConfig()

# Start from the 'movies' settings, then switch the key to 'documents'.
# The second assignment is the configuration that stays in effect.
process_config = dict(key='movies', region='us-east-1', suffix='dev', target='remote')
process_config = dict(process_config, key='documents')

pprint(process_config)

{'key': 'documents', 'region': 'us-east-1', 'suffix': 'dev', 'target': 'remote'}


In [3]:

# Connection settings read from the process environment (the first cell calls
# load_dotenv(), so values from a .env file are expected to be visible here).
API_URL = os.getenv("API_URL")
API_USER = os.getenv("API_USER")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY_ID = os.getenv("AWS_SECRET_ACCESS_KEY_ID")
REGION_NAME = 'us-east-1'  # os.getenv("REGION_NAME")

# Fail fast, naming every setting that is missing instead of stopping at the
# first one with a message-less assert. AssertionError is raised explicitly so
# the exception type matches the previous asserts.
_missing_settings = [
    _name
    for _name, _value in (
        ("API_URL", API_URL),
        ("API_USER", API_USER),
        ("AWS_ACCESS_KEY_ID", AWS_ACCESS_KEY_ID),
        ("AWS_SECRET_ACCESS_KEY_ID", AWS_SECRET_ACCESS_KEY_ID),
        ("REGION_NAME", REGION_NAME),
    )
    if _value is None
]
if _missing_settings:
    raise AssertionError(
        'missing required setting(s): {}'.format(', '.join(_missing_settings))
    )

## Connect and Load Table Definitions

In [4]:
#########
# CREATE STUBDB
##
# Parameters handed to the StubDB wrapper below.
db_name = 'dynamodb'
endpoint_url = 'http://localhost:8000'
histories = 'table.histories.json'  # split create table and gsi

stubDB = None

# Construct the wrapper, load the table histories for the configured suffix,
# then connect to the configured target. The result of the chained calls is
# what the rest of the notebook uses as `stubDB`.
stubDB = (
    StubDB(
        db_name=db_name,
        endpoint_url=endpoint_url,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY_ID,
        region_name=process_config['region'],
    )
    .loadTableHistories(histories, env_suffix=process_config['suffix'])
    .connect(process_config['target'])
)

print('--------')

# Append the environment suffix (dev, test, or prod) to the TableName of
# every table definition in every history, modifying the definitions in place.
for key in stubDB.table_histories:
    keyname = key
    for item in stubDB.table_histories[key]:
        tb_name = '{}_{}'.format(item['TableName'], process_config['suffix'])
        item['TableName'] = tb_name


------------
tb:  documents
tb:  music
tb:  movies
---------
remote db connecting...
remote client connecting...
connecting...out
--------


In [5]:
# Print the table names returned by stubDB.getTableList().
pprint(stubDB.getTableList())

['documents_dev', 'movies_dev']


In [6]:
import ipywidgets as widgets
from IPython.display import display

class SystemButtons:
    """Single "Show Tables" notebook button for a database wrapper.

    `stubDB` only needs to provide `getTableList()`; its result is printed
    each time the button is clicked.
    """

    def __init__(self, stubDB):
        # The widget is built here but only rendered by display().
        self.show_button = widgets.Button(description="Show Tables ")
        self.stubDB = stubDB

    def show_on_button_clicked(self, b):
        """Click handler: print the current table list (`b` is unused)."""
        table_list = self.stubDB.getTableList()
        print(table_list)

    def display(self):
        """Attach the click handler, render the button and return self."""
        button = self.show_button
        button.on_click(self.show_on_button_clicked)
        display(button)
        return self
        
class TableButtons:
    """Create / Delete / Count notebook buttons for one table.

    Parameters:
        stubDB: database wrapper. This class uses `getTableName(key)`,
            `table_histories[key]` (a list of table definitions),
            `client.create_table` / `client.update_table`, and
            `db.Table(name)`.
        table_name_key: key into `stubDB.table_histories`.
        env_suffix: stored on the instance; not otherwise used here.

    Service failures in the create and delete handlers are reported by
    printing, not by raising, so a click on a missing or existing table does
    not surface a traceback.
    """

    def __init__(self, stubDB, table_name_key, env_suffix):
        self.stubDB = stubDB
        self.env_suffix = env_suffix
        self.table_name = stubDB.getTableName(table_name_key)
        self.table_name_key = table_name_key
        # Ordered table definitions: entries containing 'KeySchema' are
        # passed to create_table, the others to update_table.
        self.table_history = self.stubDB.table_histories[self.table_name_key]

        self.create_button = widgets.Button(description="Create {}".format(self.table_name))
        self.del_button = widgets.Button(description="Delete {}".format(self.table_name))
        self.count_button = widgets.Button(description="Count Local {} Data".format(self.table_name))
        # Built but never wired or displayed: the batch-load handler is disabled.
        self.batch_load_button = widgets.Button(description="Batch Load {}".format(self.table_name))

    def _client_error(self):
        """Return the exception class raised for failed service calls.

        NOTE(review): assumes `stubDB.client` is a boto3 client, which
        exposes its error classes under `.exceptions` - confirm in StubDB.
        """
        return self.stubDB.client.exceptions.ClientError

    def create_on_button_clicked(self, b):
        """Click handler: apply every definition in the table history.

        Definitions containing 'KeySchema' go to `create_table`, all others
        to `update_table`. A service error is printed and the loop continues
        with the next definition. `b` is unused.
        """
        for tdef in self.table_history:
            is_create = 'KeySchema' in tdef
            print('A:' if is_create else 'B:')
            pprint(tdef)
            try:
                if is_create:
                    self.stubDB.client.create_table(**tdef)
                else:
                    self.stubDB.client.update_table(**tdef)
            # Service failures arrive as ClientError; the previous NameError
            # handler could never match them, so this branch was unreachable.
            except self._client_error() as error:
                # The printed error carries the real cause; the message below
                # assumes the usual one (the table or change already exists).
                print(error)
                if is_create:
                    print('table {} already created.'.format(self.table_name))
                else:
                    print('table {} already updated.'.format(self.table_name))

    def count_on_button_clicked(self, b):
        """Click handler: print the table's `item_count`. `b` is unused.

        NOTE: per the boto3 documentation DynamoDB refreshes this value only
        periodically, so it can lag behind recent writes.
        """
        try:
            table = self.stubDB.db.Table(self.table_name)
            print('table: ', table.item_count)
        # Exception (not a bare except) so Ctrl-C and interpreter exit still
        # propagate; the cause is printed before the summary message.
        except Exception as error:
            print(error)
            print('Table {} doesnt exist'.format(self.table_name))

    def del_on_button_clicked(self, b):
        """Click handler: delete the table. `b` is unused.

        A service error (for example the table not existing) is printed
        and not raised.
        """
        try:
            table = self.stubDB.db.Table(self.table_name)
            table.delete()
        except self._client_error() as error:
            print(error)
            print('failed to delete {}'.format(self.table_name))

    # def batch_load_on_button_clicked(self, b):
    #    self.stubDB.loadBatchTableData(self.table_name_key, self.env_suffix, 'local')

    def display(self):
        """Attach the click handlers, render the three buttons, return self."""
        self.create_button.on_click(self.create_on_button_clicked)
        self.del_button.on_click(self.del_on_button_clicked)
        self.count_button.on_click(self.count_on_button_clicked)
        display(self.create_button)
        display(self.del_button)
        display(self.count_button)
        return self


In [7]:
# Dry-run switch: the first line enables it, the second immediately turns it
# back off, so the value in effect is False.
stubDB.dryrun = True  # dont write to aws
stubDB.dryrun = False

# display() returns the button holder itself, so both names end up bound to
# the displayed objects.
systemButton = SystemButtons(stubDB).display()
tableButtons = TableButtons(
    stubDB,
    table_name_key=process_config['key'],
    env_suffix=process_config['suffix'],
).display()

Button(description='Show Tables ', style=ButtonStyle())

Button(description='Create documents_dev', style=ButtonStyle())

Button(description='Delete documents_dev', style=ButtonStyle())

Button(description='Count Local documents_dev Data', style=ButtonStyle())

['documents_dev', 'movies_dev']
['movies_dev']
A:
{'AttributeDefinitions': [{'AttributeName': 'pk', 'AttributeType': 'S'},
                          {'AttributeName': 'sk', 'AttributeType': 'S'},
                          {'AttributeName': 'data', 'AttributeType': 'S'}],
 'GlobalSecondaryIndexes': [{'IndexName': 'gsi_1',
                             'KeySchema': [{'AttributeName': 'sk',
                                            'KeyType': 'HASH'},
                                           {'AttributeName': 'data',
                                            'KeyType': 'RANGE'}],
                             'Projection': {'ProjectionType': 'ALL'},
                             'ProvisionedThroughput': {'ReadCapacityUnits': 5,
                                                       'WriteCapacityUnits': 5}}],
 'KeySchema': [{'AttributeName': 'pk', 'KeyType': 'HASH'},
               {'AttributeName': 'sk', 'KeyType': 'RANGE'}],
 'ProvisionedThroughput': {'ReadCapacityUnits': 5, 'WriteCa