In [None]:
'''

create a dynamodb client
list tables in dynamodb
create table in dynamodb
list item in table
insert two item to dynamodb
list item in table
get item from table
remove item from table
query
scan

What is the difference between query and scan?

參考連結
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html

DynamoDB 對應Python 資料型態
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/dynamodb.html#ref-valid-dynamodb-types

'''

In [None]:
!pip install boto3 awscli
!aws --version


In [None]:
# Create the DynamoDB service resource used by the rest of the notebook.

import boto3

# Connection settings passed to boto3.
# The region and the two credential entries can be left out when they have
# already been configured through the AWS CLI.
_connection_settings = {
    'endpoint_url': 'http://dynamodb:8000',
    'region_name': 'us-east-1',
    'aws_access_key_id': 'test',
    'aws_secret_access_key': 'test',
}

# Get the service resource.
dynamodb = boto3.resource('dynamodb', **_connection_settings)


In [None]:
# Show every table currently present in DynamoDB.

[existing_table for existing_table in dynamodb.tables.all()]


In [None]:
# Create the 's3-objects-list' table, or reuse it when it already exists.

table_name = 's3-objects-list'

# Creating a table whose name is already taken fails with
# ResourceInUseException, so look the name up first. This keeps the cell
# safe to re-run (e.g. after "Restart Kernel -> Run All").
existing_table_names = [existing.name for existing in dynamodb.tables.all()]

if table_name in existing_table_names:
    table = dynamodb.Table(table_name)
else:
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'image_name',
                'KeyType': 'HASH'  # Partition key
            },
            {
                'AttributeName': 'upload_time',
                'KeyType': 'RANGE'  # Sort key
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'image_name',
                'AttributeType': 'S'  # N means number; S means string.
            },
            {
                'AttributeName': 'upload_time',
                'AttributeType': 'S'
            }
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 5,
            'WriteCapacityUnits': 5
        }
    )

# create_table returns before the table is ready for use; block until the
# table actually exists so the following cells can read and write it.
table.wait_until_exists()


In [None]:
# List the tables in DynamoDB.

[*dynamodb.tables.all()]

In [None]:
# Count the items in the table.

# Get a handle on the table by name.
table = dynamodb.Table('s3-objects-list')

# Read the item count from the table object and print it.
current_item_count = table.item_count
print(current_item_count)


In [None]:
# Insert two items into the table.

# Float types are not supported. Use Decimal types instead.
from decimal import Decimal
import datetime

# Format shared by both upload timestamps.
timestamp_format = '%Y-%m-%d, %H:%M:%S'

# First item: take the timestamp right before the write and echo it.
now1 = datetime.datetime.now().strftime(timestamp_format)
print(now1)

first_item = {
    "image_name": 'first_image',
    "upload_time": now1,
    "image_url": 'https://s3_url/first_image',
}
table.put_item(Item=first_item)

# Second item: a fresh timestamp. `now2` is used again by the cells that
# get and delete this item.
now2 = datetime.datetime.now().strftime(timestamp_format)
print(now2)

second_item = {
    "image_name": 'second_image',
    "upload_time": now2,
    "image_url": 'https://s3_url/second_image',
}
table.put_item(Item=second_item)


In [None]:
# Count the items in the table again.

# boto3 resource attributes are loaded on first access and then cached on the
# object. A `table` whose item_count was already read would keep printing that
# old value, so refresh the table description before reading it.
table.reload()
print(table.item_count)


In [None]:
# Get a single item from the table by its full primary key.

from pprint import pprint

response = table.get_item(
    Key={
        'image_name': 'second_image',
        'upload_time': now2
    },
    # GetItem is eventually consistent by default; request a strongly
    # consistent read so an item written just before this call is returned.
    ConsistentRead=True,
)

pprint(response)

# The response carries no 'Item' entry when nothing matches the key, so use
# .get() instead of indexing to avoid a KeyError in that case.
item = response.get('Item')
if item is None:
    print('No item found for the given key')
else:
    pprint(item)


In [None]:
# Remove an item from the table.
#
# The key must contain every attribute of the primary key:
#   - simple primary key    -> a value for the partition key only
#   - composite primary key -> values for both the partition key
#                              and the sort key

key_to_delete = {
    "image_name": 'second_image',
    "upload_time": now2,
}

table.delete_item(Key=key_to_delete)


In [None]:
# Query
#
# The Query operation finds items based on primary key values.
# You can query any table or secondary index that has a composite primary key
# (a partition key and a sort key).
#
# Conditions for scanning and querying are built with the
# boto3.dynamodb.conditions.Key and boto3.dynamodb.conditions.Attr classes:
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/dynamodb.html#ref-dynamodb-conditions

from boto3.dynamodb.conditions import Key, Attr
from pprint import pprint

# Match every item whose partition key is 'first_image'.
partition_key_condition = Key('image_name').eq('first_image')

response = table.query(KeyConditionExpression=partition_key_condition)

# Optional extra argument for table.query (left disabled):
#     FilterExpression=Attr('image_url').contains('xxx'),
#
# A sort-key condition can be given at the same time by using KeyConditions.
# KeyConditionExpression and KeyConditions are mutually exclusive: pass only
# one of them.
#     KeyConditions={
#         'image_name': {
#             'AttributeValueList': [
#                 'first_image'
#             ],
#             'ComparisonOperator': 'EQ'
#         },
#         'upload_time': {
#             'AttributeValueList': [
#                 '2020-07-19, 10:57:18'
#             ],
#             'ComparisonOperator': 'EQ'
#         }
#     }

items = response['Items']
pprint(items)


In [None]:
# Scan
#
# The Scan operation returns one or more items and item attributes
#     by accessing every item in a table or a secondary index.
#
# Conditions for scanning and querying are built with the
#     boto3.dynamodb.conditions.Key and boto3.dynamodb.conditions.Attr classes.
#
# If the total number of scanned items exceeds the maximum dataset size limit
#     of 1 MB, the scan stops and the response carries a LastEvaluatedKey
#     value that is used to continue the scan in a subsequent operation.

from boto3.dynamodb.conditions import Key, Attr
from pprint import pprint

scan_kwargs = {
    'FilterExpression': Attr('image_url').contains('s3_url'),
}

# Follow LastEvaluatedKey until it is absent, so that matches beyond the
# first page are collected too instead of being silently dropped.
items = []
while True:
    response = table.scan(**scan_kwargs)
    items.extend(response['Items'])

    last_evaluated_key = response.get('LastEvaluatedKey')
    if last_evaluated_key is None:
        break
    # Resume the next request right after the last key that was evaluated.
    scan_kwargs['ExclusiveStartKey'] = last_evaluated_key

pprint(items)


In [None]:
# Parallel scan
#
# A parallel scan is carried out by several workers (threads or application
# processes); each request must specify Segment and TotalSegments:
#     Segment:       the segment a particular worker scans (counted from 0)
#     TotalSegments: the total number of segments of the parallel scan; this
#                    value must equal the number of workers in use
#
# https://docs.aws.amazon.com/zh_tw/amazondynamodb/latest/developerguide/Scan.html#Scan.ParallelScan

from boto3.dynamodb.conditions import Key, Attr
from pprint import pprint

total_segments = 3

# Issue one scan request per segment. In this cell the three requests are
# made one after another, in segment order 0, 1, 2.
response_0, response_1, response_2 = [
    table.scan(TotalSegments=total_segments, Segment=segment_index)
    for segment_index in range(total_segments)
]

items_0 = response_0['Items']
items_1 = response_1['Items']
items_2 = response_2['Items']

# Print the items returned for each segment under its own heading.
for heading, segment_items in (
    ('--- ITEMS 0 ---', items_0),
    ('--- ITEMS 1 ---', items_1),
    ('--- ITEMS 2 ---', items_2),
):
    print(heading)
    pprint(segment_items)

In [None]:
# Prepare more items for testing the parallel scan.
from decimal import Decimal
import datetime


# batch_writer buffers the puts and sends them in batches, instead of
# issuing one PutItem request per item. Every item here has a distinct
# partition key ('image_0' ... 'image_99'), so no batch holds duplicate keys.
with table.batch_writer() as batch:
    for i in range(100):
        now = datetime.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
        batch.put_item(
            Item={
                "image_name": f'image_{i}',
                "upload_time": now,
                "image_url": f'https://image_{i}/test'
            }
        )