In [9]:
'''

@Author: Vighnesh Harish Bilgi
@Date: 2022-11-25
@Last Modified by: Vighnesh Harish Bilgi
@Last Modified time: 2022-11-25
@Title : Generate random records and upload them to DynamoDB

'''

'\n\n@Author: Vighnesh Harish Bilgi\n@Date: 2022-11-25\n@Last Modified by: Vighnesh Harish Bilgi\n@Last Modified time: 2022-11-25\n@Title : Genrerate random records and upload them to dynamoDB\n\n'

In [10]:
import boto3
import pandas as pd
import names
import random
import time
from io import StringIO

In [11]:
import os

# Region used by every boto3 client/resource created in this notebook.
os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'

# Copy the credentials from the custom 'test1_*' variables into the variable
# names boto3 reads. os.environ only accepts str values, so assigning the None
# that os.environ.get() returns for an unset variable raises TypeError; copy a
# value only when it is actually present and leave the credential lookup to
# boto3 otherwise.
for _target, _source in (
    ('AWS_ACCESS_KEY_ID', 'test1_access_key'),
    ('AWS_SECRET_ACCESS_KEY', 'test1_secret_access_key'),
):
    _value = os.environ.get(_source)
    if _value is not None:
        os.environ[_target] = _value
    else:
        print(f"Environment variable '{_source}' is not set; {_target} left unchanged.")

# Names of the DynamoDB table and the S3 bucket used throughout the notebook.
TABLE_NAME = 'Auto_Load_Table'
BUCKET_NAME = 'auto-load-bucket'

### Custom functions to create the DynamoDB table

In [12]:
def connect_to_dynamoDB():
    """

    Description:
        Build a boto3 resource object for the DynamoDB service.
    Parameter:
        No parameters
    Return:
        ServiceResource for 'dynamodb'

    """
    return boto3.resource('dynamodb')

def create_items(table,dataset_records):
    """

    Description:
        Write one record to a DynamoDB table as a single item.
    Parameter:
        dynamodb.table table,
        sequence dataset_records (index 0 is stored as 'id', index 1 as 'name')
    Return:
        No values returned.

    """
    # Assemble the item first, then hand it to the table in one call.
    item = dict(id=dataset_records[0], name=dataset_records[1])
    table.put_item(Item=item)

def create_table(dyDB,table_name):
    """

    Description:
        To create a DynamoDB table if it doesn't exist. The table uses a
        composite key: numeric 'id' (HASH) and string 'name' (RANGE).
    Parameter:
        ServiceResource dyDB,
        string table_name
    Return:
        no value returned

    """
    # Reuse the low-level client behind the resource that was passed in, so the
    # existence check and the creation talk to the same endpoint/credentials.
    dynamodb_client = dyDB.meta.client

    # ListTables is paginated, so a single call can miss an existing table in
    # an account with many tables. Walk every page before deciding to create.
    paginator = dynamodb_client.get_paginator('list_tables')
    table_exists = any(
        table_name in page['TableNames']
        for page in paginator.paginate()
    )
    if table_exists:
        return

    # Create the DynamoDB table.
    dyDB.create_table(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'id',
                'KeyType': 'HASH'
            },
            {
                'AttributeName': 'name',
                'KeyType': 'RANGE'
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'id',
                'AttributeType': 'N'
            },
            {
                'AttributeName': 'name',
                'AttributeType': 'S'
            },
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 5,
            'WriteCapacityUnits': 5
        }
    )


### Custom functions to connect to the S3 service

In [13]:
def connect_to_s3_client():
    """

    Description:
        Build a low-level boto3 client for the S3 service.
    Parameter:
        No parameters
    Return:
        boto3 client for 's3'
    """
    s3_client = boto3.client("s3")
    return s3_client


def connect_to_s3_resource():
    """

    Description:
        Build a boto3 resource object for the S3 service.
    Parameter:
        No parameters
    Return:
        ServiceResource for 's3'
    """
    return boto3.resource(service_name = 's3')


### Custom functions to upload data to DynamoDB and send each item from DynamoDB as a .csv file to the S3 bucket

In [14]:
def _scan_all_items(table):
    """Yield every item of 'table', following scan pagination to the last page."""
    scan_kwargs = {}
    while True:
        page = table.scan(**scan_kwargs)
        yield from page['Items']

        # A scan response carries 'LastEvaluatedKey' only while more data
        # remains; feed it back as 'ExclusiveStartKey' to fetch the next page.
        last_key = page.get('LastEvaluatedKey')
        if last_key is None:
            return
        scan_kwargs['ExclusiveStartKey'] = last_key


def dynamo_to_s3(table, delay_seconds=30):
    """

    Description:
        Fetching each item from dynamoDB Table 'table' as .csv file and uploading them to S3 bucket
        BUCKET_NAME under the 'data-output/' prefix, one file per item ('record#<k>.csv').
        All pages of the scan are read, so items beyond the first page are exported too.
    Parameter:
        dynamodb.table table,
        number delay_seconds (pause after each upload, default 30)
    Return:
        No values returned

    """
    s3 = connect_to_s3_resource()

    print(f"Displaying objects in {BUCKET_NAME}:")

    for count, item in enumerate(_scan_all_items(table), start=1):

        file_name = f'record#{count}.csv'

        # Render the item as a one-row CSV in memory; no temporary file is written.
        csv_buffer = StringIO()
        pd.DataFrame(item, index=[0]).to_csv(csv_buffer, index=False)
        s3.Object(BUCKET_NAME, f'data-output/{file_name}').put(Body=csv_buffer.getvalue())

        print(f"{file_name} is uploaded to S3 Bucket '{BUCKET_NAME}'")

        # wait before next iteration
        time.sleep(delay_seconds)


def generate_and_upload(table,n,delay_seconds=5):
    """

    Description:
        Generate n random records ([id, full name]) and upload each one to the DynamoDB Table
        as an item. A record whose 'id' was already generated during this call is skipped
        (not uploaded), so fewer than n items may be written.
    Parameter:
        dynamodb.table table,
        integer n,
        number delay_seconds (pause after each iteration, default 5)
    Return:
        No values returned

    """

    # A set gives constant-time duplicate checks; only ids from this call are tracked.
    seen_ids = set()
    for i in range(n):
        record = [random.randint(100,999), names.get_full_name()]

        if record[0] in seen_ids:
            print(f"Record id : {record[0]} already exists! ")
        else:
            seen_ids.add(record[0])
            print(f"Record #{i+1} data : {record}")
            create_items(table,record)

        # wait before next iteration
        time.sleep(delay_seconds)

### 1. Create S3 Bucket

In [15]:
# Two S3 handles: the client creates the bucket, the resource lists buckets.
client = connect_to_s3_client()
s3 = connect_to_s3_resource()

# Create the bucket, then list every bucket so the new one can be checked by eye.
client.create_bucket(Bucket=BUCKET_NAME)

print(f"Printing all bucket names to verify if - {BUCKET_NAME} is created:")
for bucket in s3.buckets.all():
    print(bucket.name)

Printing all bucket names to verify if - auto-load-bucket is created:
auto-load-bucket
aws-cloudtrail-logs-949401335332-4af97cdf
aws-cloudtrail-logs-949401335332-a2ad74b3


### 2. Send data to DynamoDB Table

In [16]:
# Connect to DynamoDB and create TABLE_NAME when it is not listed yet.
dyDB = connect_to_dynamoDB()

create_table(dyDB,TABLE_NAME)

# Get a handle for the table and call wait_until_exists() before reading its
# creation timestamp (presumably this blocks while a new table is still being created).
table = dyDB.Table(TABLE_NAME)
table.wait_until_exists()
print(f"DateTime creation of Table : {table.creation_date_time}")

# NOTE(review): the record count comes from an interactive prompt, so this cell
# pauses for keyboard input and raises ValueError on non-integer text.
n = int(input("Enter number of times to run loop:"))

# Generate n random records and upload them to the table.
generate_and_upload(table,n)

DateTime creation of Table : 2022-12-07 11:18:38.846000+05:30
Record #1 data : [538, 'Robert Herrera']
Record #2 data : [942, 'William Thompson']
Record #3 data : [401, 'Clifford Tyler']
Record #4 data : [772, 'Sandra Charlot']
Record #5 data : [548, 'Lauren Self']
Record #6 data : [119, 'Keri Ryan']
Record #7 data : [489, 'Debra Hoover']
Record #8 data : [662, 'Robert Grasso']
Record #9 data : [411, 'Sharon Petrich']
Record #10 data : [604, 'Marcos Clyde']
Record #11 data : [316, 'Josephine Everett']
Record #12 data : [779, 'Justin Beck']
Record #13 data : [597, 'Candice Jethva']
Record #14 data : [267, 'Brian Deboer']
Record #15 data : [456, 'Margaret Lavalley']
Record #16 data : [394, 'Billie Stillman']
Record #17 data : [449, 'Jennifer Padilla']
Record #18 data : [727, 'Jeffery Smith']
Record #19 data : [500, 'Carolyn Jacobson']
Record #20 data : [577, 'Brandon Hernandez']
Record #21 data : [266, 'Kellie Williams']
Record #22 data : [830, 'Shirley Nooe']
Record #23 data : [757, 'My

### 3. Send items from DynamoDB Table as .csv file to S3 bucket

In [17]:
# Get a handle for TABLE_NAME (uses `dyDB` from the earlier cell) and export
# its items to the S3 bucket as individual .csv files.
table = dyDB.Table(TABLE_NAME)
dynamo_to_s3(table)

Displaying objects in auto-load-bucket:
record#1.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#2.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#3.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#4.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#5.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#6.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#7.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#8.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#9.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#10.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#11.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#12.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#13.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#14.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#15.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#16.csv is uploaded to S3 Bucket 'auto-load-bucket'
record#17.csv is uploaded to S3 Bucket 'a