In [8]:
import numbers
import time

import boto3
import pandas as pd

In [9]:
# Configure boto3's default session to use the named AWS profile 'ck-pythonist'.
# NOTE(review): the profile name is hardcoded, so this notebook only runs on a
# machine where that profile is configured — consider reading it from the environment.
boto3.setup_default_session(profile_name='ck-pythonist')

In [7]:
# Sample purchase used to exercise the pipeline: header fields plus a list of
# line items under "Items".
# NOTE(review): for "Apple", Qty (10) x Rate (25) is 250 but Amount is 125 —
# confirm which of the sample values is intended.
transaction = {
    "transactionid": "tr202303251425",
    "customerid": "ck",
    "Date": "2023-03-25",
    "Items": [
        {"Item": "Apple", "Qty": 10, "Rate": 25, "Amount": 125,
         "Category": "Fruits&Vegetables", "uom": "nos"},
        {"Item": "Orange", "Qty": 10, "Rate": 50, "Amount": 500,
         "Category": "Fruits&Vegetables", "uom": "nos"},
        {"Item": "Badam", "Qty": 500, "Rate": 1, "Amount": 500,
         "Category": "DryFruits&Nuts", "uom": "gms"},
        {"Item": "Walnut", "Qty": 500, "Rate": 2, "Amount": 1000,
         "Category": "DryFruits&Nuts", "uom": "gms"},
    ],
}

In [14]:
def TabletoDynamoDBJson(data, batch_size=20):
    """Convert a DataFrame into batches of DynamoDB attribute-value items.

    Each row becomes one dict mapping column name to a typed DynamoDB value:
    missing values become ``{"NULL": True}``, real numbers (Python or numpy
    scalars, excluding booleans) become ``{"N": "<str>"}`` and everything else
    becomes ``{"S": "<str>"}``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the key columns ``TransactionID`` and ``Item``.
    batch_size : int, default 20
        Maximum number of items per returned batch.

    Returns
    -------
    list[list[dict]]
        The converted rows, in order, split into chunks of ``batch_size``.

    Raises
    ------
    Exception
        If either key column is missing from ``data``.
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    pk = "TransactionID"
    sk = "Item"

    if any(col not in data.columns for col in (pk, sk)):
        raise Exception("Partition Key Or Sort Key Missing In Data")

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    dynamoDBJson = []

    for _, row in data.iterrows():
        objIteration = {}

        for key, val in row.items():
            # Null check comes first: NaN is a float, and would otherwise be
            # emitted as the invalid number "nan". Only scalars are tested so
            # list-like cells do not produce an ambiguous array truth value.
            if pd.api.types.is_scalar(val) and pd.isnull(val):
                objIteration[key] = {"NULL": True}
            # numbers.Real also matches numpy scalars (np.int64, np.float64),
            # which is what iterrows yields for all-numeric frames. Booleans
            # are deliberately kept out of the numeric branch.
            elif isinstance(val, numbers.Real) and not isinstance(val, bool):
                objIteration[key] = {"N": str(val)}
            else:
                objIteration[key] = {"S": str(val)}

        dynamoDBJson.append(objIteration)

    # batchwise split
    batchRecords = [dynamoDBJson[n:n + batch_size]
                    for n in range(0, len(dynamoDBJson), batch_size)]

    return batchRecords


In [20]:
def ConvertTransactionToDynamoJson(transaction):
    """Flatten a transaction and write its line items to the Transactions table.

    The list under ``transaction["Items"]`` becomes one row per line item;
    every other top-level field of ``transaction`` is copied onto each row.
    Column names are lower-cased, except the two key columns which are renamed
    to ``TransactionID`` and ``Item``. Rows are converted with
    ``TabletoDynamoDBJson`` and written batch by batch with ``batch_write_item``.

    Parameters
    ----------
    transaction : dict
        Must contain an ``"Items"`` list of dicts plus header fields,
        including ``transactionid``.

    Returns
    -------
    dict
        ``{"Status": "Transaction Posted To Database"}`` once every item has
        been accepted by DynamoDB.

    Raises
    ------
    RuntimeError
        If DynamoDB still reports unprocessed items after the retry budget.
    """
    dynamoDB = boto3.client("dynamodb", region_name="us-east-2")

    transactionTable = pd.DataFrame.from_dict(transaction["Items"])

    # Broadcast the header fields (everything except the item list) to all rows.
    for key, val in transaction.items():
        if key == "Items":
            continue
        transactionTable.loc[:, key] = val

    # Columns
    transactionTable.columns = [x.lower() for x in transactionTable.columns]
    mapPKSK = {"transactionid": "TransactionID", "item": "Item"}
    transactionTable = transactionTable.rename(columns=mapPKSK)

    # Batch DynamoDB Json
    batchRecords = TabletoDynamoDBJson(transactionTable)

    # Push To DynamoDB
    maxRetries = 5
    for batch in batchRecords:
        pending = {"Transactions": [{"PutRequest": {"Item": x}} for x in batch]}
        attempt = 0
        while True:
            respBatchWrite = dynamoDB.batch_write_item(RequestItems=pending)
            # batch_write_item can succeed while leaving some requests
            # unwritten (e.g. under throttling); they come back in
            # UnprocessedItems in the same shape as RequestItems.
            pending = respBatchWrite.get("UnprocessedItems") or {}
            if not pending:
                break
            attempt += 1
            if attempt > maxRetries:
                raise RuntimeError(
                    "DynamoDB left unprocessed items after %d retries" % maxRetries)
            # Exponential backoff, capped at one second.
            time.sleep(min(0.05 * 2 ** attempt, 1.0))

    return {"Status": "Transaction Posted To Database"}


In [21]:
# Write the sample transaction's line items to DynamoDB and keep the returned status dict.
status=ConvertTransactionToDynamoJson(transaction)

In [22]:
# Display the status dict returned by the write step.
status

{'Status': 'Transaction Posted To Database'}

In [25]:
def DynamoDBTable(transactionid):
    """Fetch every item of one transaction from the Transactions table.

    Runs a PartiQL ``select`` through ``execute_statement``. The transaction id
    is passed as a bound parameter rather than interpolated into the statement,
    so ids containing quotes cannot break or alter the query. Additional pages
    are followed through ``NextToken`` and their items appended to the first
    response.

    Parameters
    ----------
    transactionid : str
        Value of the ``TransactionID`` key to look up (converted with ``str``).

    Returns
    -------
    dict
        The first ``execute_statement`` response, with ``"Items"`` holding the
        items from all pages and no ``"NextToken"`` entry.
    """
    dynamoDB = boto3.client("dynamodb", region_name="us-east-2")

    request = {
        "Statement": "select * from Transactions where TransactionID=?",
        "Parameters": [{"S": str(transactionid)}],
        "Limit": 1000,
    }

    results = dynamoDB.execute_statement(**request)
    results.setdefault("Items", [])

    # Keep fetching while DynamoDB signals that more pages are available.
    page = results
    while page.get("NextToken"):
        page = dynamoDB.execute_statement(NextToken=page["NextToken"], **request)
        results["Items"].extend(page.get("Items", []))

    # The merged result is complete, so a continuation token would be misleading.
    results.pop("NextToken", None)

    return results

In [26]:
# Read back the items stored for the sample transaction's id.
results=DynamoDBTable(transaction["transactionid"])

In [28]:
# Display the returned items, still in DynamoDB attribute-value form ({"S": ...} / {"N": ...}).
results["Items"]

[{'uom': {'S': 'nos'},
  'date': {'S': '2023-03-25'},
  'rate': {'N': '25'},
  'Item': {'S': 'Apple'},
  'qty': {'N': '10'},
  'category': {'S': 'Fruits&Vegetables'},
  'amount': {'N': '125'},
  'TransactionID': {'S': 'tr202303251425'},
  'customerid': {'S': 'ck'}},
 {'uom': {'S': 'gms'},
  'date': {'S': '2023-03-25'},
  'rate': {'N': '1'},
  'Item': {'S': 'Badam'},
  'qty': {'N': '500'},
  'category': {'S': 'DryFruits&Nuts'},
  'amount': {'N': '500'},
  'TransactionID': {'S': 'tr202303251425'},
  'customerid': {'S': 'ck'}},
 {'uom': {'S': 'nos'},
  'date': {'S': '2023-03-25'},
  'rate': {'N': '50'},
  'Item': {'S': 'Orange'},
  'qty': {'N': '10'},
  'category': {'S': 'Fruits&Vegetables'},
  'amount': {'N': '500'},
  'TransactionID': {'S': 'tr202303251425'},
  'customerid': {'S': 'ck'}},
 {'uom': {'S': 'gms'},
  'date': {'S': '2023-03-25'},
  'rate': {'N': '2'},
  'Item': {'S': 'Walnut'},
  'qty': {'N': '500'},
  'category': {'S': 'DryFruits&Nuts'},
  'amount': {'N': '1000'},
  'Transa

In [29]:
import json

In [43]:
def Djson2HiveJson(response):
    """Convert DynamoDB attribute-value items to newline-delimited JSON.

    Each item in ``response["Items"]`` is flattened to a plain dict and
    serialised on its own line (one JSON object per line, no enclosing array).

    Type handling per attribute:
      * ``{"S": ...}``    -> the string value
      * ``{"NULL": ...}`` -> JSON ``null``
      * ``{"BOOL": ...}`` -> the boolean value
      * anything else is read as ``{"N": ...}`` and converted with ``float``

    Parameters
    ----------
    response : dict
        A mapping with an ``"Items"`` list of DynamoDB-typed items.

    Returns
    -------
    str
        The records joined by ``"\\n"``; an empty string when there are no items.

    Raises
    ------
    KeyError
        If an attribute uses a DynamoDB type other than S, N, NULL or BOOL.
    """
    hiveJsonL = []

    for item in response["Items"]:
        obj = {}

        for key, val in item.items():
            if "S" in val:
                obj[key] = val["S"]
            elif "NULL" in val:
                obj[key] = None
            elif "BOOL" in val:
                obj[key] = val["BOOL"]
            else:
                obj[key] = float(val["N"])

        # Serialise each record on its own so that record boundaries never
        # depend on the text inside string values.
        hiveJsonL.append(json.dumps(obj))

    return "\n".join(hiveJsonL)

In [44]:
# Flatten the DynamoDB query result into one JSON object per line.
jsonH=Djson2HiveJson(results)

In [46]:
# Save the newline-delimited JSON locally. The context manager closes the
# handle even if the write raises, and keeps open/write/close in one cell so
# the file cannot be left open by running cells out of order.
with open("test_trans.json","w") as trans:
    trans.write(jsonH)

In [49]:
# S3 client in the same region used for the DynamoDB clients (us-east-2).
s3=boto3.client("s3",region_name="us-east-2")

In [50]:
# Upload the in-memory JSON string (not the local test_trans.json file) to the bucket.
# NOTE(review): bucket name and object key are hardcoded — confirm they are the intended targets.
s3.put_object(Body=jsonH,Bucket="data-analytics-ck",Key="TransactionTable/test.json")

{'ResponseMetadata': {'RequestId': 'M31YXBH0X1TGNHPM',
  'HostId': 'SBRJ6Y+v5CmKW69H/8FVj22eWwD4a8uqNBKU+nxTpyz6uuIddwnpfeXG7hYS3FYwELgPGuqn6nQ/0SjcfvXM7A==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'SBRJ6Y+v5CmKW69H/8FVj22eWwD4a8uqNBKU+nxTpyz6uuIddwnpfeXG7hYS3FYwELgPGuqn6nQ/0SjcfvXM7A==',
   'x-amz-request-id': 'M31YXBH0X1TGNHPM',
   'date': 'Sat, 25 Mar 2023 10:18:30 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"898e0f77736c701f139d6094c61eab74"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"898e0f77736c701f139d6094c61eab74"',
 'ServerSideEncryption': 'AES256'}