In [1]:
import boto3

In [2]:
# Low-level S3 client used for the get_object and put_object calls in this notebook.
# No credentials or region are passed here, so they have to come from the
# environment / boto3 configuration.
s3_client = boto3.client('s3')

In [3]:
# Request the orders file from the bucket and keep only the 'Body' entry
# of the response dictionary.
file_body = s3_client.get_object(
    Bucket='aibotodemo', Key='data/retail_db/orders/part-00000'
)['Body']

In [4]:
# Read the whole object body, decode it as UTF-8 and split it into one string per line.
# NOTE(review): 'Body' is presumably a one-shot stream, so re-running this cell
# without re-running the get_object cell would likely yield an empty list — confirm.
orders_list = file_body.read().decode('utf-8').splitlines()

In [5]:
# Pair the 4th comma-separated field of every order line (the order status)
# with a count of 1.
# Built as a list instead of a lazy `map`: a map object is exhausted after its
# first full iteration, so re-running a later cell that consumes it would
# silently see no data.
order_statuses = [(order.split(',')[3], 1) for order in orders_list]

In [6]:
import itertools as iter

In [7]:
# Sort the (status, 1) pairs by status so equal statuses sit next to each other
# (groupby only merges consecutive items), then group them by status.
# Every group is copied into a list immediately: groupby hands out one-shot
# iterators, so a lazy result would be empty the second time a cell loops over it.
# The result is a list of (status, [pairs...]) tuples.
order_statuses_grouped = [
    (status, list(pairs))
    for status, pairs in iter.groupby(
        sorted(order_statuses, key=lambda pair: pair[0]),
        key=lambda pair: pair[0]
    )
]

In [8]:
# Count the entries of each status group, producing (status, count) tuples.
order_count_by_status_tuple = [
    (status, len(list(group)))
    for status, group in order_statuses_grouped
]

In [9]:
# Render every (status, count) tuple as a 'status,count' text line.
order_count_by_status_str = [
    f'{status},{count}' for status, count in order_count_by_status_tuple
]

In [10]:
order_count_by_status_str

['CANCELED,1428',
 'CLOSED,7556',
 'COMPLETE,22899',
 'ON_HOLD,3798',
 'PAYMENT_REVIEW,729',
 'PENDING,7610',
 'PENDING_PAYMENT,15030',
 'PROCESSING,8275',
 'SUSPECTED_FRAUD,1558']

In [11]:
s3_client.put_object?

Signature: s3_client.put_object(*args, **kwargs)
Docstring:
Adds an object to a bucket. You must have WRITE permissions on a bucket to add an object to it.

 

Amazon S3 never adds partial objects; if you receive a success response, Amazon S3 added the entire object to the bucket.

 

Amazon S3 is a distributed system. If it receives multiple write requests for the same object simultaneously, it overwrites all but the last object written. Amazon S3 does not provide object locking; if you need this, make sure to build it into your application layer or use versioning instead.

 

To ensure that data is not corrupted traversing the network, use the ``Content-MD5`` header. When you use this header, Amazon S3 checks the object against the provided MD5 value and, if they do not match, returns an error. Additionally, you can calculate the MD5 while 

In [12]:
# Join the 'status,count' lines into a single string, one record per line.
# join() only puts '\n' between items, so the text has no trailing newline.
order_count_by_status_body = '\n'.join(order_count_by_status_str)

In [13]:
order_count_by_status_body

'CANCELED,1428\nCLOSED,7556\nCOMPLETE,22899\nON_HOLD,3798\nPAYMENT_REVIEW,729\nPENDING,7610\nPENDING_PAYMENT,15030\nPROCESSING,8275\nSUSPECTED_FRAUD,1558'

In [14]:
print(order_count_by_status_body)

CANCELED,1428
CLOSED,7556
COMPLETE,22899
ON_HOLD,3798
PAYMENT_REVIEW,729
PENDING,7610
PENDING_PAYMENT,15030
PROCESSING,8275
SUSPECTED_FRAUD,1558


In [15]:
order_count_by_status_body.encode('utf-8')

b'CANCELED,1428\nCLOSED,7556\nCOMPLETE,22899\nON_HOLD,3798\nPAYMENT_REVIEW,729\nPENDING,7610\nPENDING_PAYMENT,15030\nPROCESSING,8275\nSUSPECTED_FRAUD,1558'

In [16]:
# Upload the report as UTF-8 bytes to the target key. The call is the last
# expression of the cell, so its response dictionary is shown as the output.
s3_client.put_object(
    Bucket='aibotodemo',
    Key='data/retail_db/order_count_by_status/part-00000',
    Body=order_count_by_status_body.encode('utf-8'),
)

{'ResponseMetadata': {'RequestId': 'ECGKY55KD4GCVBK0',
  'HostId': 'ltatqMXh6/KVZuOpK8XPgiD6B+YPOCLuQpS4ODiAVqSaqH1HmVaguTsEkLWIyeB7owT4zl4VLZQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'ltatqMXh6/KVZuOpK8XPgiD6B+YPOCLuQpS4ODiAVqSaqH1HmVaguTsEkLWIyeB7owT4zl4VLZQ=',
   'x-amz-request-id': 'ECGKY55KD4GCVBK0',
   'date': 'Mon, 04 Jul 2022 23:44:54 GMT',
   'etag': '"2db90e2714348c710cbe934537ae80b3"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"2db90e2714348c710cbe934537ae80b3"'}

In [17]:
!aws s3 ls s3://aibotodemo/data/retail_db/order_count_by_status/part-00000

2022-07-05 05:14:54        144 part-00000


In [18]:
!aws s3 cp s3://aibotodemo/data/retail_db/order_count_by_status/part-00000 order_count_by_status.csv

download: s3://aibotodemo/data/retail_db/order_count_by_status/part-00000 to ./order_count_by_status.csv


In [19]:
!cat order_count_by_status.csv

CANCELED,1428
CLOSED,7556
COMPLETE,22899
ON_HOLD,3798
PAYMENT_REVIEW,729
PENDING,7610
PENDING_PAYMENT,15030
PROCESSING,8275
SUSPECTED_FRAUD,1558

In [20]:
!rm order_count_by_status.csv