In [None]:
import boto3
# Shared boto3 clients reused by the cells below: `lambda_client` for the
# per-corpus Lambda invocations, `s3_client` for listing/deleting S3 keys.
lambda_client = boto3.client("lambda")
s3_client = boto3.client("s3")

In [None]:
def s3_bucket_keys(s3_client, bucket_name, bucket_prefix):
    """Yield every object key in ``bucket_name`` that starts with ``bucket_prefix``.

    Pages through ``list_objects_v2`` using the continuation token, so any
    number of matching keys is supported. Yields nothing when no key matches.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)`` (a boto3 S3 client).
        bucket_name: Name of the bucket to list.
        bucket_prefix: Key prefix to match.

    Yields:
        str: The ``Key`` of each matching object, in the order returned.
    """
    kwargs = {'Bucket': bucket_name, 'Prefix': bucket_prefix}
    while True:
        resp = s3_client.list_objects_v2(**kwargs)
        # 'Contents' is absent from the response when the page has no
        # objects, so default to an empty list instead of raising KeyError.
        for obj in resp.get('Contents', []):
            yield obj['Key']

        # No continuation token means this was the last page.
        if 'NextContinuationToken' not in resp:
            break
        kwargs['ContinuationToken'] = resp['NextContinuationToken']

def s3_delete_path(s3_client, bucket, bucket_path):
    """Best-effort delete of every object under ``bucket_path`` in ``bucket``.

    Lists the matching keys with ``s3_bucket_keys`` and deletes them one at a
    time. Any exception stops the deletion for this path and is reported on
    stdout; it is not re-raised, so callers processing many paths keep going.

    Args:
        s3_client: boto3 S3 client used for both listing and deleting.
        bucket: Name of the bucket.
        bucket_path: Key prefix whose objects should be deleted.

    Returns:
        None.
    """
    try:
        for key in s3_bucket_keys(s3_client, bucket_name=bucket, bucket_prefix=bucket_path):
            s3_client.delete_object(Bucket=bucket, Key=key)
    except Exception as error:
        # Stay best-effort (do not raise), but make the failure visible
        # instead of silently pretending the prefix was removed.
        print(f"WARNING: could not fully delete s3://{bucket}/{bucket_path}: {error!r}")

In [None]:
# Read the whole DynamoDB table into `data`, following scan pagination.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('')

# First page of results.
response = table.scan()
data = response['Items']

# A response carrying 'LastEvaluatedKey' means more pages remain; resume
# the scan from that key and keep accumulating items.
while True:
    if 'LastEvaluatedKey' not in response:
        break
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    data += response['Items']

data

In [None]:
import time

# Step Functions client used by wait_for_sfn_sm to poll execution status.
sfn_client=boto3.client('stepfunctions')
def wait_for_sfn_sm(sm_execution_arn, poll_interval=15, client=None):
    """Block until a Step Functions execution is no longer ``RUNNING``.

    Polls ``describe_execution`` and sleeps ``poll_interval`` seconds between
    polls while the reported status is ``'RUNNING'``.

    Args:
        sm_execution_arn: ARN of the execution to wait for.
        poll_interval: Seconds to sleep between polls (default 15).
        client: Object exposing ``describe_execution(executionArn=...)``.
            Defaults to the module-level ``sfn_client``.

    Returns:
        The first non-``'RUNNING'`` status reported (e.g. ``'SUCCEEDED'``),
        or ``None`` if the response carries no ``'status'`` field.
    """
    if client is None:
        client = sfn_client

    while True:
        response = client.describe_execution(executionArn=sm_execution_arn)
        status = response.get('status')
        if status != 'RUNNING':
            return status
        time.sleep(poll_interval)

In [None]:
import json

# Optional allow-list of corpus ids; leave empty to process every item in `data`.
# NOTE(review): this name shadows the built-in `filter`; kept as-is because
# other cells may refer to it.
filter=[]
# (CorpusStateMachine value, corpus_id) pairs; the first element is later
# passed to wait_for_sfn_sm as the execution ARN.
sms = []
for item in data:
    corpus_id = item['corpus_id']
    if filter and corpus_id not in filter:
        continue
    # Serialise the inner body with json.dumps rather than string
    # concatenation so ids containing quotes or backslashes still yield
    # valid JSON.
    payload = json.dumps({"body": json.dumps({"CorpusId": corpus_id})})

    print(payload)
    response = lambda_client.invoke(
        FunctionName='',
        InvocationType='RequestResponse',
        Payload=payload
    )
    json_obj = json.loads(response['Payload'].read())
    # When the function itself errors, the response carries 'FunctionError'
    # and the payload holds error details instead of a 'body'; fail with
    # that detail rather than an opaque KeyError.
    if response.get('FunctionError'):
        raise RuntimeError(
            f"Lambda invocation failed for corpus {corpus_id!r}: {json_obj}"
        )
    response_body = json.loads(json_obj['body'])
    print(response_body)
    sms.append((response_body['CorpusStateMachine'], corpus_id))

sms

In [None]:
# Re-display the (execution ARN, corpus id) pairs collected by the invocation loop.
sms

In [None]:
# Corpus ids whose state machine execution did not end in SUCCEEDED.
delete_corpus_failed = []
# Bucket holding the corpora; must be filled in before running this cell.
corpora_bucket = ""
for sm, corpus_id in sms:
    status = wait_for_sfn_sm(sm_execution_arn=sm)
    if status == "SUCCEEDED":
        # Use "<corpus_id>/" as the prefix: a bare corpus_id would also match
        # keys of any other corpus whose id merely starts with the same
        # characters, and those objects would be deleted too.
        corpus_prefix = f"{corpus_id}/"
        print(f"delete: s3://{corpora_bucket}/{corpus_prefix}")
        s3_delete_path(s3_client, corpora_bucket, corpus_prefix)
    else:
        delete_corpus_failed.append(corpus_id)

if delete_corpus_failed:
    print(f"Delete Corpus Failed: {delete_corpus_failed}")