In [67]:
import boto3
import random
import shutil

In [68]:
S3_BUCKET = "s3-avalanche-guard"

In [78]:
# copy an S3 object to another object
# example: copy_s3_object('my_bucket', old_key='tmp/test.txt', new_key='tmp/tmp2/test.txt')
def copy_s3_object(bucket: str, old_key: str, new_key: str) -> None:
    boto3.resource('s3').Object(bucket,  new_key).copy_from(CopySource=f'{bucket}/{old_key}')
    

# move an S3 object to another object
# example: move_s3_object('my_bucket', old_key='tmp/test.txt', new_key='tmp/tmp2/test.txt')
def move_s3_object(bucket: str, old_key: str, new_key: str) -> None:
    boto3.resource('s3').Object(bucket,  new_key).copy_from(CopySource=f'{bucket}/{old_key}')
    boto3.client('s3').delete_object(Bucket=bucket, Key=old_key)

# copy an S3 object to another folder
# example: copy_s3_object('my_bucket', old_folder='tmp/', new_folder='tmp/tmp2/', object_name='test.txt')
def s3_copy_to_another_folder(bucket: str, old_folder: str, new_folder: str, object_name:str) -> None:
    
    old_key = old_folder+object_name
    new_key = new_folder+object_name
    boto3.resource('s3').Object(bucket,  new_key).copy_from(CopySource=f'{bucket}/{old_key}')
    

# copy an S3 object to another folder
# example: copy_s3_object('my_bucket', old_folder='tmp/', new_folder='tmp/tmp2/', object_name='test.txt')
def s3_copy_to_another_folderAndBucket(src_bucket: str, dest_bucket,old_folder: str, new_folder: str, object_name:str) -> None:
    
    old_key = old_folder+object_name
    new_key = new_folder+object_name
    boto3.resource('s3').Object(dest_bucket,  new_key).copy_from(CopySource=f'{src_bucket}/{old_key}')
    
        

    # move an S3 object to another folder
# example: move_s3_object('my_bucket', old_folder='tmp/', new_folder='tmp/tmp2/', object_name='test.txt')
def s3_move_to_another_folder(bucket: str, old_folder: str, new_folder: str, object_name:str) -> None:
    
    old_key = old_folder+object_name
    new_key = new_folder+object_name
    boto3.resource('s3').Object(bucket,  new_key).copy_from(CopySource=f'{bucket}/{old_key}')
    boto3.client('s3').delete_object(Bucket=bucket, Key=old_key)

    
#print bucket contents
def print_all_s3_objects(bucket_name:str, prefix:str):
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    page_iterator = paginator.paginate(Bucket=bucket_name,Prefix=prefix)

    for page in page_iterator:
        if 'Contents' in page:
            for obj in page['Contents']:
                objkey = obj["Key"]
                if objkey[len(objkey)-len(suffix):] == suffix:
                    print(f'Name: {objkey} | Size: {obj["Size"]}')
        else:
            print("Bucket is empty or does not exist")

# Get bucket contents as a list
def list_all_s3_objects(bucket_name:str, prefix:str, suffix:str, limit:int=100000)-> [str]:
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    page_iterator = paginator.paginate(Bucket=bucket_name,Prefix=prefix)

    i = 0 
    objlist = []
    for page in page_iterator:
        if 'Contents' in page:
            for obj in page['Contents']:
                objkey = obj["Key"]
                if objkey[len(objkey)-len(suffix):] == suffix:
                    i=i+1
                    if i > limit:
                        break
                    objlist.append( objkey.replace(prefix, ""))
                if i>limit:
                    break
            if i> limit:
                break

    return objlist


# copy multiple objects from one folder to another folder (matching a key prefix)
# example: copy_s3_object_basedOnKey('my_bucket', old_folder='tmp/', new_folder='tmp/tmp2/', object_name='test.txt')
def copy_s3_object_basedOnKey(bucket: str, old_folder: str, new_folder: str, key_prefix:str, limit:int) -> None:
    
    # First get the listing of all the objects
    objectList = list_all_s3_objects(S3_BUCKET, key_prefix)
    i = 0
    for obj in objectList:
        i = i+1
        if i> limit:
            break
        print(f"Copying obejct {obj} from folder {old_folder} to folder {new_folder} . Moved file count={i}                                    ", end='\r')
        s3_copy_to_another_folder(bucket, old_folder, new_folder, obj)

# copy multiple objects from one bucket/folder to another bucket/folder (matching a key prefix)
# example: copy_s3_object_basedOnKey('my_bucket', old_folder='tmp/', new_folder='tmp/tmp2/', object_name='test.txt')
def copy_s3_object_basedOnKeyFromAnotherBucket(src_bucket: str, dest_bucket:str, old_folder: str, new_folder: str, key_prefix:str, limit:int) -> None:
    
    # First get the listing of all the objects
    objectList = list_all_s3_objects(src_bucket, key_prefix)
    i = 0
    for obj in objectList:
        i = i+1
        if i> limit:
            break
        print(f"Copying obejct {obj} from folder {old_folder} to folder {new_folder} . Moved file count={i}                                   ", end='\r')
        s3_copy_to_another_folderAndBucket(src_bucket, dest_bucket, old_folder, new_folder, obj)
        
              


### For Experiment 01 (Train a binary classifier to identify if the picture is a terrain or not)

In [70]:
# Prepare data
# Move all avalanche images to one folder (positive)
# then copy imagenet data and move to another folder (negative)

In [98]:
import os
source_bucket = "jumpstart-cache-prod-us-west-1"
#source_folder = "training-datasets/ai_services_assets/custom_labels/datasets/multi_class_flowers/test"
Dest_folder = "data/experiments/exp01-terrain-binary/negative"

imglist = list_all_s3_objects(source_bucket,"",".jpg",15)

i=0
LIMIT = 15
for imgkey in imglist:
    i=i+1
    if i> LIMIT:
        break
    #filename1 = os.path.basename(imgkey)    
    filename = imgkey.replace("/","-")
    print (f"count={i}, fullkey = {imgkey},  objectname = {filename}", end="\n")
    try:
        boto3.resource('s3').Object(S3_BUCKET, filename ).copy_from(CopySource=f"{source_bucket}/{imgkey}")
    except Exception as e:
        print(f"Skipping this file imgkey = {imgkey} as error happened. Error = {e} ", end="\n")
        if "Access Denied" in str(e):
            i = i-1
        
    

#copy_s3_object_basedOnKeyFromAnotherBucket(source_bucket,S3_BUCKET, source_folder, Dest_folder, "jumpstart-cache-prod-us-west-1/training-datasets/", 4 )

count=1, fullkey = 1p-notebooks-datasets/caltech/images/sample_bath_tub_image.jpg,  objectname = 1p-notebooks-datasets-caltech-images-sample_bath_tub_image.jpg
Skipping this file imgkey = 1p-notebooks-datasets/caltech/images/sample_bath_tub_image.jpg as error happened. Error = An error occurred (AccessDenied) when calling the CopyObject operation: Access Denied 
count=1, fullkey = ai_services_assets/custom_labels/datasets/multi_class_flowers/test/camellia4.jpg,  objectname = ai_services_assets-custom_labels-datasets-multi_class_flowers-test-camellia4.jpg
Skipping this file imgkey = ai_services_assets/custom_labels/datasets/multi_class_flowers/test/camellia4.jpg as error happened. Error = An error occurred (AccessDenied) when calling the CopyObject operation: Access Denied 
count=1, fullkey = ai_services_assets/custom_labels/datasets/multi_class_flowers/test/camellia5.jpg,  objectname = ai_services_assets-custom_labels-datasets-multi_class_flowers-test-camellia5.jpg
Skipping this file i

In [87]:
imgkey = "ai_services_assets/custom_labels/datasets/multi_class_flowers/test/camellia4.jpgets-custom_labels-datasets-multi_class_flowers-test-camellia4.jpg"
filename = "delme1.jpg"
#boto3.resource('s3').Object(S3_BUCKET, filename ).copy_from(CopySource=f"{source_bucket}/{imgkey}")
boto3.resource('s3').Object(S3_BUCKET, filename ).copy_from(CopySource=f"jumpstart-cache-prod-us-west-1/{imgkey}")


NoSuchKey: An error occurred (NoSuchKey) when calling the CopyObject operation: The specified key does not exist.

In [88]:

s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')
page_iterator = paginator.paginate(Bucket=bucket_name,Prefix=prefix)

objlist = []
for page in page_iterator:
    if 'Contents' in page:
        for obj in page['Contents']:
            objkey = obj["Key"]
            if objkey[len(objkey)-len(suffix):] == suffix:
                objlist.append( objkey.replace(prefix, ""))

NameError: name 'bucket_name' is not defined