### This notebook (which should eventually be converted to a script) will upload images to Amazon S3.

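- Pieces: two helper functions (`list_files`, `check_exists`), a per-dataset configuration dictionary, and the upload loop itself.
- Assumptions: AWS credentials are already configured for `boto3`, and `photodraw32_metadata.csv` is in the working directory with `sketchy_filepath` (local image path) and `s3_filename` (name to use on S3) columns.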


In [None]:
import os
from glob import glob
import boto3
import botocore
from IPython.display import clear_output
import json
import pandas as pd
from PIL import Image

In [None]:
def list_files(path, ext='png'):
    """Recursively collect files under ``path`` that have the given extension.

    Args:
        path: Root directory to walk.
        ext: File extension to match, without the leading dot.

    Returns:
        A list of matching file paths, grouped by directory in the order
        ``os.walk`` visits them. Empty when nothing matches.
    """
    # Local import: the top of the file only imports the ``glob`` function.
    from glob import escape

    pattern = '*.%s' % ext
    matches = []
    for dirpath, _dirnames, _filenames in os.walk(path):
        # Escape the directory part so that names containing glob
        # metacharacters ('[', ']', '*', '?') are matched literally rather
        # than being interpreted as patterns (which silently drops files).
        matches.extend(glob(os.path.join(escape(dirpath), pattern)))
    return matches

In [None]:
## helper to speed things up by not uploading images if they already exist, can be overridden
def check_exists(s3, bucket_name, stim_name):
    """Report whether an object named ``stim_name`` exists in ``bucket_name``.

    Args:
        s3: S3 service resource exposing ``Object(bucket_name, key)``.
        bucket_name: Name of the bucket to look in.
        stim_name: Key (object name) to look for.

    Returns:
        True if the object's metadata could be loaded, False if S3 answered
        with a 404 error code.

    Raises:
        botocore.exceptions.ClientError: For any error code other than 404
            (e.g. access denied). Previously this case fell through and
            returned None, which callers comparing against ``False`` would
            misread as "object already exists".
    """
    try:
        s3.Object(bucket_name, stim_name).load()
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print('The object does not exist.')
            return False
        # Unknown failure: say which object was involved, then let the
        # original error propagate instead of silently returning None.
        print('Something else has gone wrong with {}'.format(stim_name))
        raise
    return True

In [None]:
# Template settings for the 'photodraw-testing' bucket. The dashed values are
# placeholders; the trailing comments say what to fill them in with.
photodraw_testing_data = dict(
    bucket_name='photodraw-testing',
    path_to_stim='sketches',
    full_stim_paths='---------',  # use list_files(path_to_stim)
    stim_name='-----------',  # use os.path.split(path_to_file)[-1]
)

# Settings for the 'photodraw32' bucket: both the local file paths and the
# names to use on S3 are read from columns of the metadata CSV.
df = pd.read_csv('photodraw32_metadata.csv')
photodraw32_data = dict(
    bucket_name='photodraw32',
    path_to_stim='photodraw32_stims',
    full_stim_paths=df['sketchy_filepath'].values,
    s3_stim_names=df['s3_filename'].values,
)

In [None]:
## set up paths, etc.
## bucket_name is the S3 bucket to upload to; the other two describe the local stimuli
bucket_name, path_to_stim, full_stim_paths = (
    photodraw32_data[field]
    for field in ('bucket_name', 'path_to_stim', 'full_stim_paths')
)
print('We have {} images to upload.'.format(len(full_stim_paths)))

In [None]:
## tell user some useful information
## (one print call, one line of output per message)
print(
    'Path to stimuli is : {}'.format(path_to_stim),
    'Uploading to this bucket: {}'.format(bucket_name),
    sep='\n',
)

In [None]:
## safety switch: nothing below touches S3 unless this is set to a truthy value
reallyRun = 0
if reallyRun:

    ## establish connection to s3
    s3 = boto3.resource('s3')

    ## create a bucket with the appropriate bucket name
    try:
        b = s3.create_bucket(Bucket=bucket_name)
        print('Created new bucket.')
    except botocore.exceptions.ClientError as err:
        ## S3 refused the creation request: fall back to a handle on the bucket
        ## as before, but report the real reason instead of always claiming
        ## that the bucket exists
        error_code = err.response['Error']['Code']
        b = s3.Bucket(bucket_name)
        if error_code in ('BucketAlreadyOwnedByYou', 'BucketAlreadyExists'):
            print('Bucket already exists.')
        else:
            print('Could not create bucket ({}); assuming it already exists.'.format(error_code))

    ## do we want to overwrite files on s3?
    overwrite = False

    ## set bucket and objects to public
    ## NOTE(review): this makes the bucket world-readable -- confirm that is intended
    b.Acl().put(ACL='public-read')  ## sets bucket to public

    ## now let's loop through stim paths and actually upload to s3 (woot!)
    s3_stim_names = photodraw32_data['s3_stim_names']
    num_stims = len(full_stim_paths)
    for i, path_to_file in enumerate(full_stim_paths):        # use sorted(full_stim_paths) when not using photodraw32
        stim_name = s3_stim_names[i]        # default: os.path.split(path_to_file)[-1]
        local_name = os.path.split(path_to_file)[-1]
        ## `or` short-circuits, so no existence request is made when overwriting;
        ## otherwise upload only when check_exists explicitly reports False
        if overwrite or check_exists(s3, bucket_name, stim_name) is False:
            print('Now uploading {} as {} | {} of {}'.format(local_name, stim_name, (i + 1), num_stims))
            ## close the local file as soon as the upload call returns
            with open(path_to_file, 'rb') as stim_file:
                s3.Object(bucket_name, stim_name).put(Body=stim_file)  ## upload stimuli
            s3.Object(bucket_name, stim_name).Acl().put(ACL='public-read')  ## set access controls
        else:
            print('Skipping {} | {} of {} because it already exists.'.format(local_name, (i + 1), num_stims))
        clear_output(wait=True)
    ## only report completion when the upload actually ran
    print('Done!')

Example of the resulting public S3 URL for an uploaded image: https://photodraw32.s3.amazonaws.com/n02274259_18407_butterfly_19.png

In [None]:
## sanity check: print every object currently stored in the bucket
## (needs `b`, which is only set once the upload cell has really run)
for s3_object in b.objects.all():
    print(s3_object)