## This notebook takes the sketches from photodraw2x2 and uploads them to Amazon S3

### Import libraries, add helper functions, and set up paths

In [None]:
import os
import boto3
import botocore
import pandas as pd
from PIL import Image
from glob import glob
from IPython.display import clear_output

In [None]:
def list_files(path, ext='png'):
    """Recursively collect the files under `path` that match `*.<ext>`.

    Every directory reached by `os.walk(path)` is globbed for `*.<ext>` and
    the hits are returned together as one flat list of paths.
    """
    pattern = '*.%s' % ext
    matches = []
    for dirpath, _dirnames, _filenames in os.walk(path):
        matches.extend(glob(os.path.join(dirpath, pattern)))
    return matches

## helper to speed things up by not uploading images if they already exist, can be overridden
def check_exists(s3, bucket_name, stim_name):
    """Report whether the object `stim_name` is already present in `bucket_name`.

    Parameters
    ----------
    s3 : object
        S3 resource; only `s3.Object(bucket_name, stim_name).load()` is used.
    bucket_name : str
        Name of the bucket to look in.
    stim_name : str
        Key of the object to look for.

    Returns
    -------
    bool
        True when `.load()` succeeds, False when it fails with a `ClientError`
        whose error code is "404".

    Raises
    ------
    botocore.exceptions.ClientError
        Re-raised for any error code other than "404". Previously this case
        printed a message and implicitly returned None, which callers that
        compare the result with `== False` treated as "already exists".
    """
    try:
        s3.Object(bucket_name, stim_name).load()
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print('The object does not exist.')
            return False
        # Unknown failure: we cannot tell whether the object exists, so do not guess.
        print('Something else has gone wrong with {}'.format(stim_name))
        raise
    return True

In [None]:
# Project root: the parent of the directory the notebook is run from.
proj_dir = os.path.abspath('..')

# Analysis outputs live under <project>/results.
results_dir = os.path.join(proj_dir, 'results')
plot_dir = os.path.join(results_dir, 'plots')
csv_dir = os.path.join(results_dir, 'csv')

# Remaining top-level project folders, as absolute paths.
exp_dir, sketch_dir, gallery_dir, feature_dir = (
    os.path.abspath(os.path.join(proj_dir, folder))
    for folder in ('experiments', 'sketches', 'gallery', 'features')
)
stims_dir = os.path.abspath(
    os.path.join(proj_dir, 'stimuli', 'photodraw32_stims_agglomerate')
)

### Create metadata for loading into S3

In [None]:
# Every PNG found (recursively) under <sketch_dir>/photodraw2x2.
photodraw2x2_sketch_root = os.path.join(sketch_dir, 'photodraw2x2')
destinationFiles = list_files(photodraw2x2_sketch_root, ext='png')

In [None]:
# Column order of the metadata table and of the CSV written below.
metadata_columns = ['gameID',
                    'condition',
                    'category',
                    'image_id',
                    'goal',
                    'filepath',
                    'filename',
                    's3_url']

# Build one record per sketch and create the DataFrame once at the end.
# (Row-by-row DataFrame.append is quadratic and no longer exists in pandas >= 2.0.)
metadata_records = []
for sketch_path in destinationFiles:
    # os.path handles the platform's separator, so this also works off Windows.
    filename = os.path.basename(sketch_path)

    # File name layout: <gameID>_<trialNum>_<condition>_<rest>.<ext>
    gameID, trialNum, condition, rest = filename.split('.')[0].split('_', 3)
    if condition == 'photo':
        # <rest> is <category>_<image>_<ID>_<goal>; splitting from the right
        # leaves any underscores inside <category> untouched.
        category, image, ID, goal = rest.rsplit('_', 3)
        image_id = image + '_' + ID
    else:
        # <rest> is <category>_<image_id>_<goal>
        category, image_id, goal = rest.rsplit('_', 2)

    metadata_records.append({'gameID': gameID,
                             'condition': condition,
                             'category': category,
                             'image_id': image_id,
                             'goal': goal,
                             # Relative to the current working directory, so it
                             # does not depend on where the project is checked out.
                             'filepath': os.path.relpath(sketch_path),
                             'filename': filename,
                             's3_url': "https://photodraw32.s3.amazonaws.com/" + filename})

# Passing `columns` keeps the header even when no sketches were found.
photodraw32_s3_sketches_metadata = pd.DataFrame(metadata_records, columns=metadata_columns)

photodraw32_s3_sketches_metadata.to_csv('photodraw32_s3_sketches_metadata.csv', index=False)
photodraw32_s3_sketches_metadata.head()

### Load into S3

In [None]:
# Settings template for the testing bucket; the dashed values are placeholders.
photodraw_testing_data = dict(
    bucket_name='photodraw-testing',
    path_to_stim='images',
    full_stim_paths='---------',  # use list_files(path_to_stim)
    stim_name='-----------',  # use os.path.split(path_to_file)[-1]
)

# Read the sketch metadata CSV from the working directory and take the
# local file paths and file names from it.
df = pd.read_csv('photodraw32_s3_sketches_metadata.csv')
photodraw32_data = dict(
    bucket_name='photodraw32',
    path_to_stim='photodraw32_sketches',
    full_stim_paths=df['filepath'].values,
    s3_stim_names=df['filename'].values,
)

In [None]:
## unpack the upload settings from photodraw32_data
## (bucket_name is the S3 bucket that will be uploaded to)
bucket_name, path_to_stim, full_stim_paths, stim_names = (
    photodraw32_data[setting]
    for setting in ('bucket_name', 'path_to_stim', 'full_stim_paths', 's3_stim_names')
)
print(f'We have {len(full_stim_paths)} images to upload.')

In [None]:
## show where the stimuli come from and which bucket they are headed to
print(f'Path to stimuli is : {path_to_stim}')
print(f'Uploading to this bucket: {bucket_name}')

In [None]:
## safety switch: nothing touches S3 unless this is set to a truthy value
reallyRun = 0
if reallyRun:

    ## establish connection to s3
    s3 = boto3.resource('s3')

    ## create a bucket with the appropriate bucket name
    try:
        b = s3.create_bucket(Bucket=bucket_name)
        print('Created new bucket.')
    except botocore.exceptions.ClientError as err:
        ## only an "already exists" answer lets us carry on with the existing bucket;
        ## credential, permission or network problems are re-raised instead of being
        ## reported as "Bucket already exists."
        if err.response['Error']['Code'] not in ('BucketAlreadyOwnedByYou', 'BucketAlreadyExists'):
            raise
        b = s3.Bucket(bucket_name)
        print('Bucket already exists.')

    ## do we want to overwrite files on s3?
    overwrite = False

    ## set bucket and objects to public
    b.Acl().put(ACL='public-read') ## sets bucket to public

    ## now let's loop through stim paths and actually upload to s3 (woot!)
    ## keys come from stim_names (the CSV `filename` column) rather than being re-derived
    ## from the path, so they do not depend on this platform's path separator
    n_stims = len(full_stim_paths)
    for i, (path_to_file, stim_name) in enumerate(zip(full_stim_paths, stim_names)):
        ## `overwrite` is tested first so no existence request is made when overwriting;
        ## any falsy answer from check_exists (False or None) leads to an upload attempt
        if overwrite or not check_exists(s3, bucket_name, stim_name):
            print(f'Now uploading {stim_name} | {i+1} of {n_stims}')
            ## `with` closes the file handle once the upload call returns
            with open(path_to_file, 'rb') as stim_file:
                ## upload stimuli and set access controls in the same request
                s3.Object(bucket_name, stim_name).put(Body=stim_file, ACL='public-read')
        else:
            print('Skipping {} | {} of {} because it already exists.'.format(stim_name, (i+1), n_stims))
        clear_output(wait=True)
    print('Done!')
else:
    ## make it obvious that this cell was a no-op
    print('reallyRun is off, so nothing was uploaded.')

Example AWS output (public URL of an uploaded sketch): <br>
https://photodraw32.s3.amazonaws.com/0260-ec8c77a0-b084-4598-88aa-7a76d245f1e8_24_photo_saw_n03474779_668_categorydraw.png

In [None]:
## list what is in the bucket; the handle is built here because `b` is only
## defined when the upload cell ran with reallyRun switched on
bucket_to_list = boto3.resource('s3').Bucket(bucket_name)
for my_bucket_object in bucket_to_list.objects.all():
    print(my_bucket_object)