# Upload video stimuli to S3

In [None]:
# Which experiment? `bucket_name` is the name of the experiment. It is used below as the
# S3 bucket name; per the original note it is also meant to name the mongoDB database
# (that use is not visible in this notebook).
bucket_name = 'human-physics-benchmarking-XXX-pilot' #CHANGE THIS ⚡️

In [None]:
import os
from glob import glob
import boto3
import botocore
from IPython.display import clear_output
import json
import pandas as pd
from PIL import Image

In [None]:
def list_files(paths, ext='mp4'):
    """Recursively collect files with extension `ext` and derive their S3 names.

    Args:
        paths: A single folder, or a list/tuple of folders, to search. Every
            folder is walked recursively.
        ext: File extension to match, without the leading dot.

    Returns:
        A tuple `(results, names)` of two index-aligned lists. `results` holds
        the matched file paths; `names` holds the name each file is uploaded
        under, built as `<containing folder>_<file name>`. Only the immediate
        containing folder is used in the name (the rest of the path is
        ignored), so make sure that folder name is informative.
    """
    if not isinstance(paths, (list, tuple)):
        paths = [paths]
    results = []
    names = []
    for path in paths:
        for dirpath, _subdirs, _files in os.walk(path):
            # Build path and name from the same match so the two lists can
            # never drift out of alignment (and the tree is only walked once).
            for match in glob(os.path.join(dirpath, '*.%s' % ext)):
                results.append(match)
                containing_folder = os.path.basename(os.path.dirname(match))
                names.append(containing_folder + '_' + os.path.basename(match))
    return results, names

In [None]:
## helper to speed things up by not uploading images if they already exist, can be overridden
def check_exists(s3, bucket_name, stim_name):
    """Report whether the object `stim_name` is present in bucket `bucket_name`.

    Args:
        s3: boto3 S3 service resource (anything exposing `Object(bucket, key)`).
        bucket_name: Name of the bucket to look in.
        stim_name: Key of the object to look for.

    Returns:
        True if loading the object's metadata succeeds, False otherwise.
        A "404" client error means the object is missing. Any other client
        error is reported and also yields False, so that callers comparing
        the result with `== False` and callers using plain truthiness agree:
        an object whose existence cannot be confirmed gets uploaded rather
        than silently skipped.
    """
    try:
        s3.Object(bucket_name, stim_name).load()
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print('The object does not exist.')
        else:
            print('Something else has gone wrong with {}'.format(stim_name))
        return False
    return True

Pass a list of folders if the stimuli are spread across multiple folders. Make sure each containing folder has an informative name, because only that folder name (and not the rest of the path) is used when naming the uploaded files.

In [None]:
## provide a stem directory
local_stem = 'XXX' #CHANGE THIS ⚡️

# Each entry directly under the stem is searched recursively for stimuli.
# os.path.join is used so the stem works with or without a trailing slash;
# plain string concatenation produced 'XXXsubdir' when the slash was missing.
dirnames = [os.path.basename(d) for d in glob(os.path.join(local_stem, '*'))]
paths_to_stim = [os.path.join(local_stem, d) for d in dirnames]

# list_files only returns files matching the requested extension, so stray
# files such as '.DS_Store' are never included and need no extra filtering.
full_stim_paths, filenames = list_files(paths_to_stim) #generate filenames and stimpaths
full_map_paths, mapnames = list_files(paths_to_stim, ext='png') #generate filenames and stimpaths for target/zone map
full_hdf5_paths, hdf5names = list_files(paths_to_stim, ext='hdf5') #generate filenames and stimpaths for hdf5
print('We have {} stimuli to upload.'.format(len(full_stim_paths)))

In [None]:
# make sure to only upload the _img pass
# Paths and S3 names are index-aligned, so they are filtered together as pairs.
# Filtering the two lists independently (the path by its full string, the name
# by its own string) could drop an entry from one list but not the other and
# make files upload under the wrong name. The path decides what is kept, so
# the set of uploaded files is unchanged.
_img_pairs = [(p, n) for p, n in zip(full_stim_paths, filenames) if '_img' in p]
full_stim_paths = [p for p, _ in _img_pairs]
filenames = [n for _, n in _img_pairs]
print('We have {} stimuli to upload.'.format(len(full_stim_paths)))

Upload to S3. This expects the `.aws/credentials` file in your home directory.

In [None]:
def _upload_files(s3, bucket_name, paths, names, overwrite=True):
    """Upload local files to an S3 bucket and make each object public-read.

    Args:
        s3: boto3 S3 service resource.
        bucket_name: Name of the destination bucket.
        paths: Local file paths to upload.
        names: S3 object keys, index-aligned with `paths`.
        overwrite: If True, upload every file. If False, skip files for which
            `check_exists` reports that the key is already in the bucket.

    Raises:
        ValueError: If `paths` and `names` differ in length, since the
            pairing between files and keys would then be wrong.
    """
    if len(paths) != len(names):
        raise ValueError('Got {} paths but {} names; they must be index-aligned.'.format(len(paths), len(names)))
    total = len(paths)
    for i, (path_to_file, stim_name) in enumerate(zip(paths, names)):
        local_name = os.path.split(path_to_file)[-1]
        # `or` short-circuits, so no existence request is sent when overwriting anyway
        if overwrite or not check_exists(s3, bucket_name, stim_name):
            print('Now uploading {} as {} | {} of {}'.format(local_name, stim_name, (i + 1), total))
            # the context manager closes the file handle once the upload returns
            with open(path_to_file, 'rb') as f:
                s3.Object(bucket_name, stim_name).put(Body=f) ## upload stimuli
            s3.Object(bucket_name, stim_name).Acl().put(ACL='public-read') ## set access controls
        else:
            print('Skipping {} | {} of {} because it already exists.'.format(local_name, (i + 1), total))
        clear_output(wait=True)


reallyRun = True
upload_hdf5s = True
## do we want to overwrite files on s3?
overwrite = True
if reallyRun:

    ## establish connection to s3
    ## NOTE: credentials are expected in .aws/credentials in the home folder, not in the repo folder
    s3 = boto3.resource('s3')

    ## create a bucket with the appropriate bucket name
    try:
        b = s3.create_bucket(Bucket=bucket_name)
        print('Created new bucket.')
    except Exception as e:
        ## best effort: any failure to create is treated as "already exists";
        ## the exception is printed so other causes (e.g. missing credentials) stay visible
        b = s3.Bucket(bucket_name)
        print('Bucket already exists.',e)

    ## set bucket and objects to public
    b.Acl().put(ACL='public-read') ## sets bucket to public

    ## now let's loop through stim paths and actually upload to s3 (woot!)
    _upload_files(s3, bucket_name, full_stim_paths, filenames, overwrite=overwrite)
    print('Done uploading videos')
    _upload_files(s3, bucket_name, full_map_paths, mapnames, overwrite=overwrite)
    print('Done uploading target/zone maps')
    if upload_hdf5s:
        _upload_files(s3, bucket_name, full_hdf5_paths, hdf5names, overwrite=overwrite)
        ## only report hdf5s as done when they were actually uploaded
        print('Done uploading hdf5s')
print('Done!')

In [None]:
# Sanity check: print every object currently stored in the bucket `b`.
for stored_object in b.objects.all():
    print(stored_object)