### Purpose of this notebook
- parses image filenames to generate a metadata CSV
- uploads the image files to S3

### setup

In [None]:
import os
import csv
import pandas as pd
import numpy as np
import boto
from IPython.display import clear_output

In [None]:
## define paths
# Directory holding the images, relative to this notebook.
path_to_png = os.path.join('..', 'images')

# CSV file the filename-parsing cell writes its metadata rows to.
out_path = 'parsed.csv'

# Name of the S3 bucket the upload cell sends the images to.
bucket_name = 'kiddraw-cdm-gallerize-test'

### parse filenames

This parser was written specifically to handle the file-naming conventions of the `kiddraw` project (e.g. `airplane_sketch_age10_cdm_run_v31529008817526.png`). To adapt this notebook to another project, write your own custom filename parser.

In [None]:
def _parse_kiddraw_filename(filename):
    """Split a kiddraw image filename into the metadata fields of one CSV row.

    The name is split on underscores and needs at least six fields, e.g.
    'airplane_sketch_age10_cdm_run_v31529008817526.png'.

    Returns a dict keyed like the CSV header, or None when the filename has
    fewer than six underscore-separated fields.
    """
    arr = filename.split('_')
    if len(arr) < 6:
        return None
    return {
        "class": arr[0],
        # drop the first three characters of the third field ('age10' -> '10')
        "age": arr[2][3:],
        # fourth field + fifth field + first two characters of the sixth
        "expID": arr[3] + arr[4] + arr[5][:2],
        # NOTE(review): the rest of the sixth field is kept verbatim, so a
        # trailing file extension such as '.png' ends up in sessionID --
        # confirm that downstream consumers expect this.
        "sessionID": arr[5][2:],
        "filename": filename,
        "valid": "1",
    }


# Count rows explicitly: an enumerate() index is one less than the number of
# files, restarts for every directory visited by os.walk, and is undefined
# when no files are found at all.
n_parsed = 0
skipped = []
with open(out_path, 'w') as outcsv:
    fieldnames = ["class", "age", "expID", "sessionID", "filename", "valid"]
    writer = csv.DictWriter(outcsv, fieldnames=fieldnames)
    writer.writeheader()
    for _root, _dirs, files in os.walk(path_to_png):
        for fname in files:
            row = _parse_kiddraw_filename(fname)
            if row is None:
                # A file that does not follow the naming convention should
                # not abort the whole run; remember it and report it below.
                skipped.append(fname)
                continue
            writer.writerow(row)
            n_parsed += 1
print("Done parsing {} files.".format(n_parsed))
if skipped:
    print("Skipped {} files that do not match the naming convention: {}".format(len(skipped), skipped))

### upload images to S3

In [None]:
# Set runThis to 0 to skip the upload when re-running the notebook.
runThis = 1
if runThis:
    # No credentials are passed here, so boto has to locate them itself.
    conn = boto.connect_s3()
    # Reuse the bucket if it already exists; only create it when the lookup
    # comes back empty (lookup returns None instead of raising).
    b = conn.lookup(bucket_name)
    if b is None:
        b = conn.create_bucket(bucket_name)
    for ind, im in enumerate(os.listdir(path_to_png)):
        # Only names ending in 'png' are uploaded; everything else is ignored.
        if im[-3:] == 'png':
            # Formatted print() call works on both Python 2 and Python 3.
            print('{} {}'.format(ind, im))
            k = b.new_key(im)
            k.set_contents_from_filename(os.path.join(path_to_png, im))
            # Make the object publicly readable so it can be fetched by URL.
            k.set_acl('public-read')
            # Replace the progress line rather than accumulating one per file.
            clear_output(wait=True)
print('Done!')

#### Each uploaded image gets a URL following this rule:
`'https://' + bucket_name + '.s3.amazonaws.com/' + filename`

So for example: 
If `bucket_name = 'kiddraw-cdm-gallerize-test'` 
and `filename = 'airplane_sketch_age10_cdm_run_v31529008817526.png'` 
then the image URL would be: `https://kiddraw-cdm-gallerize-test.s3.amazonaws.com/airplane_sketch_age10_cdm_run_v31529008817526.png`