In [38]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import IPython.display as display
import os
import glob
import datetime
import logging
import boto3

In [3]:
# Set the filepath where the mini-NPY files are stored
filepath = "/home/ec2-user/SageMaker"

# The glob pattern below is relative, so work from inside the data directory.
os.chdir(filepath)


def _chunk_index(filename):
    """Return the integer that follows the last underscore in the file's stem.

    Raises ValueError if that trailing token is not an integer.
    """
    return int(os.path.splitext(filename)[0].rsplit("_", 1)[-1])


# Every NPY file in the directory is a candidate mini-NPY chunk.
npy_files = glob.glob("*.npy")
npy_stems = [os.path.splitext(name)[0] for name in npy_files]

# A chunk is named "<prefix>_<number>.npy", while the aggregated file written
# later in this notebook is named "<prefix>.npy" and lives in the same
# directory. Without this filter a re-run would pick the aggregate up as if it
# were a chunk, sort it first, and stack the data twice.
chunk_prefixes = {stem.rsplit("_", 1)[0] for stem in npy_stems if "_" in stem}
filelist = [
    name
    for name, stem in zip(npy_files, npy_stems)
    if stem not in chunk_prefixes
]

# Sort the filelist based on the numeric extension
filelist = sorted(filelist, key=_chunk_index)

['cifar10_ra_2_5_500.npy',
 'cifar10_ra_2_5_1000.npy',
 'cifar10_ra_2_5_1500.npy',
 'cifar10_ra_2_5_2000.npy',
 'cifar10_ra_2_5_2500.npy',
 'cifar10_ra_2_5_3000.npy',
 'cifar10_ra_2_5_3500.npy',
 'cifar10_ra_2_5_4000.npy',
 'cifar10_ra_2_5_4500.npy',
 'cifar10_ra_2_5_5000.npy',
 'cifar10_ra_2_5_5500.npy',
 'cifar10_ra_2_5_6000.npy',
 'cifar10_ra_2_5_6500.npy',
 'cifar10_ra_2_5_7000.npy',
 'cifar10_ra_2_5_7500.npy',
 'cifar10_ra_2_5_8000.npy',
 'cifar10_ra_2_5_8500.npy',
 'cifar10_ra_2_5_9000.npy',
 'cifar10_ra_2_5_9500.npy',
 'cifar10_ra_2_5_10000.npy',
 'cifar10_ra_2_5_10500.npy',
 'cifar10_ra_2_5_11000.npy',
 'cifar10_ra_2_5_11500.npy',
 'cifar10_ra_2_5_12000.npy',
 'cifar10_ra_2_5_12500.npy',
 'cifar10_ra_2_5_13000.npy',
 'cifar10_ra_2_5_13500.npy',
 'cifar10_ra_2_5_14000.npy',
 'cifar10_ra_2_5_14500.npy',
 'cifar10_ra_2_5_15000.npy',
 'cifar10_ra_2_5_15500.npy',
 'cifar10_ra_2_5_16000.npy',
 'cifar10_ra_2_5_16500.npy',
 'cifar10_ra_2_5_17000.npy',
 'cifar10_ra_2_5_17500.npy',
 'cif

In [27]:
# Name the aggregated file after the first chunk: take the chunk's stem, drop
# its last underscore-separated token, and put the ".npy" extension back.
s = "_"
first_chunk_stem = os.path.splitext(filelist[0])[0]
stem_tokens = first_chunk_stem.split(s)
all_data_filename = s.join(stem_tokens[:-1]) + ".npy"

# The aggregate is written into the same directory as the chunks.
all_data_filepath = "/".join([filepath, all_data_filename])
print("Will write aggregated data to:", all_data_filepath)

Will write aggregated data to: /home/ec2-user/SageMaker/cifar10_ra_2_5.npy


In [25]:
# Stack all of the smaller NPY files
os.chdir(filepath)

if not filelist:
    raise FileNotFoundError("No mini-NPY files found to stack in " + filepath)

# Load every chunk first and stack once. Calling np.vstack inside the loop
# re-copies the whole accumulated array on each iteration, so the amount of
# copying grows quadratically with the number of chunks.
chunk_arrays = [np.load(os.path.join(filepath, npfile)) for npfile in filelist]

# With a single chunk the loaded array is used as-is (no stacking happens).
all_arrays = np.vstack(chunk_arrays) if len(chunk_arrays) > 1 else chunk_arrays[0]

# Drop the list so the per-chunk copies can be freed once stacked.
del chunk_arrays

# Save the stacked NPY file
np.save(all_data_filepath, all_arrays)

In [40]:
# Upload the aggregated NPY file to S3
bucket = 'sagemaker-may29'
prefix = 'sagemaker/RandAugmentation/'
# NOTE(review): `path` is not read by any cell visible here — confirm whether
# it is still needed.
path = '/home/ec2-user/SageMaker/'

s3_resource = boto3.resource("s3", region_name="us-east-2")
my_bucket = s3_resource.Bucket(bucket)

# The object key is the prefix followed by the aggregated file's name.
s3_key = prefix + all_data_filename
my_bucket.upload_file(Filename=all_data_filepath, Key=s3_key)

### Sanity Checking

In [29]:
# Display the shape of the stacked array that is held in memory.
all_arrays.shape

(50000, 32, 32, 3)

In [36]:
# Read the aggregated file back from disk and report its shape.
aggregated_path = os.path.join(filepath, all_data_filename)
t = np.load(aggregated_path)
print("All data shape:", t.shape)

# Load the second chunk in the sorted list and display its first element,
# for comparison against the aggregated array.
second_chunk_path = os.path.join(filepath, filelist[1])
k = np.load(second_chunk_path)
k[0]



All data shape: (50000, 32, 32, 3)


array([[[255, 255, 255],
        [245, 241, 246],
        [243, 241, 238],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       [[232, 235, 237],
        [203, 205, 208],
        [196, 202, 178],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       [[226, 230, 227],
        [152, 162, 162],
        [134, 145, 127],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       ...,

       [[227, 230, 225],
        [145, 150, 159],
        [121, 133, 123],
        ...,
        [  0,   0,   0],
        [ 73,  88, 114],
        [240, 239, 252]],

       [[230, 234, 229],
        [189, 195, 176],
        [137, 143, 150],
        ...,
        [  0,   0,   0],
        [ 73,  88, 114],
        [240, 240, 252]],

       [[253, 247, 254],
        [230, 234, 232],
        [226, 228, 224],
        ...,
        [  2,   2,   2],
        [ 75,  90, 118],
        [240, 240, 252]]

In [37]:
# Display element 500 of the reloaded aggregate, to compare by eye with the
# k[0] output of the previous cell (first element of the second chunk).
t[500]

array([[[255, 255, 255],
        [245, 241, 246],
        [243, 241, 238],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       [[232, 235, 237],
        [203, 205, 208],
        [196, 202, 178],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       [[226, 230, 227],
        [152, 162, 162],
        [134, 145, 127],
        ...,
        [163, 162, 189],
        [163, 162, 189],
        [163, 162, 189]],

       ...,

       [[227, 230, 225],
        [145, 150, 159],
        [121, 133, 123],
        ...,
        [  0,   0,   0],
        [ 73,  88, 114],
        [240, 239, 252]],

       [[230, 234, 229],
        [189, 195, 176],
        [137, 143, 150],
        ...,
        [  0,   0,   0],
        [ 73,  88, 114],
        [240, 240, 252]],

       [[253, 247, 254],
        [230, 234, 232],
        [226, 228, 224],
        ...,
        [  2,   2,   2],
        [ 75,  90, 118],
        [240, 240, 252]]