# Convert Train/Test Datasets to NumPy Arrays

In [1]:
from zipfile import ZipFile
from skimage.io import imread
import numpy as np

def loadData(filepath):
    """Load every JPEG image in a zip archive together with its label.

    The label of an image is the name of the directory that directly
    contains it inside the archive (e.g. ``train/cat/001.jpg`` -> ``'cat'``).

    Args:
        filepath: Path to the zip archive, passed straight to ``ZipFile``.

    Returns:
        A tuple ``(X, Y)`` of two parallel lists: ``X`` holds the images as
        returned by ``imread`` and ``Y`` holds the label strings, both in
        the order reported by ``ZipFile.namelist()``.

    Raises:
        IndexError: If an image sits at the archive root and therefore has
            no parent directory to use as a label.
    """
    X = []   # store images
    Y = []   # store labels

    with ZipFile(filepath, 'r') as zipdata:
        # loop through every directory/file
        for file_path in zipdata.namelist():
            # Test the real extension, case-insensitively. A substring
            # check would also pick up directories or files that merely
            # contain '.jpg' somewhere in their path (e.g. 'a.jpg.txt').
            if not file_path.lower().endswith('.jpg'):
                continue

            # the label is the name of the image's parent directory
            label = file_path.split('/')[-2]

            # release each member handle as soon as the image is read
            with zipdata.open(file_path) as img_bytes:
                image = imread(img_bytes)

            X.append(image)
            Y.append(label)

    # the 'with' block has already closed the archive at this point
    return (X, Y)

In [2]:
# Read both archives into plain Python lists of images and labels.
X_test, Y_test = loadData('data/test.zip')
X_train, Y_train = loadData('data/train.zip')

# Turn the lists into arrays: images become float32, labels become a
# single column (one row per sample).
X_test = np.array(X_test, dtype=np.float32)
Y_test = np.reshape(np.array(Y_test), (-1, 1))
X_train = np.array(X_train, dtype=np.float32)
Y_train = np.reshape(np.array(Y_train), (-1, 1))

# Write each array to its own .npy file, train split first.
for _npy_path, _array in (
    ('data/X_train.npy', X_train),
    ('data/Y_train.npy', Y_train),
    ('data/X_test.npy', X_test),
    ('data/Y_test.npy', Y_test),
):
    np.save(_npy_path, _array)