# Importing PNGs

In [None]:
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage import io

### Try opening one image

In [None]:
# Read a single PNG as a grayscale array. `as_gray` is the supported keyword;
# the older `as_grey` spelling was deprecated and later removed from scikit-image.
one_image = io.imread('100_images/00000004_000.png', as_gray=True)
one_image = np.asarray(one_image)
# Use a gray colormap so intensities are not rendered in false colors.
imgplot = plt.imshow(one_image, cmap='gray')

### Opening all images in a folder

In [None]:
filetype = 'png'
filepath = '100_images'

# Glob pattern matching every file in `filepath` that ends in '.<filetype>'.
globpath = filepath + '/*.' + filetype

# Read the images in sorted filename order so list positions are reproducible.
# `as_gray` replaces the `as_grey` keyword, which newer scikit-image rejects.
image_list = [
    np.asarray(io.imread(filename, as_gray=True))
    for filename in sorted(glob.glob(globpath))
]

In [None]:
# Sanity check: display the first entry of image_list with a gray colormap
imgplot = plt.imshow(X=image_list[0], cmap='gray')

### Writing a def

In [7]:
def import_grayscale_images(filetype, filepath):
    """Load every image of a given type from a folder as grayscale arrays.

       Files are matched with the glob pattern '<filepath>/*.<filetype>' and
       read in sorted filename order, so position i of the result refers to
       the same file for a given folder content.

       INPUTS:
       * filetype, extension of the images to be imported, without the
         leading dot. String. e.g. 'png'
       * filepath, path to folder containing all the images. String.

       OUTPUTS:
       * image_list, Python list holding one numpy array of shape
         (pixel height, pixel width) per image. The list is empty when no
         file matches. When all images share one size,
         np.shape(image_list) is (# of images, pixel height, pixel width).

       NOTE: pixel values are whatever skimage.io.imread(..., as_gray=True)
       returns. Per the scikit-image documentation, images that are already
       grayscale are not converted (e.g. they keep their 0-255 uint8 values),
       while color images are converted to floating-point grayscale.

    """

    import glob
    import os

    import numpy as np
    from skimage import io

    # os.path.join copes with a trailing separator on `filepath`
    globpath = os.path.join(filepath, '*.' + filetype)

    # `as_gray` is the supported keyword; the `as_grey` spelling used before
    # was deprecated and then removed from scikit-image.
    return [
        np.asarray(io.imread(filename, as_gray=True))
        for filename in sorted(glob.glob(globpath))
    ]


### Testing the def

In [8]:
# Import every PNG found in '100_images' and report the overall shape
mylist = import_grayscale_images(filetype='png', filepath='100_images')
print(np.shape(mylist))

(100, 1024, 1024)


In [9]:
# Echo the imported list to inspect the raw pixel arrays (the output is long)
mylist

[array([[202, 199, 195, ...,   5,   2,   0],
        [199, 196, 195, ...,   5,   2,   0],
        [196, 194, 193, ...,   5,   2,   0],
        ...,
        [255, 255, 255, ...,   0,   0,   0],
        [255, 255, 254, ...,   0,   0,   0],
        [255, 255, 255, ...,   0,   0,   0]], dtype=uint8),
 array([[208, 205, 206, ..., 204, 215, 139],
        [209, 203, 205, ..., 202, 210, 134],
        [206, 204, 202, ..., 202, 212, 136],
        ...,
        [101,  86,  73, ...,   0,   0,   0],
        [101,  88,  72, ...,   0,   0,   0],
        [ 99,  86,  70, ...,   0,   0,   0]], dtype=uint8),
 array([[ 7, 10,  9, ..., 15, 19, 16],
        [13, 21, 18, ..., 29, 33, 23],
        [12, 19, 17, ..., 26, 28, 17],
        ...,
        [18, 30, 26, ..., 28, 31, 19],
        [21, 34, 30, ..., 29, 31, 19],
        [11, 18, 16, ..., 14, 15,  9]], dtype=uint8),
 array([[199, 175, 152, ..., 252, 251, 251],
        [150, 128, 114, ..., 243, 250, 252],
        [122, 110, 102, ..., 193, 222, 239],
       

# Creating an array of the filenames

In [None]:
filetype = 'png'
filepath = '100_images'

globpath = filepath + '/*.' + filetype

# Bare image names: cut the leading '<filepath>/' and the trailing
# '.<filetype>' from every matched path, keeping sorted filename order.
prefix_len = len(filepath) + 1
suffix_len = len(filetype) + 1
labels = [path[prefix_len:-suffix_len] for path in sorted(glob.glob(globpath))]

In [2]:
# now write a def

def image_name_list(filetype, filepath, withextension):
    """Return the sorted names of all images of one type in a folder.

       INPUTS:
       * filetype, extension of the images to list, without the leading
         dot. String. e.g. 'png'
       * filepath, path to folder containing all the images. String.
       * withextension, when truthy the names keep their '.<filetype>'
         ending (e.g. 'a.png'); otherwise it is removed (e.g. 'a').

       OUTPUTS:
       * labels, list of file names without the folder part, in sorted
         order. Empty list when no file matches.

    """

    import glob
    import os

    globpath = os.path.join(filepath, '*.' + filetype)

    # basename() drops the folder part whatever form `filepath` takes.
    # Slicing off len(filepath)+1 characters ate the first letter of every
    # name when `filepath` ended with a path separator.
    labels = [os.path.basename(path) for path in sorted(glob.glob(globpath))]

    if not withextension:
        # every match ends in '.<filetype>', so cut exactly that many characters
        ext_len = len(filetype) + 1
        labels = [name[:-ext_len] for name in labels]

    return labels



In [38]:
# Try the function on the sample folder, keeping the '.png' ending on each name
filetype = 'png'
filepath = '100_images'
withextension = True
labels = image_name_list(filetype, filepath, withextension)

# Converting the images to an HDF5 File

In [95]:
# Create the HDF5 file together with the two datasets it will hold

import tables

hdf5_path = "100_images.hdf5"
img_dtype = tables.UInt8Atom()
height = 1024
width = 1024
num_images = len(labels)

# mode='w' creates the file (an existing file of that name is overwritten)
hdf5_file = tables.open_file(hdf5_path, mode='w')

# Extendable array of images: the leading dimension of 0 is the one that
# grows each time an image is appended.
image_storage = hdf5_file.create_earray(
    where=hdf5_file.root,
    name='images',
    atom=img_dtype,
    shape=(0, height, width),
)

# Fixed array holding the image names, written in full at creation time
label_names = hdf5_file.create_array(
    where=hdf5_file.root,
    name='image_labels',
    obj=labels,
)

In [96]:
# Needs the open `image_storage` array from the previous cell and
# export_grayscale_images_HDF5, which this notebook defines in a later cell.
export_grayscale_images_HDF5('png', '100_images')

Image data: 10/100
Image data: 20/100
Image data: 30/100
Image data: 40/100
Image data: 50/100
Image data: 60/100
Image data: 70/100
Image data: 80/100
Image data: 90/100
Done !


In [94]:
# Close the HDF5 file handle that was opened with tables.open_file
hdf5_file.close()

In [93]:
def export_grayscale_images_HDF5(filetype, filepath, storage=None):
    """Append every image of a given type in a folder to an HDF5 array.

       Files are matched with the glob pattern '<filepath>/*.<filetype>',
       read as grayscale in sorted filename order, and appended one by one
       to `storage`.

       INPUTS:
       * filetype, extension of the images to be exported, without the
         leading dot. String. e.g. 'png'
       * filepath, path to folder containing all the images. String.
       * storage, optional. Object with an `append` method that receives
         each image with a leading axis of length 1 added. When omitted,
         the notebook-level `image_storage` array is used, which must
         already exist.

       OUTPUTS:
       * None. Progress is printed after every 10 images, then 'Done !'.

    """

    import glob
    import os

    from skimage import io

    if storage is None:
        # backward-compatible default: the array created at notebook level
        storage = image_storage

    # os.path.join copes with a trailing separator on `filepath`
    globpath = os.path.join(filepath, '*.' + filetype)

    # glob once so the reported total and the loop cover the same files
    locations = sorted(glob.glob(globpath))
    num_images = len(locations)

    for i, location in enumerate(locations):
        # report progress once every 10 images
        if i % 10 == 0 and i > 0:
            print('Image data: {}/{}'.format(i, num_images))
        # `as_gray` is the supported keyword; `as_grey` was removed
        # from scikit-image
        img = io.imread(location, as_gray=True)
        # add any image pre-processing here
        # img[None] adds a leading axis of length 1, i.e. one entry of storage
        storage.append(img[None])

    print('Done !')

In [55]:
filetype = 'png'
filepath = '100_images'
globpath = filepath + '/*.' + filetype

for i, location in enumerate(sorted(glob.glob(globpath))):
    # report progress once every 10 images
    if i % 10 == 0 and i > 1:
        print('Image data: {}/{}'.format(i, len(labels)))
    # read the image as a grayscale array; `as_gray` replaces the `as_grey`
    # keyword, which newer scikit-image rejects
    img = io.imread(location, as_gray=True)
    print(img.shape)
    # add any image pre-processing here
    # The names were already written when `label_names` was created, and the
    # former argument-less `label_names.append()` had nothing to append, so
    # only the image itself is stored here.
    image_storage.append(img[None])

(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 10/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 20/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 30/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 40/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 50/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
Image data: 60/100
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1