In [6]:
### Deep Fashion
import h5py
from PIL import Image 
import os
import numpy as np

def store_many_hdf5(images):
    """Write a collection of images to ``DeepFashion.h5``.

    The file is opened in ``"w"`` mode in the current working directory, so
    an existing file with the same name is overwritten. It receives a single
    dataset named ``"img"`` stored with the HDF5 ``STD_U8BE`` type.

    Parameters
    ----------
    images : sequence of array-like
        Images to store. They are stacked with ``np.asarray``, and the
        dataset takes the shape of the stacked array.
    """
    # Stack once so the dataset shape and its payload come from the same array.
    image_array = np.asarray(images)

    # The context manager closes the file even if dataset creation raises,
    # which a bare open()/close() pair would not.
    with h5py.File('DeepFashion.h5', "w") as file:
        file.create_dataset(
            "img", image_array.shape, h5py.h5t.STD_U8BE, data=image_array)
    
# Load every preprocessed DeepFashion image as a NumPy array, then write
# them to HDF5 in one call.
image_dir = "Processed_DeepFashion"
images = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting dataset reproducible across runs and machines.
for filename in sorted(os.listdir(image_dir)):
    # Skip entries whose name contains ".DS" or "ipynb"
    # (e.g. .DS_Store, .ipynb_checkpoints).
    if ".DS" in filename or "ipynb" in filename:
        continue
    # Converting inside the `with` block reads the pixels before the
    # underlying file is closed.
    with Image.open(os.path.join(image_dir, filename)) as img:
        images.append(np.asarray(img))

store_many_hdf5(images)

In [14]:
### CUB
import h5py
from PIL import Image 
import os
import numpy as np

def store_many_hdf5(images, poses):
    """Write the CUB images, poses and train/test indices to ``CUB.h5``.

    The file is opened in ``"w"`` mode in the current working directory, so
    an existing file with the same name is overwritten. Four datasets are
    created: ``"img"`` (HDF5 ``STD_U8BE``), ``"pose"`` (``float32``),
    ``"train"`` and ``"test"`` (both ``int32``).

    Parameters
    ----------
    images : sequence of array-like
        Images to store; stacked with ``np.asarray``.
    poses : sequence of array-like
        Pose arrays to store; stacked with ``np.asarray``.

    Notes
    -----
    ``train_ids`` and ``test_ids`` are NOT parameters: they are read from
    the module-level (notebook) namespace and must be defined before this
    function is called.

    The shape of the stacked image array is printed as a sanity check.
    """
    # Convert everything up front so that a missing global or a ragged input
    # fails before the output file is created/truncated.
    image_array = np.asarray(images)
    pose_array = np.asarray(poses)
    train_array = np.asarray(train_ids)
    test_array = np.asarray(test_ids)
    print(image_array.shape)

    # The context manager closes the file even if a dataset creation raises.
    with h5py.File('CUB.h5', "w") as file:
        file.create_dataset("img", image_array.shape, h5py.h5t.STD_U8BE, data=image_array)
        file.create_dataset("pose", pose_array.shape, "float32", data=pose_array)
        file.create_dataset("train", train_array.shape, "int32", data=train_array)
        file.create_dataset("test", test_array.shape, "int32", data=test_array)
    
images = []
poses = []
# Positions (within `images`) of the train / test samples; read as globals
# by store_many_hdf5.
train_ids = []
test_ids = []

k = 15  # number of part_locs.txt rows consumed per image

# Read all part annotations up front; the context manager closes the file.
with open('CUB_200_2011/parts/part_locs.txt', 'r') as fk:
    Lines = fk.readlines()

with open('CUB_200_2011/images.txt', 'r') as fp, \
        open('CUB_200_2011/train_test_split.txt', 'r') as f:

    idx = 0        # position the next kept image will occupy in `images`
    l_pointer = 0  # first row in `Lines` belonging to the current image

    for line_img in fp:
        image_id, path = line_img.split()

        # The split file is consumed in lockstep with images.txt; fail loudly
        # if the two drift apart instead of silently mislabelling samples.
        line_lbl = f.readline().split()
        if not line_lbl or line_lbl[0] != image_id:
            raise ValueError(
                "train_test_split.txt is not aligned with images.txt at image id "
                + image_id)

        with Image.open("CUB_200_2011/images/" + path) as img:
            # Images in mode "L" are skipped entirely.
            if img.mode != "L":
                # Split flag "0" goes to the test set, anything else to train.
                if line_lbl[1] == "0":
                    test_ids.append(idx)
                else:
                    train_ids.append(idx)
                idx += 1

                # Original size, taken before resizing, is used to normalise
                # the part coordinates.
                w, h = img.size
                images.append(np.asarray(img.resize((128, 128))))

                pose = np.zeros((k, 2))
                for part in range(k):
                    line_pose = Lines[l_pointer + part].split()
                    if line_pose[0] != image_id:
                        raise ValueError(
                            "part_locs.txt is not aligned with images.txt at image id "
                            + image_id)
                    # Columns 2 and 3 are divided by width and height respectively.
                    pose[part] = [float(line_pose[2]) / w, float(line_pose[3]) / h]
                poses.append(pose)

        # Advance past this image's part rows whether or not it was kept.
        l_pointer += k

store_many_hdf5(images, poses)

11780 11780
(11780, 128, 128, 3)


In [24]:
### MAFL
import h5py
from PIL import Image 
import os
import numpy as np

def store_many_hdf5(images, poses):
    """Write the MAFL images and poses to ``MAFL.h5``.

    The file is opened in ``"w"`` mode in the current working directory, so
    an existing file with the same name is overwritten. Two datasets are
    created: ``"img"`` (HDF5 ``STD_U8BE``) and ``"pose"`` (``float32``).

    Parameters
    ----------
    images : sequence of array-like
        Images to store; stacked with ``np.asarray``.
    poses : sequence of array-like
        Pose arrays to store; stacked with ``np.asarray``.

    The shape of the stacked image array is printed as a sanity check.
    """
    # Convert up front so a ragged input fails before the output file is
    # created/truncated.
    image_array = np.asarray(images)
    pose_array = np.asarray(poses)
    print(image_array.shape)

    # The context manager closes the file even if a dataset creation raises.
    with h5py.File('MAFL.h5', "w") as file:
        file.create_dataset("img", image_array.shape, h5py.h5t.STD_U8BE, data=image_array)
        file.create_dataset("pose", pose_array.shape, "float32", data=pose_array)
   


images = []
poses = []        # landmarks for every row of the landmark file, in file order
final_poses = []  # landmarks for the images actually loaded, aligned with `images`
train_ids = set()
test_ids = set()

k = 5    # landmark points per image
# Divisors used to normalise landmark x / y coordinates.
# NOTE(review): presumably the pixel size of the aligned images - confirm.
h = 218
w = 178


def _load_split(list_path, landmarks):
    """Load the images named in `list_path` plus their landmark arrays.

    Reading stops at the first blank line or at end of file. Each image is
    resized to 128x128. Returns ``(split_images, split_poses)`` in file order.
    """
    split_images = []
    split_poses = []
    with open(list_path, 'r') as f:
        for raw_line in f:
            path = raw_line.rstrip()
            if not path:
                break
            # Resizing inside the `with` block reads the pixels before the
            # underlying file is closed.
            with Image.open("img_align_celeba/" + path) as img:
                split_images.append(np.asarray(img.resize((128, 128))))

            # The numeric part of the file name, minus one, indexes `landmarks`.
            # NOTE(review): assumes landmark rows are in file-number order - confirm.
            idx = int(path[:path.index(".")]) - 1
            split_poses.append(landmarks[idx])
    return split_images, split_poses


with open('img_align_celeba/list_landmarks_align_celeba.txt', 'r') as f:
    f.readline()  # the first line of the file is skipped

    for raw_line in f:
        fields = raw_line.split()
        if not fields:
            break  # a blank line ends the landmark table

        # fields[0] is not used; the remaining entries are (x, y) pairs.
        pose = np.zeros((k, 2))
        for point in range(k):
            pose[point] = [float(fields[1 + 2 * point]) / w,
                           float(fields[2 + 2 * point]) / h]
        poses.append(pose)

print("all landmarks read", len(poses))

# Training images are appended first, then test images; the printed counts
# are cumulative.
for list_path, label in (('img_align_celeba/training.txt', "train loaded"),
                         ('img_align_celeba/testing.txt', "test loaded")):
    split_images, split_poses = _load_split(list_path, poses)
    images.extend(split_images)
    final_poses.extend(split_poses)
    print(label, len(final_poses))


store_many_hdf5(images, final_poses)

all landmarks read 202599
train loaded 19000
test loaded 20000
20000 20000
(20000, 128, 128, 3)
