In [57]:
import os
import numpy as np
import cv2
import h5py
import math

In [58]:
def crop_square(img, size, interpolation=cv2.INTER_AREA):
    """Center-crop an image to a square and resize it to ``size`` x ``size``.

    Args:
        img: Image array whose first two dimensions are height and width.
        size: Side length, in pixels, of the returned square image.
        interpolation: OpenCV interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The output of ``cv2.resize`` applied to the centered square crop.
    """
    height, width = img.shape[:2]
    side = min(height, width)

    # Center the window: half of the surplus (rounded down) is dropped
    # before the crop, the remainder after it.
    top = (height - side) // 2
    left = (width - side) // 2
    square = img[top:top + side, left:left + side]

    return cv2.resize(square, (size, size), interpolation=interpolation)

In [59]:
def process_images(img_num, image_path, size=64):
    """Load sequentially numbered JPEG files and turn them into square thumbnails.

    Files are looked up as ``0.jpg`` ... ``{img_num - 1}.jpg`` inside
    ``image_path``. Any file for which ``cv2.imread`` returns ``None``
    (missing or unreadable) is skipped, so the result can hold fewer than
    ``img_num`` images.

    Args:
        img_num: Exclusive upper bound of the file indices to try.
        image_path: Directory that contains the numbered ``.jpg`` files.
        size: Side length passed to ``crop_square`` for every image.
            Defaults to 64, the value that was previously hard-coded.

    Returns:
        A list with one ``crop_square`` result per readable image, in
        ascending index order.
    """
    img_arr = []

    for i in range(img_num):
        file_path = os.path.join(image_path, f"{i}.jpg")
        img = cv2.imread(file_path)
        if img is None:
            # imread signals failure with None instead of raising.
            continue
        img_arr.append(crop_square(img, size))

    return img_arr

In [60]:
def create_labels(arr, label):
    """Pair every element of ``arr`` with the same ``label``.

    Args:
        arr: Indexable, sized collection of samples.
        label: Value stored under ``'y'`` for every sample.

    Returns:
        A list of ``{'x': sample, 'y': label}`` dicts, in the order of ``arr``.
    """
    return [{'x': arr[idx], 'y': label} for idx in range(len(arr))]

In [61]:
def split_shuffled_data(dataset):
    """Separate a list of ``{"x": ..., "y": ...}`` records into two lists.

    Args:
        dataset: Indexable, sized collection of records, each with an
            ``"x"`` and a ``"y"`` entry.

    Returns:
        A tuple ``(xs, ys)`` of lists holding the ``"x"`` and ``"y"`` values
        in the order the records appear in ``dataset``.
    """
    records = [dataset[idx] for idx in range(len(dataset))]
    features = [record["x"] for record in records]
    targets = [record["y"] for record in records]

    return features, targets

In [62]:
def create_h5file(x, y, df_type):
    """Write a feature/label pair to ``{df_type}_set.h5`` in the working directory.

    The file is opened in ``'w'`` mode, so an existing file with the same
    name is replaced.

    Args:
        x: Data stored as the ``"x"`` dataset.
        y: Data stored as the ``"y"`` dataset.
        df_type: Prefix of the output file name (e.g. ``"train"`` gives
            ``train_set.h5``).
    """
    h5file = f'{df_type}_set.h5'

    # The context manager closes the file on exit (also when a write fails),
    # so no explicit close() call is needed inside the block.
    with h5py.File(h5file, 'w') as h5f:
        h5f["x"] = x
        h5f["y"] = y

In [63]:
# Absolute paths of the two class folders, resolved against the current
# working directory of the kernel.
cat_files_path = os.path.abspath("PetImages/Cat")
dog_files_path = os.path.abspath("PetImages/Dog")

In [64]:
# Number of directory entries per class folder. These counts are later used
# as the upper bound of the "<index>.jpg" lookup in process_images.
# NOTE(review): os.listdir counts every entry, not only .jpg files, so a count
# can exceed the number of actual images.
cat_img_nums= len(os.listdir(cat_files_path))
dog_img_nums= len(os.listdir(dog_files_path))

In [65]:
# Report how many directory entries were found for each class.
print("Cats: ", cat_img_nums)
print("Dogs: ", dog_img_nums)

Cats:  12500
Dogs:  12501


In [66]:
# Load and resize the cat images, then stack them into a single array.
# process_images skips files that cannot be read, so the first dimension
# can be smaller than cat_img_nums.
cats_arr=process_images(cat_img_nums, cat_files_path)
cat_images = np.array(cats_arr)
cat_images.shape

(12476, 64, 64, 3)

In [67]:
# Load and resize the dog images, then stack them into a single array.
# process_images skips files that cannot be read, so the first dimension
# can be smaller than dog_img_nums.
dogs_arr=process_images(dog_img_nums, dog_files_path)
dog_images = np.array(dogs_arr)
dog_images.shape

(12470, 64, 64, 3)

In [68]:
# Number of images that were actually loaded for each class.
print("Cat Examples: ", cat_images.shape[0])
print("Dog Examples: ", dog_images.shape[0])

Cat Examples:  12476
Dog Examples:  12470


In [92]:
def create_dateset(cat_images, dog_images, train_size=80, seed=None):
    """Split both classes into train/test parts, shuffle them and save to HDF5.

    The first ``train_size`` percent of each class array goes to the training
    set and the remainder to the test set. Cats are labelled 1 and dogs 0.
    Each set is shuffled and written through ``create_h5file`` as
    ``train_set.h5`` and ``test_set.h5``; the label arrays are stored with
    shape ``(1, m)``.

    The function name is kept as-is (including its spelling) because callers
    depend on it.

    Args:
        cat_images: Array of cat images, indexed along its first axis.
        dog_images: Array of dog images, indexed along its first axis.
        train_size: Percentage (0-100) of each class used for training.
        seed: Optional seed for the shuffles. With the default ``None`` the
            global NumPy random state is used, exactly as before; passing an
            integer makes the shuffle order reproducible.

    Returns:
        None. The result is the two files written to the working directory.
    """
    # A dedicated RandomState keeps a seeded run from touching the global RNG.
    rng = np.random if seed is None else np.random.RandomState(seed)

    m1 = math.floor(cat_images.shape[0] * (train_size / 100))
    m2 = math.floor(dog_images.shape[0] * (train_size / 100))

    # Label each split: cat -> 1, dog -> 0.
    train_dataset = (create_labels(cat_images[:m1], 1)
                     + create_labels(dog_images[:m2], 0))
    test_dataset = (create_labels(cat_images[m1:], 1)
                    + create_labels(dog_images[m2:], 0))

    # Shuffle the (image, label) records together so the pairs stay aligned.
    rng.shuffle(train_dataset)
    rng.shuffle(test_dataset)

    train_x, train_y = split_shuffled_data(train_dataset)
    test_x, test_y = split_shuffled_data(test_dataset)

    # Store labels as a single row vector of shape (1, m).
    train_y = np.array(train_y).reshape(1, len(train_y))
    test_y = np.array(test_y).reshape(1, len(test_y))

    create_h5file(train_x, train_y, "train")
    create_h5file(test_x, test_y, "test")

In [93]:
def _read_xy(h5_path):
    """Read the ``"x"`` and ``"y"`` datasets of an HDF5 file into memory."""
    # Copy the data into NumPy arrays while the file is open; the context
    # manager then closes the handle instead of leaking it.
    with h5py.File(h5_path, "r") as h5f:
        return np.array(h5f["x"]), np.array(h5f["y"])


def load_dataset():
    """Load the train and test sets from ``train_set.h5`` and ``test_set.h5``.

    Both files are read from the current working directory and are closed
    again before the function returns.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays holding
        the ``"x"`` and ``"y"`` datasets of the two files.
    """
    X_train, y_train = _read_xy('train_set.h5')
    X_test, y_test = _read_xy('test_set.h5')

    return X_train, y_train, X_test, y_test

In [94]:
# Build the default 80/20 split and write train_set.h5 / test_set.h5.
create_dateset(cat_images,dog_images)

In [95]:
# Read the saved splits back from the HDF5 files.
X_train, y_train, X_test, y_test = load_dataset()

In [96]:
# Check the shapes of the reloaded arrays.
print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test: ", y_test.shape)

X_train:  (19956, 64, 64, 3)
y_train:  (1, 19956)
X_test:  (4990, 64, 64, 3)
y_test:  (1, 4990)
