In [1]:
import re
import os
import random
import numpy as np
from tqdm import tqdm
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt

# define all the sub names, session names, and frame names into np.array
sub_names_train = np.arange(1, 11)
sub_names_test = np.concatenate([np.arange(31, 39), np.array([40])], axis = -1)
session_names = np.arange(1, 3)
frame_names_train = np.arange(1, 24)
frame_names_test = np.arange(1, 25)

def ReadImage(image_path):
    """
    Function:
        Read and resize images with specific subname, session name and frame name.
    """
    # read the image
    image = imread(image_path)
    # resize the image
    output_size = (64, 64)
    image = resize(image, output_size)
    return image

def GetImageWithSubName(sub, sess, frame):
    """
    Function: 
        Get image path name according to the inputs.
    """
    # read the image path
    folder_name = "sub" + str(sub) + "_session" + str(sess)
    image_path = folder_name + "/" + folder_name + "_frame" + str(frame) + ".png"
    image = ReadImage(image_path)
    sub_name = sub
    return image, sub_name

def GetAllData(sub_names, session_names, frame_names):
    """
    Function:
        Read all images in the dataset with their subset names.
    """
    # subset 35 and subset 37 have no session 2
    sub_no_sess2_test = np.array([35, 37])
    # store pathes
    all_imgs= []
    all_subs = []
    for i in range(len(sub_names)):
        sub = sub_names[i]
        if sub in sub_no_sess2_test:
            sess_len = len(session_names) - 1
        else:
            sess_len = len(session_names)
        for j in range(sess_len):
            sess = session_names[j]
            for k in range(len(frame_names)):
                frame = frame_names[k]
                image, image_sub = GetImageWithSubName(sub, sess, frame)
                all_imgs.append(image)
                all_subs.append(image_sub)
    all_imgs = np.array(all_imgs)
    all_subs = np.array(all_subs)
    return all_imgs, all_subs

def PairWithIndex(all_subs):
    """
    Function:
        Pair each two images with the image indexes, then define labels.
    """
    imgs_ind = np.arange(len(all_subs))
    all_pairs = []
    # loop over each image with the rest images
    while len(imgs_ind) > 0:
        current_img_ind = imgs_ind[0]
        imgs_ind = np.delete(imgs_ind, 0)
        for another_img_ind in imgs_ind:
            if all_subs[current_img_ind] == all_subs[another_img_ind]:
                label = 1
            else:
                label = 0
            all_pairs.append([current_img_ind, another_img_ind, label])
    all_pairs = np.array(all_pairs)
    return all_pairs

def TrainValidSep(all_pairs, validation_rate = 0.01):
    """
    Function:
        Separate the dataset for training into train set and validation set.
    """
    same_pairs = all_pairs[all_pairs[:, 2] == 1]
    diff_pairs = all_pairs[all_pairs[:, 2] == 0]
    # shuffle two sets first
    np.random.shuffle(same_pairs)
    np.random.shuffle(diff_pairs)
    valid_same_len = int(validation_rate * len(same_pairs))
    valid_diff_len = int(validation_rate * len(diff_pairs))
    valid_set = np.concatenate([same_pairs[:valid_same_len,:],\
                               diff_pairs[:valid_diff_len,:]], axis = 0)
    train_set = np.concatenate([same_pairs[valid_same_len:,:],\
                               diff_pairs[valid_diff_len:,:]], axis = 0)
    # shuffle again for creating the final sets
    np.random.shuffle(train_set)
    np.random.shuffle(valid_set)
    return train_set, valid_set

def SameDiffSep(all_pairs):
    """
    Function:
        Seperate the current set into same face set and different face set.
    """
    same_pairs = all_pairs[all_pairs[:, 2] == 1]
    diff_pairs = all_pairs[all_pairs[:, 2] == 0]
    return same_pairs, diff_pairs
    
# get all images and corresponding subset names
train_images, train_subnames = GetAllData(sub_names_train, session_names, frame_names_train)
test_images, test_subnames = GetAllData(sub_names_test, session_names, frame_names_test)
# get all image pairs using index representation
all_train_pairs = PairWithIndex(train_subnames)
all_test_pairs = PairWithIndex(test_subnames)
# shuffle dataset
np.random.shuffle(all_test_pairs)
# get same and diff set of the sets for further usage
train_same_ind, train_diff_ind = SameDiffSep(all_train_pairs)
test_same_ind, test_diff_ind = SameDiffSep(all_test_pairs)
# separate the set into training set and validation set
train_ind, valid_ind =TrainValidSep(all_train_pairs)

print("-------------------------- dataset used for training and validation ----------------------")
print("The orginal set conatains totally \t", len(all_train_pairs), "\t pairs of images, \nwith \t\t\t\t\t", \
      len(train_same_ind), "\t\t pairs showing same faces \nand \t\t\t\t\t", \
      len(train_diff_ind), "\t\t pairs showing different faces.")

print("---------------------------------- dataset used for test ---------------------------------")
print("The test set conatains totally \t\t", len(all_test_pairs), "\t\t pairs of images, \nwith \t\t\t\t\t", \
      len(test_same_ind), "\t\t pairs showing same faces \nand \t\t\t\t\t", \
      len(test_diff_ind), "\t\t pairs showing different faces.")

print("----------------------------- training set and validation set ----------------------------")
print("After separation, the training set contains \t", len(train_ind), "\t pairs of images,\
        \nand validation set contrains \t\t\t", len(valid_ind), "\t\t pairs of images.")


-------------------------- dataset used for training and validation ----------------------
The orginal set conatains totally 	 105570 	 pairs of images, 
with 					 10350 		 pairs showing same faces 
and 					 95220 		 pairs showing different faces.
---------------------------------- dataset used for test ---------------------------------
The test set conatains totally 		 73536 		 pairs of images, 
with 					 8448 		 pairs showing same faces 
and 					 65088 		 pairs showing different faces.
----------------------------- training set and validation set ----------------------------
After separation, the training set contains 	 104515 	 pairs of images,        
and validation set contrains 			 1055 		 pairs of images.


In [2]:
from skimage.color import rgb2gray
from skimage.exposure import equalize_hist
from skimage.filters import roberts, sobel

def GrayGradient(img):
    """
    Function: 
        Find the gradient of the image after grayscaled
    """
    gray_img = rgb2gray(img)
    return sobel(gray_img)

def GrayHistSobel(img):
    """
    Function:
        Apply histogram equalization on the grayscaled image first,
    then find the gradient.
    """
    gray_img = rgb2gray(img)
    gray_hist = equalize_hist(gray_img)
    return sobel(gray_hist)

train_images_new = GrayHistSobel(train_images[0])

print(train_images_new.shape)

ValueError: The parameter `image` must be a 2-dimensional array