This is the code upto feature extraction for CASIA Dataset A

In [1]:
# Loading all the necessary modules
import os
import numpy as np
import cv2
from imageio import imread
from skimage.transform import resize
import math
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from datetime import datetime

In [None]:
# Set the path to the main dataset folder containing all the subfolders and each subfolder contains the another array of subfolders and in these subfolders, we have silhouettes.

dataset_path = ""

# path to save pickle files of widths data along with the file name
pkl_file_path = ""

In [2]:
# A function to smooth the values
# Algorithm:
# It will first add the left neighbours upto the number set in the to_look variable 
# and similarly for the right neighbours
# Then divide the total sum by total elements added including the current indexed element
def smoothening(lst):
        
    to_return = []
    lst_length = len(lst)
    to_look = 2

    for ind, i in enumerate(lst):
        total_counts = 1
        total_sum = i
        l = ind - 1
        r = ind + 1

        # adding the left neigbours
        while l >= 0 and total_counts <= to_look + 1:
            total_sum += lst[l]
            l-=1
            total_counts+=1
        
        # adding the right neigbours
        while r < lst_length and total_counts <= 2*to_look + 1:
            total_sum += lst[r]
            r+=1
            total_counts+=1

        to_return.append(round(total_sum/total_counts,2))
    
    return to_return

In [3]:
# A function to get bounding boxes widths of all images
def bbox(images_path):

    # Bounding Box
    widths = []

    # getting all the images name
    images = os.listdir(images_path)

    for image in images:
        # Loading the images
        img = cv2.imread(f"{images_path}\\{image}")

        # Converting to grayscale image
        gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

        # Denoising
        blur = cv2.GaussianBlur(gray,(7,7),0)
        blur = cv2.fastNlMeansDenoising(blur,None,30)

        # Setting the threshold
        thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

        # Finding the contours
        count = cv2.findContours(blur,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)

        # Selecting the correct contours
        count = count[0] if len(count) == 2 else count[1]
        
        # Sorting
        count = sorted(count, key = lambda x: cv2.boundingRect(x)[0])

        # used for discarding smaller useless bounding boxes
        bbox_length = [len(i) for i in count]
        bbox_length_avg = sum(bbox_length) / len(bbox_length)

        # Drawing the bounded box
        for c in count:
            
            # to discard small bounding boxes
            if len(c) < bbox_length_avg:
                continue

            # to draw the bounding box onto the image and storing the lengths and widths
            x,y,w,h = cv2.boundingRect(c)
            widths.append(w)
            cv2.rectangle(img,(x,y),(x+w,y+h),(36,255,12),2)

        # save resulting image
        # bbox_save_file_path = "C:\\Users\\hp\\Desktop\\Bbox"
        # cv2.imwrite(f"{bbox_save_file_path}\\{f}",img)
        
        # show thresh and result    
        #cv2.imshow("bounding_box", result)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Double Smoothening of the widths to remove unwanted spikes
    smoothen_widths = smoothening(smoothening(widths))
    
    # returning the smoothen widths
    return smoothen_widths

In [4]:
# Gait Cycle Estimation
# Algorithm:
# Step 1: Locate all local maxima
# Step 2: Remove all duplicates (plateau)
# Step 3: If more than 3 local maximas, then remove outliers (removing relatively smaller maximas)
# Step 4: Choose the best 3 consecutive local maximas and this is the 'one gait cycle'
# Step 5: Generating a list of frame names present in one gait cycle and returning it

def gait_cycle_estimation(smoothen_widths,folder,subFolder):

    # storing all the frame numbers which are local maxima
    local_maxs_args = []
    local_maxs = []

    # This string will contain all the information regarding each step of the algorithm
    # and will be added into the 'information1.txt' file for future use
    to_send = ""

    # --------------------------------------------------------------------------------------------
    # Step 1: Locate all local maxima
    for i in range(1, len(smoothen_widths)-1):
        if(smoothen_widths[i-1] < smoothen_widths[i] >= smoothen_widths[i+1]) or (smoothen_widths[i-1] <= smoothen_widths[i] > smoothen_widths[i+1]):
            local_maxs_args.append(i+1)
            local_maxs.append(smoothen_widths[i])

    # Adding the last the width if its greater, because loop ends before it
    last_index = len(smoothen_widths)-1
    if smoothen_widths[-1] > smoothen_widths[-2]:
        local_maxs_args.append(last_index+1)
        local_maxs.append(smoothen_widths[last_index])

    # Updating the information string
    to_send += f"\nBefore removing duplicates, Local Maxima: {local_maxs}"
    to_send += f"\nBefore removing duplicates, Local Maxima Args: {local_maxs_args}"
    print(f"Before removing duplicates, Local Maxima: {local_maxs}")
    print(f"Before removing duplicates, Local Maxima Args: {local_maxs_args}")

    # --------------------------------------------------------------------------------------------    
    # Step 2: Remove all duplicates
    temp1 = []
    temp2 = []
    for i in range(len(local_maxs)-1):
        if local_maxs[i] != local_maxs[i+1]:
            temp1.append(local_maxs[i])
            temp2.append(local_maxs_args[i])
    temp1.append(local_maxs[len(local_maxs)-1])
    temp2.append(local_maxs_args[len(local_maxs_args)-1])

    # Updating the values
    local_maxs = temp1
    local_maxs_args = temp2

    # Updating the information string
    to_send += f"\nAfter removing duplicates, Local Maxima: {local_maxs}"
    to_send += f"\nAfter removing duplicates, Local Maxima Args: {local_maxs_args}"
    print(f"After removing duplicates, Local Maxima: {local_maxs}")
    print(f"After removing duplicates, Local Maxima Args: {local_maxs_args}")

    # --------------------------------------------------------------------------------------------
    # Step 3: If more than 3 local maximas, then remove outliers
    if(len(local_maxs) > 3):
        temp3 = []
        temp4 = []
        maxima_mean = sum(temp1)/len(temp2)
        to_send += f"\nMean of maxima: {maxima_mean}"
        # print(f"Mean of maxima: {maxima_mean}")
        
        for ind, i in enumerate(temp1):
            if i+10 >= maxima_mean:
                temp3.append(i)
                temp4.append(temp2[ind])

        # Updating the values
        local_maxs = temp3
        local_maxs_args = temp4

        # Updating the information string
        to_send += f"\nAfter removing outliers, Local Maxima: {local_maxs}"
        to_send += f"\nAfter removing outliers, Local Maxima Args: {local_maxs_args}"
        print(f"After removing outliers, Local Maxima: {local_maxs}")
        print(f"After removing outliers, Local Maxima Args: {local_maxs_args}")

    # --------------------------------------------------------------------------------------------
    # Step 4: Choose the best 3 consecutive local maximas
    first_args = local_maxs_args[0]
    third_args = local_maxs_args[2]
    first_maxs = local_maxs[0]
    third_maxs = local_maxs[2]
    total_frames = third_args - first_args

    for i in range(2,len(local_maxs)-1):
        y = 0
        z = 0
        curr_frames = abs(local_maxs_args[i+1] - local_maxs_args[i-1])
        if local_maxs[i-1] > first_maxs:
            y = 1
        if local_maxs[i+1] > third_maxs:
            z = 1
        if curr_frames > total_frames and y + z >= 1:
            total_frames = curr_frames
            first = local_maxs_args[i-1]
            third = local_maxs_args[i+1]

    # --------------------------------------------------------------------------------------------
    # Step 5: Generating a list of frame names present in one gait cycle
    one_gait_cycle_frames = []
    for i in range(local_maxs_args[0],local_maxs_args[2]+1):
        frame_name = f"{folder}-{subFolder}-" + f"{1000 + i}"[1:] + ".png"
        one_gait_cycle_frames.append(frame_name)

    # Updating the information string
    to_send += f"\n\nFirst local maxima = {local_maxs[0]}, Frame no. = {one_gait_cycle_frames[0]}"
    to_send += f"\nThird local maxima = {local_maxs[2]}, Frame no. = {one_gait_cycle_frames[-1]}"
    to_send += f"\nTotal frames in the Gait Video: {len(smoothen_widths)}"
    to_send += f"\nTotal frames in one Gait Cycle: {local_maxs_args[2] - local_maxs_args[0] + 1}"
    
    print(f"\nFirst local maxima = {local_maxs[0]}, Frame no. = {one_gait_cycle_frames[0]}")
    print(f"Third local maxima = {local_maxs[2]}, Frame no. = {one_gait_cycle_frames[-1]}")
    print(f"Total frames in the Gait Video: {len(widths)}")
    print(f"Total frames in one Gait Cycle: {local_maxs_args[2] - local_maxs_args[0] + 1}")

    # --------------------------------------------------------------------------------------------
    # returning the information
    return one_gait_cycle_frames, to_send
    # --------------------------------------------------------------------------------------------

In [5]:
# GAIT Energy Image
def gait_energy_image(images_path,one_gait_cycle_frames):

    # reading the frames of one gait cycle
    images = [imread(f"{images_path}\\{f}") for f in one_gait_cycle_frames]

    # A function to calculate the mass centre
    def mass_center(img,is_round=True):

        y_mean = img.mean(axis=1)
        x_mean = img.mean(axis=0)
        y = np.sum(np.arange(y_mean.shape[0]) * y_mean)/np.sum(y_mean)
        x = np.sum(np.arange(x_mean.shape[0]) * x_mean)/np.sum(x_mean)
        
        if is_round:
            return int(round(x)),int(round(y))
        return x,y

    # A function to extract the body image
    def image_extract(img,newsize):
        x_s = np.where(img.mean(axis=0)!=0)[0].min()
        x_e = np.where(img.mean(axis=0)!=0)[0].max()
        
        y_s = np.where(img.mean(axis=1)!=0)[0].min()
        y_e = np.where(img.mean(axis=1)!=0)[0].max()
        
        x_c,_ = mass_center(img)
        x_s = x_c-newsize[1]//2
        x_e = x_c+newsize[1]//2
        
        img = img[y_s:y_e,x_s if x_s > 0 else 0:x_e if x_e < img.shape[1] else img.shape[1]]
        return resize(img,newsize)

    # extracting the body images for calculating the GEI
    row = 128
    col = 128
    images = [image_extract(i,(row,col)) for i in images]

    # Getting the GEI
    gei = np.mean(images,axis=0)

    # Displaying the GEI
    # plt.imshow(gei)

    return gei

In [6]:
#  Generating Gray-level Co-occurrence Matrix (GLCM) of GEI
def glcm(gei,row,col):

    gei_s = [np.round(i*255) for i in gei]
    p = np.zeros((256,256))

    dx_dy =[[1,1],[1,0],[1,-1],[0,-1],[-1,-1],[-1,0],[-1,1],[0,1]]

    # row = col = 128
    for x in range(row):
        for y in range(col):
            for d in dx_dy:
                if 0 <= x+d[0] <= 127 and 0 <= y+d[1] <= 127:
                    temp_x = gei_s[x][y]
                    temp_y = gei_s[x+d[0]][y+d[1]]
                    p[int(temp_x)][int(temp_y)] += 1
    return p

# Generating feature list from gei
def features(gei,row,col,folder,subFolder):
    # -------------------------------
    # Generating Gray-level Co-occurrence Matrix (GLCM) of GEI
    p = glcm(gei,row,col)
    ng = 256
    # -------------------------------
    # Angular Second Moment
    angular_moment = 0
    for x in range(ng):
        for y in range(ng):
            angular_moment += (p[x][y])**2
    # -------------------------------
    # Contrast 
    contrast = 0
    for r in range(ng-1):
        for i in range(ng):
            for j in range(ng):
                if abs(i-j) == r:
                    contrast += (r**2) * p[i][j]
    # -------------------------------
    #  Variance Function
    # first find mu
    u = 0
    for i in range(ng-1):
        for j in range(ng):
            u += i * p[i][j]
    # Variance
    variance = 0
    for i in range(ng-1):
        for j in range(ng):
            variance += (i-u)**2 * (p[i][j])
    # -------------------------------
    #  Inverse different Moment
    idm = 0
    for i in range(ng-1):
        for j in range(ng):
            idm += p[i][j] / (1 + (i-j)**2)
    # -------------------------------
    # Sum Average
    sum_average = 0
    for i in range(255):
        for j in range(256):
            sum_average += (i+j)*p[i][j]
    # -------------------------------
    #  Entropy
    entropy = 0
    for i in range(ng-1):
        for j in range(ng):
            if p[i][j] == 0:
                continue
            entropy += p[i][j] + math.log10(p[i][j])
    # -------------------------------
    # p(x+y)
    pxy = np.zeros(2*ng+1)
    for i in range(ng):
        for j in range(ng):
            pxy[i+j] += p[i][j]
    # -------------------------------
    # p(|x-y|)
    pxy_minus = np.zeros(ng-1)
    for i in range(ng-1):
        for j in range(255):
            pxy_minus[abs(i-j)] += p[i][j]
    # -------------------------------
    # sum variance
    sum_variance = 0
    for r in range(2*ng+1):
        sum_variance += ((r-sum_average)**2)*pxy[r]
    # -------------------------------
    # sum entropy
    sum_entropy = 0
    for r in range(2*ng+1):
        if pxy[r] != 0:
            sum_entropy += pxy[r] * math.log10(pxy[r])
    sum_entropy *= -1
    # -------------------------------
    # difference variance
    diff_var = 0
    for r in range(ng-1):
        sum = 0
        for l in range(ng-1):
            sum += l*pxy_minus[l]
        k = (r - sum)**2
        diff_var += k*pxy_minus[r]
    # -------------------------------
    # difference entropy
    diff_ent = 0
    for r in range(ng-1):
        if pxy_minus[r] != 0:
            diff_ent += pxy_minus[r]*(math.log10(pxy_minus[r]))
    # -------------------------------

    # Creating list and returning it
    # id,angular_moment,contrast,variance,idm,sum_average,entropy
    feature_list = [f"{folder}-{subFolder}",angular_moment,contrast,variance,idm,sum_average,entropy,sum_variance,sum_entropy,diff_var,diff_ent]
    return feature_list
    # -------------------------------

In [79]:
# # Finding the widths of bboxes and saving it for later use

# # GAIT dataset path
dataset_path = "F:\\Sixth Semester\\Capstone\\GaitDatasetA-silh"
Folders = os.listdir(dataset_path)

# A temp dict to hold all the widths data
widths_dict = {}

# # Opening a log file to log activites
with open('log.txt','a') as f:
    try:
        for folder in Folders:
            # Declaring the paths for folders and subfolders
            subfolders_path = f"{dataset_path}\\{folder}"
            subFolders = os.listdir(subfolders_path)
            
            # Selecting only the folders of 90 degree viewing angle image
            subFolders = subFolders[:4]

            for subFolder in subFolders:
                
                # loading the images
                images_path = f"{dataset_path}\\{folder}\\{subFolder}"

                print(f"Current subfolder: {folder}/{subFolder}")
                
                # getting the bounding box widths and saving it
                widths = bbox(images_path)
                
                # Saving the data into the temp dict
                widths_dict[f"{folder}/{subFolder}"] = widths
    
#     # Exception Handling
    except Exception as e:
        f.write(f"\n\n----------------------------------------------------")
        f.write(f"\n\nTime-stamp: {datetime.now()}\nException: {type(e).__name__}\nException message: {e}")

# # Saving the content of the temp dict into pickle file
# # with open('widths_ds1.pkl','wb') as p:
# #     pickle.dump(widths_dict, p)



Current subfolder: fyc/00_1


Current subfolder: fyc/00_2


Current subfolder: fyc/00_3


Current subfolder: fyc/00_4


Current subfolder: hy/00_1


Current subfolder: hy/00_2


Current subfolder: hy/00_3


Current subfolder: hy/00_4


Current subfolder: ljg/00_1


Current subfolder: ljg/00_2


Current subfolder: ljg/00_3


Current subfolder: ljg/00_4


Current subfolder: lqf/00_1


Current subfolder: lqf/00_2


Current subfolder: lqf/00_3


Current subfolder: lqf/00_4


Current subfolder: lsl/00_1


Current subfolder: lsl/00_2


Current subfolder: lsl/00_3


Current subfolder: lsl/00_4


Current subfolder: ml/00_1


Current subfolder: ml/00_2


Current subfolder: ml/00_3


Current subfolder: ml/00_4


Current subfolder: nhz/00_1


Current subfolder: nhz/00_2


Current subfolder: nhz/00_3


Current subfolder: nhz/00_4


Current subfolder: rj/00_1


Current subfolder: rj/00_2


Current subfolder: rj/00_3


Current subfolder: rj/00_4


Current subfolder: syj/00_1


Current subfolder: s

In [8]:
# Loading the widths data and extracting features from it

# GAIT dataset path
Folders = os.listdir(dataset_path)

# path to pickle file of widths data
pkl_path = pkl_file_path

# Dataframe details
columns_list = "id angular_moment contrast variance idm sum_average entropy sum_variance sum_entropy difference_variance difference_entropy".split()
df = pd.DataFrame(columns = columns_list)

# setting the widths and height of gei image
row = 128
col = 128

# setting the row counter
row_counter = 0

# Opening a log file to log activites
with open('log_temp.txt','a') as f, open('information_temp.txt','a') as i:
    try:
        # Loading the widths data from the pickle file 
        with open(pkl_path,'rb') as x:
            widths_dict = pickle.load(x)

        for folder in Folders:

            subfolders_path = f"{dataset_path}\\{folder}"
            subFolders = os.listdir(subfolders_path)
            
            # Selecting only the folders of 90 degree viewing angle image
            subFolders = subFolders[:4]

            for subFolder in subFolders:
                
                # # loading the images
                images_path = f"{dataset_path}\\{folder}\\{subFolder}"
                print(f"\nCurrent subfolder: {folder}/{subFolder}")
                
                # Fetching the widths data of current folder
                widths = widths_dict[f"{folder}/{subFolder}"]

                # gait cycle estimation
                gait_cycle_frames, to_send = gait_cycle_estimation(widths,folder,subFolder)
                
                # GEI generation
                gei = gait_energy_image(images_path,gait_cycle_frames)
                
                # feature extraction
                feature_list = features(gei,row,col,folder,subFolder)

                # # writing in the log file
                # i.write(f"\n\n----------------------------------------------------")
                # i.write(f"\n\n{row_counter}.) Current Row id: {folder}-{subFolder}")
                # i.write(f"\n\nSmoothen Widths length: {len(widths)}")
                # i.write(f"\nSmoothen Widths: {widths}")
                # i.write(f"\n{to_send}")

                # adding the new row to the dataframe
                new_row = pd.Series(feature_list, index = df.columns)
                df.loc[row_counter] = new_row
                row_counter+=1
                
    # Exception Handling 
    except Exception as e:
        f.write(f"\n\n----------------------------------------------------")
        f.write(f"\n\nTime-stamp: {datetime.now()}\nException: {type(e).__name__}\nException message: {e}")

    # # saving the dataframe
    # df.to_csv(f'features_ds1.csv',index = False)


Current subfolder: fyc/00_1
Before removing duplicates, Local Maxima: [66.44, 63.14, 68.17, 69.25, 63.03, 71.13]
Before removing duplicates, Local Maxima Args: [6, 19, 33, 46, 59, 72]
After removing duplicates, Local Maxima: [66.44, 63.14, 68.17, 69.25, 63.03, 71.13]
After removing duplicates, Local Maxima Args: [6, 19, 33, 46, 59, 72]
After removing outliers, Local Maxima: [66.44, 63.14, 68.17, 69.25, 63.03, 71.13]
After removing outliers, Local Maxima Args: [6, 19, 33, 46, 59, 72]

First local maxima = 66.44, Frame no. = fyc-00_1-006.png
Third local maxima = 68.17, Frame no. = fyc-00_1-033.png
Total frames in the Gait Video: 75
Total frames in one Gait Cycle: 28

Current subfolder: fyc/00_2
Before removing duplicates, Local Maxima: [71.14, 67.05, 69.55, 68.3, 61.86, 46.83]
Before removing duplicates, Local Maxima Args: [12, 25, 38, 51, 64, 73]
After removing duplicates, Local Maxima: [71.14, 67.05, 69.55, 68.3, 61.86, 46.83]
After removing duplicates, Local Maxima Args: [12, 25, 38,

KeyboardInterrupt: 