In [17]:
import pandas as pd
import pickle
import numpy as np


In [20]:


def get_multihot(int_num, arr_size=5):
    '''
    Encode a count as a cumulative multi-hot vector: the first `int_num`
    positions are 1 and the remaining positions are 0.

    example: count = 3
    output array:    [1,1,1,0,0]

    The count is clamped to [0, arr_size], so a count of 0 (or less) gives an
    all-zero vector and any count >= arr_size gives an all-one vector.

    Parameters
    ----------
    int_num : int
        Count to encode.
    arr_size : int, optional
        Length of the returned vector (default 5).

    Returns
    -------
    numpy.ndarray
        Float array of shape (arr_size,) made of ones followed by zeros.
    '''
    # The previous implementation subtracted 1 after clamping, which produced
    # one fewer 1 than documented (count=3 -> [1,1,0,0,0]), made a count of 1
    # indistinguishable from 0 and left the last slot permanently unused.
    ones_count = max(0, min(arr_size, int_num))

    multi_hot = np.zeros(arr_size)
    multi_hot[:ones_count] = 1

    return multi_hot

def min_max_bound(real_val, min_val, max_val):
    '''
    Clamp `real_val` so that it is no larger than `max_val` and no smaller
    than `min_val`, and return the resulting value.
    '''
    # Cap at the upper bound first ...
    capped = real_val if real_val < max_val else max_val
    # ... then lift the capped value up to the lower bound if needed.
    return capped if capped > min_val else min_val


def get_histogram_normalized(list_values):
    '''
    Return the normalized 10-bin histogram of `list_values`.

    Values are counted into bins with edges 0, 1, ..., 10 and every bin count
    is divided by the total number of input values. Values outside [0, 10]
    land in no bin but still count towards that total, so the result sums to
    less than 1 when such values are present.

    Parameters
    ----------
    list_values : sequence of numbers
        Values to histogram.

    Returns
    -------
    numpy.ndarray
        Float array of shape (10,) with the per-bin fractions. An empty input
        returns all zeros.
    '''
    values = np.asarray(list_values)
    hist, _ = np.histogram(values, bins=np.arange(11))

    count = len(values)
    if count == 0:
        # Avoid 0/0: dividing by an empty count would yield an all-NaN vector
        # and a RuntimeWarning instead of a usable feature.
        return np.zeros(len(hist))

    return hist / count

def _summary_stats(values):
    '''Return (mean, median, max, min, std) of `values`, in that order.'''
    return (
        np.mean(values),
        np.median(values),
        np.max(values),
        np.min(values),
        np.std(values),
    )


def create_features(data_list):
    '''
    Build one feature vector, one binary target and one id per image.

    `data_list` is indexed as `data_list[0] ... data_list[len(data_list) - 1]`;
    each entry is an iterable of per-image dicts. The keys read from every
    image dict are:

        'image_level'            -> dict with 'image_blur_score', 'kiss_score'
        'face_level'             -> iterable of per-face dicts
        'user_selection_target'  -> truthy / falsy selection flag
        'global_id'              -> image identifier

    Only faces whose 'priority' is "high" or "mid" (case-insensitive)
    contribute. For those faces the keys 'blur_scoring', 'blur_classification',
    'closed_eyes', 'emotion', 'eyegaze' and 'pose_score' are read.

    Feature layout (length 37, every value rounded to 1 decimal):

        0        image blur score
        1        kiss score
        2-6      get_multihot() encoding of the number of contributing faces
        7        never written, always 0
        8-12     blur_scoring: mean, median, max, min, std
        13-17    closed_eyes:  mean, median, max, min, std, each clamped to [0, 10]
        18-27    normalized emotion histogram from get_histogram_normalized()
        28       never written, always 0
        29       mean eyegaze
        30-31    thresholded blur classification: mean, max
        32-36    pose_score: mean, median, max, min, std

    Columns 7 and 28 are gaps left by the original index arithmetic. They are
    kept on purpose so that column positions stay the same for any consumer
    that indexes the feature matrix by position.
    When an image has no contributing face, columns 2-36 stay 0.

    Returns
    -------
    Features_X : numpy.ndarray
        Stacked feature vectors, one row per image.
    Features_Target : numpy.ndarray
        1. where 'user_selection_target' is truthy, else 0.
    Features_imgpaths : numpy.ndarray
        The 'global_id' of every image, in the same order as the rows.
    '''
    # Loop-invariant settings (previously re-assigned on every iteration).
    feat_size = 37
    max_face_count = 5   # length of the face-count multi-hot block
    blur_thr = 5         # raw blur classification >= blur_thr -> class 1
    min_val = 0          # clamp range applied to the closed-eyes statistics
    max_val = 10
    counted_priorities = ("high", "mid")

    Features_X = []
    Features_Target = []
    Features_imgpaths = []

    # Integer indexing is kept deliberately: it only requires len() and
    # data_list[idx], exactly like the original loop.
    for idx in range(len(data_list)):
        dup_sets = data_list[idx]

        for img_data in dup_sets:

            ## Image Level Scores
            img_blur_score = img_data['image_level']['image_blur_score']
            kiss_score = img_data['image_level']['kiss_score']

            ## Face Level Scores (contributing faces only)
            blur_scoring_list = []
            closed_eyes_list = []
            emotion_list = []
            eyegaze_list = []
            blur_classification_list = []
            pose_score_list = []

            for face_info_curr in img_data['face_level']:
                if face_info_curr['priority'].lower() not in counted_priorities:
                    continue

                ## encode blur classification; an empty string counts as 0
                blur_classification_raw = face_info_curr['blur_classification']
                if blur_classification_raw == '':
                    blur_classification_raw = 0
                blur_classification = 0 if blur_classification_raw < blur_thr else 1

                blur_scoring_list.append(face_info_curr['blur_scoring'])
                closed_eyes_list.append(face_info_curr['closed_eyes'])
                emotion_list.append(face_info_curr['emotion'])
                eyegaze_list.append(face_info_curr['eyegaze'])
                blur_classification_list.append(blur_classification)
                pose_score_list.append(face_info_curr['pose_score'])

            count_faces = len(blur_scoring_list)

            ## Image Level Target
            user_selection_target = img_data['user_selection_target']

            # ------------------------------------------

            # Defining Features
            feat_vector = np.zeros(feat_size)
            feat_vector[0] = img_blur_score
            feat_vector[1] = kiss_score
            f_idx = 2

            if count_faces > 0:
                feat_vector[f_idx: f_idx + max_face_count] = get_multihot(
                    count_faces, arr_size=max_face_count
                )
                # "+ 1" leaves column 7 unused; kept to preserve the layout.
                f_idx = f_idx + max_face_count + 1

                blur_stats = _summary_stats(blur_scoring_list)
                feat_vector[f_idx: f_idx + len(blur_stats)] = blur_stats
                f_idx += len(blur_stats)

                closed_eyes_stats = [
                    min_max_bound(stat, min_val, max_val)
                    for stat in _summary_stats(closed_eyes_list)
                ]
                feat_vector[f_idx: f_idx + len(closed_eyes_stats)] = closed_eyes_stats
                f_idx += len(closed_eyes_stats)

                emotion_hist = get_histogram_normalized(emotion_list)
                feat_vector[f_idx: f_idx + len(emotion_hist)] = emotion_hist
                # "+ 1" leaves column 28 unused; kept to preserve the layout.
                f_idx = f_idx + len(emotion_hist) + 1

                feat_vector[f_idx] = np.mean(eyegaze_list)
                f_idx += 1

                feat_vector[f_idx] = np.mean(blur_classification_list)
                f_idx += 1
                feat_vector[f_idx] = np.max(blur_classification_list)
                f_idx += 1

                pose_stats = _summary_stats(pose_score_list)
                feat_vector[f_idx: f_idx + len(pose_stats)] = pose_stats
                f_idx += len(pose_stats)

            feat_vector = np.round(feat_vector, 1)

            # ------------------------------------------

            target_vector = 1. if user_selection_target else 0.

            global_id = img_data['global_id']

            Features_X.append(feat_vector)
            Features_Target.append(target_vector)
            Features_imgpaths.append(global_id)

    Features_X = np.asarray(Features_X)
    Features_Target = np.asarray(Features_Target)
    Features_imgpaths = np.asarray(Features_imgpaths)

    print("Features_X ", Features_X.shape)

    return Features_X, Features_Target, Features_imgpaths




In [21]:
# NOTE(review): absolute, machine-specific path; consider moving it to a
# configurable data directory so the notebook can run on other machines.
PICKLE_PATH = "/Users/apple/work/Culling/selection_algo/dumps/pickle_attributes_AssistedCull__25July.pkl"

# SECURITY: pickle.load can execute arbitrary code while deserializing; only
# load dumps that come from a trusted source.
# The `with` block closes the file handle, which the bare open() never did.
with open(PICKLE_PATH, "rb") as file_read:
    scores_user_data = pickle.load(file_read)
print("Total duplicate sets1: ", len(scores_user_data))


feat_input_all, gt_all, gt_imgpaths_all = create_features(scores_user_data)

Total duplicate sets1:  14194
Features_X  (39287, 37)
