In [489]:
import cv2
import tensorflow as tf
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import random
import imutils
import shutil
from tqdm import tqdm
from PIL import Image, ImageOps, ImageEnhance, ImageFilter


class ObjectAugmentation():

    #############################
    # Bounding Box Augmentation #
    #############################

    # returns width, height, top left and bottom right coordinates of object contour's bounding box
    def bbox_info(self, contour, random_x_coord, random_y_coord):
        """Locate a contour's upright bounding box on the background image.

        Args:
            contour: Point array indexed as ``contour[i][0] -> (x, y)``
                (presumably the (N, 1, 2) layout produced by cv2.findContours).
            random_x_coord: x offset added to the contour's x coordinates.
            random_y_coord: y offset added to the contour's y coordinates.

        Returns:
            Tuple ``(x_start, y_start, x_end, y_end, bbox_width, bbox_height)``;
            the four corner values include the offsets, width and height are
            the plain differences between the contour's extreme coordinates.
        """
        xs = contour[:, :, 0]
        ys = contour[:, :, 1]

        # extreme coordinates in the contour's own frame
        left = contour[xs.argmin()][0][0]
        right = contour[xs.argmax()][0][0]
        top = contour[ys.argmin()][0][1]
        bottom = contour[ys.argmax()][0][1]

        bbox_width = right - left
        bbox_height = bottom - top

        # shift the box by the placement offset on the background
        x_start = random_x_coord + left
        y_start = random_y_coord + top

        return (x_start, y_start, x_start + bbox_width, y_start + bbox_height, bbox_width, bbox_height)


    # get xml path for labelled person
    def find_xml(self, img_fname):
        """Return the path of the XML annotation that sits next to an image.

        The image extension (whatever it is, in any letter case) is replaced
        by ``.xml``. Only the final extension is stripped, so a ``.jpg`` or
        ``.png`` substring earlier in the path no longer truncates the result,
        and extensions other than ``.jpg``/``.png`` no longer raise.

        Args:
            img_fname: Path of the image file.

        Returns:
            The same path with its extension replaced by ``.xml``.
        """
        stem, _extension = os.path.splitext(img_fname)
        return stem + '.xml'

    
    # get coordinates of bbox in xml file
    def extract_coords(self, xml_path, obj_name, x_value=1, y_value=1):
        """Read the bounding boxes of every ``obj_name`` object in an XML file.

        The file is expected to contain ``<object>`` elements, each with a
        ``<name>`` and a ``<bndbox>`` holding integer ``xmin``/``ymin``/
        ``xmax``/``ymax`` children.

        Args:
            xml_path: Path of the annotation file.
            obj_name: Only objects whose ``<name>`` text equals this are kept.
            x_value: Horizontal padding factor; ``xmin`` is divided by it and
                ``xmax`` multiplied by it. Defaults to 1 (no padding).
            y_value: Vertical padding factor, applied the same way to
                ``ymin``/``ymax``. Defaults to 1 (no padding).

        Returns:
            A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
            matching object, in document order (empty if none match).
        """
        coords = []
        root = ET.parse(xml_path).getroot()

        for obj in root.findall('object'):
            # findtext returns None for a missing <name>, which never matches
            if obj.findtext('name') != obj_name:
                continue

            bbox = obj.find('bndbox')
            xmin = int(int(bbox.find('xmin').text) / x_value)
            ymin = int(int(bbox.find('ymin').text) / y_value)
            xmax = int(int(bbox.find('xmax').text) * x_value)
            ymax = int(int(bbox.find('ymax').text) * y_value)
            coords.append([xmin, ymin, xmax, ymax])

        return coords

    
    # returns normalized coordinates of bbox
    def normalize_coords(self, bbox_info, frame_width, frame_height):
        """Express a bounding box as fractions of the frame size.

        Args:
            bbox_info: Six values ``(x_start, y_start, x_end, y_end, width,
                height)`` of the box.
            frame_width: Width the x values are divided by.
            frame_height: Height the y values are divided by.

        Returns:
            ``[center_x, center_y, width, height]`` where the centre is the
            midpoint of the start/end corners and every entry is divided by
            the corresponding frame dimension.
        """
        x0, y0, x1, y1, box_w, box_h = bbox_info

        return [
            ((x0 + x1) / 2) / frame_width,
            ((y0 + y1) / 2) / frame_height,
            box_w / frame_width,
            box_h / frame_height,
        ]
    
    
    # creates rotation matrix to rotate bbox
    def rotation_matrix(self, img_center_point, augmented_img_dim, angle):
        """Build the affine matrix that maps points into a rotated image.

        Args:
            img_center_point: ``(x, y)`` centre the rotation is taken about.
            augmented_img_dim: ``(width, height)`` of the image after the
                rotation.
            angle: Rotation angle handed to ``cv2.getRotationMatrix2D``
                (scale is fixed at 1).

        Returns:
            The rotation matrix with its translation column shifted by the
            difference between the new image's centre and the rotation centre.
        """
        out_width, out_height = augmented_img_dim
        center_x, center_y = img_center_point

        matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1)

        # move the rotation centre onto the centre of the new canvas
        shift_x = (out_width / 2) - center_x
        shift_y = (out_height / 2) - center_y
        matrix[0, 2] += shift_x
        matrix[1, 2] += shift_y

        return matrix
    
    # returns corner points of bbox as a flat array of 8 values: x1, y1, x2, y2, x3, y3, x4, y4
    def get_corner_points(self, bbox_info):
        """List the four corners of a box as one flat array.

        Args:
            bbox_info: Six values whose first four are
                ``xmin, ymin, xmax, ymax``; the last two are ignored.

        Returns:
            A flat array of 8 values ordered top-left, top-right,
            bottom-left, bottom-right, each as ``x, y``.
        """
        left, top, right, bottom, _, _ = bbox_info

        corners = (
            left, top,       # top-left
            right, top,      # top-right
            left, bottom,    # bottom-left
            right, bottom,   # bottom-right
        )
        return np.hstack(corners)
        
    # returns new corner points of bbox after augmentation: rotation will cause bbox to be at an angle so we will find the tightest upright box to enclose object
    def get_enclosed_box(self, corner_points):
        """Return the tightest upright box around a set of points.

        Args:
            corner_points: 2-D array whose column 0 holds x values and
                column 1 holds y values.

        Returns:
            Flat array ``[xmin, ymin, xmax, ymax]``.
        """
        xs = corner_points[:, 0]
        ys = corner_points[:, 1]

        return np.hstack((xs.min(), ys.min(), xs.max(), ys.max()))
        

    # returns new bbox coordinates given rotation angle
    # NOTE: reference: https://blog.paperspace.com/data-augmentation-for-object-detection-rotation-and-shearing/
    def rotate_bbox(self, bbox_info, augmentation):
        """Update a bounding box after one augmentation step.

        Args:
            bbox_info: ``[xmin, ymin, xmax, ymax, img_width, img_height]``
                where the last two are the image size *before* this step.
                The input is never modified.
            augmentation: ``(augmented_img, augment)`` where ``augmented_img``
                exposes ``.size`` as ``(width, height)`` after the step and
                ``augment`` is either the string ``'mirrored'`` or a numeric
                rotation angle.

        Returns:
            A new list ``[xmin, ymin, xmax, ymax, img_width, img_height]``.
            For a mirror the x range is reflected across the old image width.
            For a rotation the four corners are transformed and the tightest
            upright box around them is returned with the new image size.

        Raises:
            ValueError: if ``augment`` is neither ``'mirrored'`` nor a number
                (previously this surfaced as an unbound ``angle`` variable).
        """
        augmented_img, augment = augmentation

        # size of the image after the augmentation
        new_img_width, new_img_height = augmented_img.size

        xmin, ymin, xmax, ymax, old_img_width, old_img_height = bbox_info

        if isinstance(augment, str):
            if augment != 'mirrored':
                raise ValueError('unsupported augmentation: %r' % (augment,))
            # horizontal flip: reflect the x range, image size is unchanged
            return [old_img_width - xmax, ymin, old_img_width - xmin, ymax, old_img_width, old_img_height]

        is_number = isinstance(augment, (int, float, np.integer, np.floating))
        if isinstance(augment, bool) or not is_number:
            raise ValueError('unsupported augmentation: %r' % (augment,))
        angle = augment

        # rotation is taken about the centre of the pre-rotation image
        img_center = (old_img_width // 2, old_img_height // 2)
        rot_matrix = self.rotation_matrix(img_center, augmented_img.size, angle)

        # corners as rows of homogeneous coordinates (x, y, 1)
        corner_points = self.get_corner_points(bbox_info).reshape(-1, 2)
        corner_points = np.hstack((corner_points, np.ones((corner_points.shape[0], 1))))

        rotated_corners = np.dot(rot_matrix, corner_points.T).T

        # the rotated box is tilted; keep the upright box that encloses it
        new_bbox_info = list(self.get_enclosed_box(rotated_corners))
        new_bbox_info.extend([new_img_width, new_img_height])

        return new_bbox_info



    # updates head bbox for each augmentation
    def update_head_bbox_info(self, initial_img_size, bg_size, head_bbox_file, random_coords, augmentations):
        #get generated random_coords
        random_x_coord, random_y_coord = random_coords

        #get background frame width and height
        frame_width, frame_height = bg_size

        #get coordinates of bbox from XML file
        xml_path = self.find_xml(head_bbox_file)
        coords = self.extract_coords(xml_path, 'head')[0]

        #rotate head bbox and update latest bbox information
        coords.extend(initial_img_size)
        new_bbox_info = coords

        for augmentation in augmentations:
            new_bbox_info = self.rotate_bbox(new_bbox_info, augmentation)
        
        #coordinates of bbox wrt background image
        bbox_width = abs(new_bbox_info[2] - new_bbox_info[0])
        bbox_height = abs(new_bbox_info[3] - new_bbox_info[1])

        new_bbox_info[0] = random_x_coord + new_bbox_info[0]     #x_start = random_x + xmin
        new_bbox_info[1] = random_y_coord + new_bbox_info[1]     #y_start = random_y + ymin
        new_bbox_info[2] = new_bbox_info[0] + bbox_width   #x_end = x_start + bbox_width
        new_bbox_info[3] = new_bbox_info[1] + bbox_height  #y_end = y_start + bbox_height
        
        new_bbox_info[4], new_bbox_info[5] = bbox_width, bbox_height
        
        #normalize head bbox coords
        norm_head_bbox_info = self.normalize_coords(new_bbox_info, frame_width, frame_height)

        norm_head_bbox_info.insert(0, 0) #head class is index 0
        return norm_head_bbox_info


    # returns YOLO information given object mask for augmentation
    def get_yolo_information(self, obj_mask, random_x_coord, random_y_coord, frame_width, frame_height):
        """Derive the normalized bounding box of an object from its mask.

        Args:
            obj_mask: 2-D (or more) mask array of the object.
            random_x_coord: x offset of the object on the background.
            random_y_coord: y offset of the object on the background.
            frame_width: Background width used for normalization.
            frame_height: Background height used for normalization.

        Returns:
            ``[center_x, center_y, width, height]`` from ``normalize_coords``.
        """
        rows, cols = obj_mask.shape[:2]

        # NOTE(review): cv2.resize takes dsize as (width, height) but is given
        # (rows, cols), so the mask comes back with its two dimensions swapped.
        # obj_mask_and_inverted_mask applies the same swap when it builds the
        # mask, so this presumably restores the image's real orientation
        # before the box is measured -- confirm before changing either side.
        reshaped_mask = cv2.resize(obj_mask, (rows, cols))

        outline = self.object_contour(reshaped_mask)

        # (x_start, y_start, x_end, y_end, width, height) on the background
        placed_box = self.bbox_info(outline, random_x_coord, random_y_coord)

        return self.normalize_coords(placed_box, frame_width, frame_height)



    # check validity of coordinates: if center point is out of image, disregard entire yolo information 
    def verify_yolo_information(self, yolo_info):
        """Tell whether a YOLO record is usable.

        Args:
            yolo_info: Five values ``(label, center_x, center_y, width,
                height)``.

        Returns:
            ``False`` when the label is not exactly an ``int`` or when either
            centre coordinate lies outside ``[0, 1]``; ``True`` otherwise.
            Width and height are not checked.
        """
        label, center_x, center_y, _width, _height = yolo_info

        if type(label) is not int:
            return False

        if center_x > 1 or center_x < 0:
            return False

        return not (center_y > 1 or center_y < 0)

    #########################
    # Tel-Aviv Augmentation #
    #########################

    # initialize smallest and largest object sizes and boundary coordinates given a background image
    def initialize_sizes_and_coordinates(self, bg_path, scale=1, TAV_cam_view=None, same_environment=None):
        """Pick the object size range and placement boundary for a background.

        Sizes are ``np.array([width, height])``. For generic backgrounds the
        base sizes are scaled by the background's width/1080 and height/1920.

        Args:
            bg_path: Background image path; only opened for generic
                backgrounds when ``same_environment`` is falsy.
            scale: Multiplier applied to both size arrays.
            TAV_cam_view: Falsy for a generic background, otherwise a string
                containing ``cam1``, ``cam2``, ``cam3`` or ``cam4``.
            same_environment: When truthy the objects are treated as already
                scaled, so both sizes are returned as ``None``.

        Returns:
            ``(nearest_obj, furthest_obj, boundary_x, boundary_y)``.

        Raises:
            ValueError: if ``TAV_cam_view`` is set but names none of the four
                known cameras (previously an unbound-variable error).
        """
        if same_environment:
            # objects from the same environment are used at their stored size
            if not TAV_cam_view:
                boundary_x, boundary_y = 200, 200
            elif 'cam1' in TAV_cam_view or 'cam2' in TAV_cam_view:
                boundary_x, boundary_y = 30, 400
            elif 'cam3' in TAV_cam_view:
                boundary_x, boundary_y = 1100, 650
            elif 'cam4' in TAV_cam_view:
                boundary_x, boundary_y = 750, 850
            else:
                raise ValueError('unknown camera view: %r' % (TAV_cam_view,))

            return (None, None, boundary_x, boundary_y)

        if not TAV_cam_view:  # generic background image
            # close the file handle as soon as the size has been read
            with Image.open(bg_path) as bg_img:
                bg_width, bg_height = bg_img.size

            width_ratio, height_ratio = (bg_width / 1080), (bg_height / 1920)
            nearest_obj = np.array([60 * width_ratio, 150 * height_ratio]) * scale   # closest to cam
            furthest_obj = np.array([20 * width_ratio, 50 * height_ratio]) * scale   # furthest from cam
            boundary_x, boundary_y = 0, 0

        elif 'cam1' in TAV_cam_view:  # nearest: left side, furthest: right side
            nearest_obj = np.array([150, 165]) * scale
            furthest_obj = np.array([120, 135]) * scale
            boundary_x, boundary_y = 0, 400

        elif 'cam2' in TAV_cam_view:  # nearest: left side, furthest: right side
            nearest_obj = np.array([120, 135]) * scale
            furthest_obj = np.array([80, 95]) * scale
            boundary_x, boundary_y = 0, 600

        elif 'cam3' in TAV_cam_view or 'cam4' in TAV_cam_view:  # nearest: bottom, furthest: top
            nearest_obj = np.array([150, 200]) * scale
            furthest_obj = np.array([30, 60]) * scale
            if 'cam3' in TAV_cam_view:
                boundary_x, boundary_y = 900, 850
            else:
                boundary_x, boundary_y = 950, 850

        else:
            raise ValueError('unknown camera view: %r' % (TAV_cam_view,))

        return (nearest_obj, furthest_obj, boundary_x, boundary_y)

    # returns random coordinates
    def random_coords(self, frame_width, frame_height, boundary_x, boundary_y, TAV_cam_view=None):
        """Draw a random ``(x, y)`` placement inside the view's allowed region.

        Args:
            frame_width: Background width.
            frame_height: Background height.
            boundary_x: Boundary value on x (lower bound, or upper bound for
                ``cam4``).
            boundary_y: Boundary value on y (lower bound, or upper bound for
                ``cam3``/``cam4``).
            TAV_cam_view: Falsy for a generic background, otherwise a string
                containing ``cam1``, ``cam2``, ``cam3`` or ``cam4``.

        Returns:
            ``(x, y)`` with both values drawn by ``random.randint`` (x first).

        Raises:
            ValueError: if the view is unknown (previously an unbound-variable
                error) or if a sampling range is empty.
        """
        if not TAV_cam_view:
            x_range = (boundary_x, frame_width - 200)
            y_range = (boundary_y, frame_height - 200)
        elif 'cam1' in TAV_cam_view:
            x_range = (boundary_x, frame_width - 400)
            y_range = (boundary_y, 900)
        elif 'cam2' in TAV_cam_view:
            x_range = (boundary_x, 1500)
            y_range = (boundary_y, 850)
        elif 'cam3' in TAV_cam_view:
            x_range = (boundary_x, frame_width - 200)
            y_range = (100, boundary_y)
        elif 'cam4' in TAV_cam_view:
            x_range = (1, boundary_x)
            y_range = (100, boundary_y)
        else:
            raise ValueError('unknown camera view: %r' % (TAV_cam_view,))

        # fail with a readable message instead of randint's "empty range"
        for axis, (low, high) in (('x', x_range), ('y', y_range)):
            if low > high:
                raise ValueError('no valid %s position: range %s..%s is empty' % (axis, low, high))

        x = random.randint(*x_range)
        y = random.randint(*y_range)
        return (x, y)



    # returns desired size of object given random coords
    def desired_size(self, random_coords, nearest_obj, furthest_obj, frame_width, frame_height, TAV_cam_view=None):
        """Interpolate the object size for a placement.

        The size moves linearly from ``furthest_obj`` to ``nearest_obj`` as
        the placement moves along x (``cam1``/``cam2``) or along y (generic
        backgrounds, ``cam3``/``cam4``).

        Args:
            random_coords: ``(x, y)`` placement.
            nearest_obj: ``np.array([width, height])`` used at fraction 1.
            furthest_obj: ``np.array([width, height])`` used at fraction 0.
            frame_width: Background width (divides x).
            frame_height: Background height (divides y).
            TAV_cam_view: Falsy for a generic background, otherwise a string
                containing ``cam1``, ``cam2``, ``cam3`` or ``cam4``.

        Returns:
            ``(width, height)`` as a tuple of ints (truncated).

        Raises:
            ValueError: if the view is unknown (previously an unbound-variable
                error).
        """
        x, y = random_coords

        if TAV_cam_view and ('cam1' in TAV_cam_view or 'cam2' in TAV_cam_view):
            fraction = x / frame_width      # side view: size varies along x
        elif not TAV_cam_view or 'cam3' in TAV_cam_view or 'cam4' in TAV_cam_view:
            fraction = y / frame_height     # generic / front-back view: size varies along y
        else:
            raise ValueError('unknown camera view: %r' % (TAV_cam_view,))

        size = (fraction * (nearest_obj - furthest_obj)) + furthest_obj
        return tuple(map(int, size))


    # returns new resized image based on desired width: for cam1 and cam2
    def image_resize_by_width(self, img, desired_width):
        """Resize an image to a target width, scaling the height to match.

        Args:
            img: Image exposing ``.size`` as ``(width, height)`` and
                ``.resize(size)``.
            desired_width: Width of the result.

        Returns:
            ``img.resize((desired_width, scaled_height))`` where the scaled
            height is truncated to an int.
        """
        source_width, source_height = img.size
        scale = desired_width / source_width

        return img.resize((desired_width, int(source_height * scale)))



    # returns new resized image based on desired height: for cam3 and cam4
    def image_resize_by_height(self, img, desired_height):
        """Resize an image to a target height, scaling the width to match.

        Args:
            img: Image exposing ``.size`` as ``(width, height)`` and
                ``.resize(size)``.
            desired_height: Height of the result.

        Returns:
            ``img.resize((scaled_width, desired_height))`` where the scaled
            width is truncated to an int.
        """
        source_width, source_height = img.size
        scale = desired_height / source_height

        return img.resize((int(source_width * scale), desired_height))



    # returns resized image based on cam_view
    def image_resize(self, img, desired_size, TAV_cam_view=None):
        if not TAV_cam_view:
            return self.image_resize_by_height(img, desired_size[1])
        elif 'cam1' in TAV_cam_view or 'cam2' in TAV_cam_view:
            return self.image_resize_by_width(img, desired_size[0])
        
        elif 'cam3' in TAV_cam_view or 'cam4' in TAV_cam_view:
            return self.image_resize_by_height(img, desired_size[1])




    #################################
    # Augmentation Helper Functions #
    #################################

    # ensures image format is PNG when running opencv operations
    def png_format(self, img, pixel_threshold):
        """Return a 4-channel array for an image, adding alpha when missing.

        Images that already carry four channels are returned as read. For
        three-channel input a fully opaque alpha channel is appended and every
        pixel whose first three channels are all ``>= pixel_threshold``
        (i.e. near-white) is made fully transparent.

        Args:
            img: Either an image path (str) or an image object convertible
                with ``np.array`` into a (rows, cols, channels) array.
            pixel_threshold: Per-channel value at or above which a pixel is
                treated as background.

        Returns:
            The image as an array; paths with a ``.png`` extension are
            returned exactly as ``cv2.imread(path, -1)`` yields them.

        Raises:
            FileNotFoundError: if a non-PNG path cannot be read by OpenCV.
        """
        if type(img) is str:  # image path
            # decide by the real extension, not by a 'png' substring in the name
            if os.path.splitext(img)[1].lower() == '.png':
                return cv2.imread(img, -1)  # IMREAD_UNCHANGED: keeps the alpha channel

            jpg_or = cv2.imread(img)
            if jpg_or is None:
                raise FileNotFoundError('could not read image: %s' % img)

        else:  # image object (PIL format)
            img_array = np.array(img)

            if img_array.shape[2] == 4:  # already has an alpha channel
                return img_array

            jpg_or = img_array

        # append an opaque alpha channel
        z = np.ones(jpg_or.shape[:-1] + (1,), dtype=jpg_or.dtype)
        z = z * 255
        jpg = np.concatenate((jpg_or, z), axis=-1)

        # knock out near-white pixels in one vectorized assignment
        near_white = (
            (jpg[:, :, 0] >= pixel_threshold) &
            (jpg[:, :, 1] >= pixel_threshold) &
            (jpg[:, :, 2] >= pixel_threshold))
        jpg[near_white, 3] = 0

        # NOTE(review): jpg already has four channels at this point; the
        # conversion is kept from the original implementation.
        png = cv2.cvtColor(jpg, cv2.COLOR_BGR2BGRA)

        return png



    # condition for overlapping object with other objects
    def overlap_coords(self, non_overlap_size_lst, threshold = 1):
        """Check whether the newest placement overlaps an earlier one.

        Each list entry is ``((width, height), (x, y))``. The last entry is
        the candidate; it is compared against every earlier entry using the
        candidate's own size scaled by ``threshold`` (1: no overlap allowed,
        0: any overlap allowed).

        Two placements overlap only when they are closer than the threshold
        on BOTH axes. The previous ``or`` test also rejected placements that
        merely shared a row or a column with an earlier object.

        Args:
            non_overlap_size_lst: Placements so far, candidate last. The
                candidate is removed from the list when it overlaps.
            threshold: Fraction of the candidate's size that must stay clear.

        Returns:
            ``True`` if the candidate overlapped (and was removed), ``False``
            otherwise, including for an empty or single-entry list.
        """
        if len(non_overlap_size_lst) <= 1:
            return False

        current_size, current_coords = non_overlap_size_lst[-1]
        current_width, current_height = current_size
        current_x, current_y = current_coords

        width_threshold = current_width * threshold
        height_threshold = current_height * threshold

        for _past_size, (past_x, past_y) in non_overlap_size_lst[:-1]:
            too_close_x = abs(current_x - past_x) < width_threshold
            too_close_y = abs(current_y - past_y) < height_threshold

            if too_close_x and too_close_y:
                non_overlap_size_lst.pop(-1)  # discard the rejected candidate
                return True

        return False


    # returns random coordinates and desired size of object given those coordinates
    def get_random_coords_and_size(self, obj_path, non_overlap_size_lst, frame_width, frame_height, nearest_obj, furthest_obj, boundary_x, boundary_y, TAV_cam_view=None, same_environment = False):
        """Find a placement (and matching size) that avoids earlier objects.

        Candidates are drawn with ``random_coords`` until ``overlap_coords``
        accepts one or the attempt budget runs out. Every candidate is
        appended to ``non_overlap_size_lst`` before it is checked, so retries
        are actually verified and the accepted placement stays recorded for
        later objects. (Previously only the first candidate was appended, so
        any retry was returned unchecked and untracked.)

        Args:
            obj_path: Path of the object image.
            non_overlap_size_lst: ``((width, height), (x, y))`` entries of the
                objects placed so far; updated in place.
            frame_width: Background width.
            frame_height: Background height.
            nearest_obj: Size array for the nearest placement (unused when
                ``same_environment`` is truthy).
            furthest_obj: Size array for the furthest placement (unused when
                ``same_environment`` is truthy).
            boundary_x: x boundary forwarded to ``random_coords``.
            boundary_y: y boundary forwarded to ``random_coords``.
            TAV_cam_view: Camera view string, or falsy for generic backgrounds.
            same_environment: When truthy the object's own size is used.

        Returns:
            ``(random_coordinates, size, non_overlap_size_lst)``. If no free
            spot is found a message is printed and the last candidate is
            returned without being recorded in the list.
        """
        max_retries = 100000

        # the context manager releases the file handle once sizes are known
        with Image.open(obj_path) as orig_img:
            # one initial candidate plus up to max_retries re-draws
            for _attempt in range(max_retries + 1):
                random_coordinates = self.random_coords(frame_width, frame_height, boundary_x, boundary_y, TAV_cam_view)

                if not same_environment:
                    size = self.desired_size(random_coordinates, nearest_obj, furthest_obj, frame_width, frame_height, TAV_cam_view)
                    # resize to learn the real size after aspect-ratio scaling
                    size = self.image_resize(orig_img, size, TAV_cam_view).size
                else:
                    size = orig_img.size

                # record the candidate; overlap_coords removes it again if rejected
                non_overlap_size_lst.append((size, random_coordinates))
                if not self.overlap_coords(non_overlap_size_lst):
                    break
            else:
                print('unable to find a non-overlapping point')

        return random_coordinates, size, non_overlap_size_lst



    # returns randomly augmented object image: version 1 -> non-living objects
    def random_augmentv1(self, image):
        """Apply a random mix of augmentations (version 1: non-living objects).

        Each step is switched on by an independent ``random.choice`` between
        'Yes' and 'No', in this order: horizontal mirror, brightness change
        (4-channel images only, factor in [0.5, 1.5]), counter-clockwise
        rotation by 1-20 degrees with an expanded canvas (white fill for
        3-channel images), and a radius-1 box blur.

        Args:
            image: PIL image; its array form is indexed as ``shape[2]`` to
                tell 3-channel from 4-channel input.

        Returns:
            The augmented image.
        """
        def coin_flip():
            return random.choice(['Yes', 'No']) == 'Yes'

        if coin_flip():
            image = ImageOps.mirror(image)  # flip horizontally

        # brightness is only adjusted for 4-channel (PNG) images
        if coin_flip() and np.array(image).shape[2] == 4:
            image = ImageEnhance.Brightness(image).enhance(random.uniform(0.5, 1.5))

        if coin_flip():
            angle = random.randint(1, 20)  # counter-clockwise
            channels = np.array(image).shape[2]
            if channels == 3:
                # 3 channels (JPEG): fill the exposed corners with white
                image = image.rotate(angle, Image.NEAREST, fillcolor='white', expand=True)
            elif channels == 4:
                # 4 channels (PNG): no fill colour is passed
                image = image.rotate(angle, Image.NEAREST, expand=True)

        # contrast and saturation adjustments existed here as commented-out
        # code and remain disabled

        if coin_flip():
            image = image.filter(ImageFilter.BoxBlur(1))  # box blur, radius 1

        return image


    # returns randomly augmented object image: version 2 -> living objects
    def random_augmentv2(self, image, bg_image, random_coords, head_bbox_file=None, fallen_person = False):
        """Apply random augmentations (version 2: living objects).

        Steps, in order: an optional fixed 90-degree rotation for a fallen
        person, a random horizontal mirror, and a random rotation between
        -20 and 20 degrees. Every applied step is recorded so the head
        bounding box can be replayed through the same transformations.

        Fixes over the previous version: 4-channel images are now rotated by
        the random angle (they were rotated by 90 degrees and then hit an
        undefined ``angle``), and each rotation is recorded exactly once
        (3-channel rotations were recorded twice, rotating the head box
        twice). A rotation that is not applied is no longer recorded.

        Args:
            image: PIL image of the object.
            bg_image: PIL image of the background (only its size is used).
            random_coords: ``(x, y)`` placement of the object.
            head_bbox_file: Optional image path whose XML holds a head box.
            fallen_person: Rotate the object 90 degrees first when truthy.

        Returns:
            ``(image, head_bbox_info)`` when ``head_bbox_file`` is given,
            otherwise just ``image``.
        """
        yes_or_no = ['Yes', 'No']
        augmentations = []

        # sizes before any augmentation
        initial_img_size = image.size
        bg_size = bg_image.size

        def rotate(img, angle):
            """Rotate on an expanded canvas; None if the channel count is unsupported."""
            channels = np.array(img).shape[2]
            if channels == 3:
                # 3 channels: fill the exposed corners with white
                return img.rotate(angle, Image.NEAREST, fillcolor='white', expand=True)
            if channels == 4:
                # 4 channels (PNG): no fill colour is passed
                return img.rotate(angle, Image.NEAREST, expand=True)
            return None

        if fallen_person:  # lay an upright object on its side
            rotated = rotate(image, 90)
            if rotated is not None:
                image = rotated
                augmentations.append((image, 90))

        if random.choice(yes_or_no) == 'Yes':
            image = ImageOps.mirror(image)  # flip horizontally
            augmentations.append((image, 'mirrored'))

        if random.choice(yes_or_no) == 'Yes':
            angle = random.randint(-20, 20)  # tilt the person slightly
            rotated = rotate(image, angle)
            if rotated is not None:
                image = rotated
                augmentations.append((image, angle))

        if head_bbox_file:
            head_bbox_info = self.update_head_bbox_info(initial_img_size, bg_size, head_bbox_file, random_coords, augmentations)
            return image, head_bbox_info

        return image

    # returns roi of background given object and random coords
    def bg_roi_img(self, resized_obj_img, bg_img, random_x_coord, random_y_coord):
        """Cut out the background region the object will be placed on.

        Args:
            resized_obj_img: Object image exposing ``.size`` as
                ``(width, height)``.
            bg_img: Background image, converted with ``np.array``.
            random_x_coord: Left edge of the region on the background.
            random_y_coord: Top edge of the region on the background.

        Returns:
            The cropped region as an array, passed through ``cv2.resize``.
        """
        background = np.array(bg_img)

        obj_width, obj_height = resized_obj_img.size

        left, top = random_x_coord, random_y_coord
        patch = background[top:top + obj_height, left:left + obj_width]

        # NOTE(review): dsize is given as (height, width) although cv2.resize
        # takes (width, height); the sibling mask helpers use the same order,
        # so the arrays they exchange presumably line up -- confirm before
        # changing this.
        return cv2.resize(patch, (obj_height, obj_width))



    # returns contour of object in image
    def object_contour(self, obj_mask):
        """Return the largest external contour found in a mask.

        Args:
            obj_mask: Mask array; a copy is handed to ``cv2.findContours``.

        Returns:
            The contour with the greatest ``cv2.contourArea``, assumed to be
            the object.
        """
        found = imutils.grab_contours(
            cv2.findContours(obj_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        )

        return max(found, key=cv2.contourArea)


    
    # returns object mask and inverted mask
    def obj_mask_and_inverted_mask(self, resized_img):
        """Build the object's alpha mask and its bitwise complement.

        Args:
            resized_img: Object image exposing ``.size`` as
                ``(width, height)``; converted to four channels through
                ``png_format`` with a threshold of 240.

        Returns:
            ``(obj_mask, obj_mask_inverted)`` where the mask is channel 3 of
            the converted image and the second entry is ``cv2.bitwise_not``
            of it.
        """
        img_width, img_height = resized_img.size

        four_channel = self.png_format(resized_img, 240)
        # NOTE(review): dsize is given as (height, width) although cv2.resize
        # takes (width, height); get_yolo_information swaps again before
        # measuring the box -- confirm before changing this.
        four_channel = cv2.resize(four_channel, (img_height, img_width))

        alpha_mask = four_channel[:, :, 3]

        return (alpha_mask, cv2.bitwise_not(alpha_mask))  # opencv format



    # returns object background and foreground images given object mask and inverted mask
    def object_foreground_and_background(self, obj_mask, obj_mask_inverted, roi, resized_object_img):
        """Split a placement into background-only and object-only layers.

        Args:
            obj_mask: Mask selecting the object's pixels.
            obj_mask_inverted: Mask selecting everything except the object.
            roi: Background region the object is placed on.
            resized_object_img: Object image exposing ``.size`` as
                ``(width, height)``.

        Returns:
            ``(bg, fg)``: the region masked by the inverted mask, and the
            object pixels masked by the object mask.
        """
        img_width, img_height = resized_object_img.size

        obj_pixels = self.png_format(resized_object_img, 240)
        # same (height, width) dsize order as the mask and ROI helpers
        obj_pixels = cv2.resize(obj_pixels, (img_height, img_width))
        obj_pixels = cv2.cvtColor(obj_pixels, cv2.COLOR_RGB2BGR)

        background_layer = cv2.bitwise_and(roi, roi, mask=obj_mask_inverted)

        foreground_layer = cv2.bitwise_and(obj_pixels, obj_pixels, mask=obj_mask)
        # swap the channel order back after the conversion above
        foreground_layer = cv2.cvtColor(foreground_layer, cv2.COLOR_BGR2RGB)

        return (background_layer, foreground_layer)  # opencv format




    # returns object img with selected background
    def img_with_mask(self, resized_object_img, bg_img, random_coordinates, frame_width, frame_height, TAV_cam_view = None):
        """Blend the object onto its background region and compute its box.

        Args:
            resized_object_img: Object image (PIL).
            bg_img: Background image (PIL).
            random_coordinates: ``(x, y)`` placement of the object.
            frame_width: Background width used for normalization.
            frame_height: Background height used for normalization.
            TAV_cam_view: Accepted but not used by this method.

        Returns:
            ``(img_with_mask, yolo_info)``: the blended patch as a PIL image
            resized to the object's size, and the normalized bounding box
            from ``get_yolo_information``.
        """
        x_offset, y_offset = random_coordinates

        roi = self.bg_roi_img(resized_object_img, bg_img, x_offset, y_offset)

        mask, inverse_mask = self.obj_mask_and_inverted_mask(resized_object_img)

        yolo_info = self.get_yolo_information(mask, x_offset, y_offset, frame_width, frame_height)

        background_layer, foreground_layer = self.object_foreground_and_background(mask, inverse_mask, roi, resized_object_img)

        # the two layers cover complementary pixels, so adding them merges them
        blended = Image.fromarray(np.uint8(cv2.add(background_layer, foreground_layer)))
        blended = blended.resize(resized_object_img.size)  # back to the object's PIL size

        return (blended, yolo_info)



    # overlays object on background
    def overlay_img_on_bg(self, img_with_mask, bg_img, random_coords):
        """Paste a patch onto the background at the given position.

        Args:
            img_with_mask: Patch to paste.
            bg_img: Background image; modified in place through ``paste``.
            random_coords: Position handed to ``paste``.

        Returns:
            The same ``bg_img`` object.
        """
        position = random_coords
        bg_img.paste(img_with_mask, position)

        return bg_img


    # converts RGBA PNG to RGB
    def format_rgb(self, image):
        """Flatten an RGBA image onto a white RGB canvas.

        Args:
            image: Image whose fourth band (index 3 of ``split()``) is used
                as the paste mask.

        Returns:
            A new 'RGB' image of the same size with the input pasted over
            white.
        """
        white_canvas = Image.new('RGB', image.size, (255, 255, 255))

        alpha_band = image.split()[3]
        white_canvas.paste(image, mask=alpha_band)

        return white_canvas
    

    


    ##################################
    # Preprocessing Helper Functions #
    ##################################

    # creates text file for each background image
    def create_yolo_txt_file(self, yolo_lst, save_bg_file_name, YOLO_txt = False):
        """Write YOLO records to ``<save_bg_file_name>.txt``.

        Args:
            yolo_lst: Sequence of records; each record is written on its own
                line with its values separated by single spaces.
            save_bg_file_name: Output path without the ``.txt`` extension.
            YOLO_txt: Nothing is written unless this is truthy.
        """
        target = save_bg_file_name + '.txt'

        if not YOLO_txt:
            return

        with open(target, "w") as handle:
            for record in yolo_lst:
                fields = [str(record[i]) for i in range(len(record))]
                handle.write(' '.join(fields))
                handle.write('\n')


    # returns a list of sampled paths
    def sample_objects(self, sub_obj_folder_path, num_of_objects):
        """Randomly pick image files from a folder.

        Only names ending in ``.jpg`` or ``.png`` are considered. When more
        objects are requested than the folder holds, a random count between 1
        and the number of available images is used instead.

        Args:
            sub_obj_folder_path: Folder to sample from.
            num_of_objects: Number of images wanted.

        Returns:
            List of paths built as ``folder + '\\' + name``. The list is
            empty when the folder has no matching images (previously this
            raised from ``random.randint(1, 0)``).
        """
        image_names = [
            name for name in os.listdir(sub_obj_folder_path)
            if name[-4:] == '.jpg' or name[-4:] == '.png'
        ]

        if not image_names:
            return []

        if num_of_objects > len(image_names):
            num_of_objects = random.randint(1, len(image_names))

        chosen = random.sample(image_names, num_of_objects)

        # the backslash separator is kept because callers split paths on it
        return [sub_obj_folder_path + '\\' + name for name in chosen]



    # get list of sampled object paths 
    def get_object_paths(self, main_object_folder_path, num_of_objects, same_environment = None):
        lst_object_paths = []
        lst_names = []

        # if path is list: list of different object folder paths
        if type(main_object_folder_path) is list:

            # iterate through each object folder path
            for obj_folder in main_object_folder_path:
    
                foldername = obj_folder.split('\\')[-1]
                
                #only consider objects from same cam view: e.g object: cam1\obj_name , bg: cam1\bg_name
                if same_environment and foldername in same_environment:
                    sampled_object_paths = self.sample_objects(obj_folder, num_of_objects)
                    lst_names.append(foldername)
                    return (sampled_object_paths, lst_names)
                
                # sample each sub folder
                sampled_object_paths = self.sample_objects(obj_folder, num_of_objects)

                lst_object_paths.extend(sampled_object_paths)
                lst_names.append(foldername)

            # sample all object paths: to get a variety of different objects
            main_sampled_object_paths = random.sample(lst_object_paths, num_of_objects)

            # sort object name types in alpha-numeric order
            lst_names.sort()
            return (main_sampled_object_paths, lst_names)

        # if path is string: 1 object folder path
        elif type(main_object_folder_path) is str:
            foldername = main_object_folder_path.split('\\')[-1]
            
            #only consider persons from same cam view
            if same_environment and foldername in same_environment:
                sampled_object_paths = self.sample_objects(main_object_folder_path, num_of_objects)
                lst_names.append(foldername)
                return (sampled_object_paths, lst_names)
            
            # sample object folder
            sampled_object_paths = self.sample_objects(main_object_folder_path, num_of_objects)

            lst_object_paths.extend(sampled_object_paths)
            lst_names.append(foldername)
            return (lst_object_paths, lst_names)
        else:
            print('invalid object path!')



    ########################
    # Executable Functions #
    ########################

    # returns augmented background image with specified number of objects 
    def augmented_bg_with_objects(self, bg_path, num_of_objects, object_folder_path, same_environment=False, keep_labels=False):
        """Paste sampled, augmented object images onto one background image.

        Parameters
        ----------
        bg_path : str
            Path of the background image. The file name is taken as the text
            after the last backslash; if it contains 'cam' it is used as the
            camera-view hint passed to the helper methods.
        num_of_objects : int
            Number of object images requested from get_object_paths.
        object_folder_path : str or list
            Forwarded unchanged to get_object_paths.
        same_environment : bool
            Forwarded to the size/coordinate helpers. When truthy, every object
            is enlarged to twice its stored size instead of going through
            image_resize.
        keep_labels : bool
            When False (default, and the historical behaviour of this method)
            the label rows collected for the pasted objects are discarded and
            an empty list is returned. Pass True to get one row per object,
            each row being the list returned by img_with_mask with the class
            index inserted at position 0.

        Returns
        -------
        tuple
            (bg_img, YOLO_lst): the composed PIL image and the list of label
            rows (empty unless keep_labels is True).
        """
        # get filename of background (paths are split on backslashes, as elsewhere in this class)
        bg_name = bg_path.split('\\')[-1]
        TAV_cam_view = bg_name if 'cam' in bg_name else None

        # get list of object image paths and the class names used for label indices
        sampled_object_paths, YOLO_CLASSES = self.get_object_paths(object_folder_path, num_of_objects, same_environment=TAV_cam_view)

        # background image dimension
        bg_img = Image.open(bg_path).convert('RGB')
        frame_width, frame_height = bg_img.size

        # list threaded through get_random_coords_and_size on every iteration
        non_overlap_size_lst = []

        # one label row per pasted object
        YOLO_lst = []

        for obj_path in sampled_object_paths:
            # object class name = name of the folder that contains the object image
            class_name = obj_path.split('\\')[-2]

            # initialize smallest and largest object sizes and boundary coordinates
            nearest_obj, furthest_obj, boundary_x, boundary_y = self.initialize_sizes_and_coordinates(bg_path, scale=1, TAV_cam_view = TAV_cam_view, same_environment=same_environment)

            # get random coordinates and desired size given those coordinates and updated coordinates list
            random_coordinates, desired_obj_size, non_overlap_size_lst = self.get_random_coords_and_size(obj_path, non_overlap_size_lst, frame_width, frame_height, nearest_obj, furthest_obj, boundary_x, boundary_y, TAV_cam_view, same_environment=same_environment)

            # resize object image based on desired size
            object_img = Image.open(obj_path)

            if not same_environment:
                resized_object_img = self.image_resize(object_img, desired_obj_size, TAV_cam_view)
            else:
                #NOTE: seems like strollers were shrunk by half its original size, so resizing is needed here
                resized_object_img = object_img.resize((object_img.size[0]*2,object_img.size[1]*2))

            # randomly augment resized object
            resized_object_img = self.random_augmentv1(resized_object_img)

            # obtain image with mask
            img_mask, yolo_info = self.img_with_mask(resized_object_img, bg_img, random_coordinates, frame_width, frame_height, TAV_cam_view)

            # overlay object on background
            bg_img = self.overlay_img_on_bg(img_mask, bg_img, random_coordinates)

            # insert CLASSID into yolo_info list
            yolo_info.insert(0, YOLO_CLASSES.index(class_name))
            YOLO_lst.append(yolo_info)

        # The original code reset the list unconditionally at this point, so
        # callers always received no labels. That stays the default; the reset
        # is now an explicit, documented choice.
        if not keep_labels:
            YOLO_lst = []

        return bg_img, YOLO_lst


    # generate fallen person on background
    def augmented_bg_with_fallen_person(self, bg_path, num_of_objects, obj_folder_path):
        """Compose one background frame with augmented person cut-outs.

        For every sampled person image the method asks the helper methods for
        a placement, augments the cut-out with random_augmentv2 (which also
        returns a head label row), masks it, pastes it on the background and
        collects the label rows that pass verify_yolo_information.

        Returns (image, rows): the composed PIL image and the accepted label
        rows. Person rows get class index 1 inserted at position 0; head rows
        are appended exactly as random_augmentv2 returned them.
        """
        # background file name = text after the last backslash of the path
        background_file = bg_path.split('\\')[-1]
        TAV_cam_view = background_file if 'cam' in background_file else None

        # the class-name list returned alongside the paths is not needed here
        person_paths, _class_names = self.get_object_paths(obj_folder_path, num_of_objects, same_environment=TAV_cam_view)

        # open the background and read its dimensions
        canvas = Image.open(bg_path).convert('RGB')
        frame_width, frame_height = canvas.size

        placed = []   # threaded through get_random_coords_and_size
        rows = []     # accepted label rows

        for person_path in person_paths:
            # containing-folder name; evaluated as in the original but not used further
            _class_folder = person_path.split('\\')[-2]

            # size limits and placement boundaries for this background
            nearest_obj, furthest_obj, boundary_x, boundary_y = self.initialize_sizes_and_coordinates(
                bg_path, scale=1, TAV_cam_view=TAV_cam_view, same_environment=True
            )

            # placement for this cut-out (the suggested size is not used by this method)
            placement, _suggested_size, placed = self.get_random_coords_and_size(
                person_path, placed, frame_width, frame_height,
                nearest_obj, furthest_obj, boundary_x, boundary_y,
                TAV_cam_view, same_environment=True
            )

            # augment the cut-out; also yields the head label row
            cutout = Image.open(person_path)
            augmented_cutout, head_row = self.random_augmentv2(
                cutout, canvas, placement, head_bbox_file=person_path, fallen_person=True
            )

            # mask the cut-out, then paste it onto the running background
            masked, person_row = self.img_with_mask(
                augmented_cutout, canvas, placement, frame_width, frame_height, TAV_cam_view
            )
            canvas = self.overlay_img_on_bg(masked, canvas, placement)

            # 'person' is index '1' in YOLOv4PA
            person_row.insert(0, 1)

            # keep only rows that pass validation: person first, then head
            for row in (person_row, head_row):
                if self.verify_yolo_information(row):
                    rows.append(row)

        return canvas, rows
    

    # generate augmented backgrounds with objects
    def generate_augmented_backgrounds(self, num_of_augmented_frames, num_of_objects, obj_folder_path, bg_images_path, save_path, YOLO_txt = False, fallen_person = None):
        """Generate and save a batch of augmented background frames.

        Parameters
        ----------
        num_of_augmented_frames : int
            Number of frames to attempt. Frames whose generation raises are
            reported and skipped, so fewer files may be written.
        num_of_objects : int
            Forwarded unchanged to the compositing method.
        obj_folder_path : str or list
            Folder(s) of object images, forwarded to the compositing method.
        bg_images_path : str
            Folder whose entries are sampled (with replacement) as backgrounds.
        save_path : str
            Folder that receives '<background stem>_<frame index>.jpg'; it is
            created if missing.
        YOLO_txt : bool
            Forwarded to create_yolo_txt_file together with the label list.
        fallen_person : bool or None
            Truthy selects augmented_bg_with_fallen_person; otherwise
            augmented_bg_with_objects is used with same_environment=False.

        Raises
        ------
        IndexError
            If bg_images_path contains no entries.
        """
        # nothing requested: do not touch the file system at all
        if num_of_augmented_frames <= 0:
            return

        # List the backgrounds once. Re-listing on every frame was wasted work
        # and would start sampling generated frames if save_path and
        # bg_images_path were ever the same folder.
        lst_bg_paths = os.listdir(bg_images_path)
        if not lst_bg_paths:
            raise IndexError(f'no background images found in {bg_images_path}')

        # a missing output folder used to make every single frame fail
        if save_path:
            os.makedirs(save_path, exist_ok=True)

        for i in tqdm(range(num_of_augmented_frames)):
            # randomly sample background image in the list
            bg_file = random.choice(lst_bg_paths)
            bg_path = bg_images_path + '\\' + bg_file

            # strip only the extension so names containing dots are not truncated
            bg_name = os.path.splitext(bg_file)[0]

            # complete path (without extension) to save background image
            save_bg_name = save_path + '\\' + f'{bg_name}_{i}'

            #NOTE: there are occasions where random coordinates generated contain a 0, thus we avoid an error by implementing Try/Except
            try:
                # get augmented background and YOLO list
                if fallen_person:
                    bg_img, YOLO_lst = self.augmented_bg_with_fallen_person(bg_path, num_of_objects, obj_folder_path)
                else:
                    bg_img, YOLO_lst = self.augmented_bg_with_objects(bg_path, num_of_objects, obj_folder_path, same_environment=False)

                # save augmented background (and YOLO txt file if activated)
                bg_img.save(save_bg_name + '.jpg')
                self.create_yolo_txt_file(YOLO_lst, save_bg_name, YOLO_txt)
            except Exception as e:
                # best effort: report which frame was skipped and carry on
                print(f'frame {i} ({bg_file}) skipped: {e}')



    

In [507]:
########################################################################################
# TESTING: single-frame preview. Enable one of the commented calls to try it out.
bg_path = r"D:\Daniel\PMD\YOLOv4\falling_person\TAV_backgrounds\night_TAV_backgrounds\cam1_night.jpg"
# alternative object folder for the preview:
#   r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam1"
obj_folder_path = r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\PMDs\pmd_pngs\bicycle_all"
# img, lst = ObjectAugmentation().augmented_bg_with_fallen_person(bg_path, 1, obj_folder_path)
# img, lst = ObjectAugmentation().augmented_bg_with_objects(bg_path, 2, obj_folder_path)
# plt.imshow(img)
#########################################################################################

# One row per dataset: (background folder, object folder or list of folders, output folder).
# The row position is the value to put in `indx` below.
augmentation_sets = [
    # 0: dogs
    (
        r"D:\Daniel\PMD\YOLOv4\falling_person\TAV_backgrounds\night_TAV_backgrounds",
        [
            r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\dogs\TAV_dog_images\cam1',
            r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\dogs\TAV_dog_images\cam2',
            r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\dogs\TAV_dog_images\cam3',
            r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\dogs\TAV_dog_images\cam4',
        ],
        r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\dogs\augmented_bg_frames\dogs_augmented_bg_frames_3",
    ),
    # 1: person
    (
        r"D:\Daniel\PMD\YOLOv4\falling_person\TAV_backgrounds\night_TAV_backgrounds",
        [
            r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam1",
            r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam2",
            r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam3",
            r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam4",
        ],
        r"D:\Daniel\PMD\YOLOv4\falling_person\augmented_bg_frames\person_augmented_backgrounds_3",
    ),
    # 2: cones
    (
        r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\cones\background_images',
        r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\cones\cone_png',
        r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\cones\augmented_bg_frames\cones_augmented_bg_frames_2",
    ),
    # 3: colour noise
    (
        r'D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\cones\background_images',
        r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\colour_noise\colour_noise_pngs",
        r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\colour_noise\augmented_bg_frames\noise_augmented_frames_2",
    ),
    # 4: PMDs
    (
        r"D:\Daniel\PMD\YOLOv4\falling_person\TAV_backgrounds\day_night_TAV_backgrounds",
        [
            r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\PMDs\pmd_pngs\bicycle_all",
            r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\PMDs\pmd_pngs\motorcycle_all",
            r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\PMDs\pmd_pngs\scooter_all",
        ],
        r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\PMDs\pmd_augmented_bg_frames",
    ),
]

# column views of the table, kept under their original names
bg_images_paths = [row[0] for row in augmentation_sets]
obj_paths = [row[1] for row in augmentation_sets]
save_paths = [row[2] for row in augmentation_sets]

# dataset to run (1 = person)
indx = 1
bg_images_path, obj_folder_path, save_path = augmentation_sets[indx]

# Non-living objects
# ObjectAugmentation().generate_augmented_backgrounds(7000, 3, obj_folder_path, bg_images_path, save_path, YOLO_txt=True, fallen_person=False)

# Living objects
ObjectAugmentation().generate_augmented_backgrounds(
    15000,
    1,
    obj_folder_path,
    bg_images_path,
    save_path,
    YOLO_txt=True,
    fallen_person=True,
)


100%|██████████| 15000/15000 [11:46<00:00, 21.22it/s]


In [504]:
# Convert selected labelled screenshots with ObjectAugmentation.png_format and
# store the results as .png files in save_path.
obj_folder_path = r"D:\Daniel\PMD\YOLOv4\YOLOv4_negatives\colour_noise\colour_noise_imgs"  # not used in this cell
save_path = r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night"
pathnames = [r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam1\Screenshot 2021-11-23 at 12.15.45 PM.jpg",
r"D:\Daniel\PMD\YOLOv4\falling_person\labelled_pedestrians\night\cam1\Screenshot 2021-11-23 at 12.16.28 PM.jpg"]
for path in pathnames:
    # file name without the '.jpg' suffix (the names themselves contain dots)
    pathname = path.split('\\')[-1].split('.jpg')[0]
    # img = cv2.imread(path)
    # img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    # NOTE(review): the meaning of the second argument (220) is defined by png_format, which is not visible here
    img = ObjectAugmentation().png_format(path, 220)
    out_path = save_path + '\\' + pathname + '.png'
    # cv2.imwrite signals failure by returning False rather than raising,
    # so report it instead of silently losing the file
    if not cv2.imwrite(out_path, img):
        print(f'failed to write {out_path}')


In [365]:
# scratch check: the pair (120, 135) scaled by a factor of 2.5
2.5 * np.array([120, 135])

array([300. , 337.5])