In [1]:
import copy
from scipy.spatial.distance import euclidean
import json
import numpy as np
from fastdtw import fastdtw
import os
import cv2
import matplotlib.pyplot as plt
from  IPython.display import clear_output
import copy
import math
import time 

import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import imgaug as ia

#pip install imgaug
#pip install fastdtw

# Process Halves

In [2]:
class VideoDatasetSSL():

    def __init__(self, path_bar_trayectory, output_path, videos_path, desired_length_frames):
        super(VideoDatasetSSL, self).__init__()
        self.path_bar_trayectory = path_bar_trayectory
        self.output_path = output_path
        self.videos_path = videos_path
        self.desired_length = desired_length_frames
        self.trayectories = {}
        self.videos_files = []
        self.dtw_path = {}
        self.get_all_trayectories()

    def get_all_trayectories(self):
        files = [f for f in os.listdir(self.videos_path)]
        files = sorted(files)  
        self.videos_files = files
        for file in files:
            name_ = file.split(".")[0]
            path = os.path.join(self.path_bar_trayectory, name_+".json")
            f = open(path)
            data = json.load(f) 
            trayectory_ = self.get_trayectory(data)
            self.trayectories[name_] = trayectory_
    
    def get_size(self):
        return len(self.videos_files)

    def obtener_std(self):
        count = []
        for video_name in self.videos_files:
            path_to_video = os.path.join(self.videos_path, video_name)
            cap= cv2.VideoCapture(path_to_video)
            totalframecount= int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            count.append(totalframecount)
        count = np.array(count)
        print('Std: {0}, Mean: {1}, Max: {2}, Min: {3}'.format(np.std(count), np.mean(count), np.max(count), np.min(count)))

    def normalizar_posiciones_y(self, posiciones_y):
        pos_min = min(posiciones_y)
        pos_max = max(posiciones_y)
        amplitud_vertical = pos_max - pos_min

        if amplitud_vertical == 0:
            factor_escala = 1
        else:
            factor_escala = 1 / amplitud_vertical

        posiciones_y_normalizadas = np.array([[(pos - pos_min) * factor_escala] for pos in posiciones_y])
        return posiciones_y_normalizadas
    
    def get_trayectory(self, data):
        vertical_trayectory = np.zeros(len(data))
        trayectoria_normalizada = []
        for cnt, _ in enumerate(data):
            if len(data[cnt][0]) > 0:
                x1,y1,x2,y2,_ = data[cnt][0][0]
                center_x, center_y = int(x1 + (x2-x1)/2) , int(y1 + (y2 - y1)/2)
                vertical_trayectory[cnt] = center_y
            else:
                indices = np.nonzero(vertical_trayectory)[0]
                if len(indices) > 0:
                    j = indices[indices < cnt][-1]
                    value = vertical_trayectory[j]
                    vertical_trayectory[cnt] = value
        
        trayectoria_normalizada = self.normalizar_posiciones_y(vertical_trayectory)
        return trayectoria_normalizada
    
    def video_alignment(self, video1_embeddings, video2_embeddings):
        distance, path = fastdtw(video1_embeddings, video2_embeddings, dist=euclidean)
        return path, distance
    
    def find_lowest_point_index(self, y_trajectory):
        return np.argmin(y_trajectory)

    def split_trajectory(self, y_trajectory):
        lowest_point_index = self.find_lowest_point_index(y_trajectory)
        first_half = y_trajectory[:lowest_point_index]
        second_half = y_trajectory[lowest_point_index:]
        if lowest_point_index == 0 or lowest_point_index == len(y_trajectory):
            half_length = len(y_trajectory) // 2
            first_half = y_trajectory[:half_length]
            second_half = y_trajectory[half_length:]
        return first_half, second_half

    def save_frames(self, frames, output_path):
        for j in range(len(frames)):
            image_array = np.array(frames[j])
            out_path = output_path+"/{}".format(str(j).zfill(3)) +".jpg"
            cv2.imwrite(out_path, image_array)

    def save_sync_videos(self, path, frames, output_path):
        f_h_anchor = []
        s_h_negative = []
        
        for i,j in path:
            frame1 = frames[i]
            frame2 = frames[len(frames)-j-1]
            f_h_anchor.append(frame1)
            s_h_negative.append(frame2)

        p1 = output_path+"/f_h"
        p2 = output_path+"/s_h"

        try:
            os.mkdir(p1)
            os.mkdir(p2)
        except:
            print("duplicated")

        #print(p1,p2)
        self.save_frames(f_h_anchor, p1)
        self.save_frames(s_h_negative,p2)
    
    def interpolate_trajectory(self, _trajectory, num_frames = 200):
        trajectory = []
        scale_factor = len(_trajectory) / num_frames
        
        for i in range(num_frames):
            original_frame_idx = int(i * scale_factor)
            y = _trajectory[original_frame_idx]
            if original_frame_idx < len(_trajectory) - 1:
                next_y = _trajectory[original_frame_idx + 1]
                interpolated_y = (y + next_y) / 2
                trajectory.append(interpolated_y)
            else:
                trajectory.append(y)
        
        return trajectory
    
    def load_images(self,path):
        files = os.listdir(path)
        files.sort(key=lambda x: int(x.split('.')[0]))
        #print(files)
        frames = [cv2.imread(path+"/"+name, cv2.COLOR_BGR2RGB) for name in files]
        return frames
    
    def obtener_mitad(self, idx):
        video_name = self.videos_files[idx]
        name_ = video_name.split('.')[0]
        #path_ = os.path.join(self.input_path, video_name)
        path_ = os.path.join(self.output_path, name_)
        path_fh = os.path.join(path_, "_f_h")
        path_sh = os.path.join(path_, "_s_h")
        f1 = self.load_images(path_fh)
        f2 = self.load_images(path_sh)

        for i in range(len(f1)):
            frame = np.hstack((f1[i], f2[i]))
            clear_output(wait=True)
            plt.imshow(frame)
            plt.show()
    
    def adjust_points_for_horizontal_flip(self,points, frame_width):
        points_2d = np.reshape(points, (-1, 2))
        points_2d[:, 0] = frame_width - points_2d[:, 0]
        adjusted_points = points_2d.flatten()

    def load_video(self, path):
        video  = cv2.VideoCapture(path)
        success = True
        count = 1
        frames = []
        while success:
            success, frame = video.read()
            if success:
                frame = cv2.resize(frame, (480,480), cv2.INTER_AREA)
                frames.append(frame)
                count+=1
            else:
                break
        return frames

    def __getitem__(self, idx):
        video_name = self.videos_files[idx]
        name_ = video_name.split('.')[0]
        trajectory = self.trayectories[name_]
        path_to_video = os.path.join(self.videos_path, video_name)
        frames = self.load_video(path_to_video)
        first_half, second_half = self.split_trajectory(trajectory)
        second_half = np.flip(second_half)
        path, cost = self.video_alignment(first_half,second_half)
        
        tam = math.ceil(len(path) / self.desired_length)
        final_path = []
        for i in range(0,len(path), tam):
            final_path.append(path[i])
        
        while len(final_path) < self.desired_length:
            final_path.append(path[-1])

        path_ = os.path.join(self.output_path, name_)
        try:
            os.mkdir(path_)
        except:
            pass
        #self.save_sync_videos(final_path, frames, path_)

In [3]:
# Dataset locations, relative to the notebook's working directory.
path_bar_trayectory = "./FAQA/bar_trajectories_raw/"
output_path = "./FAQA/Images/"
videos_path = "./FAQA/videos/"
# Number of aligned frame pairs kept per video (stored as desired_length).
desired_length_frames = 16

# Constructing the dataset lists videos_path and loads every trajectory JSON.
SSL_dataset = VideoDatasetSSL(path_bar_trayectory, output_path, videos_path, desired_length_frames)
#SSL_dataset.obtener_mitad(0) Only used to display the aligned videos

In [4]:
#SSL_dataset.obtener_std()

In [5]:
# Process every indexed video, reporting progress after each one.
n = SSL_dataset.get_size()
for i in range(n):
    SSL_dataset[i]
    print("El video "+ str(i) + " ha sido procesado")

El video 0 ha sido procesado
El video 1 ha sido procesado
El video 2 ha sido procesado
El video 3 ha sido procesado
El video 4 ha sido procesado
El video 5 ha sido procesado
El video 6 ha sido procesado
El video 7 ha sido procesado
El video 8 ha sido procesado
El video 9 ha sido procesado
El video 10 ha sido procesado
El video 11 ha sido procesado
El video 12 ha sido procesado
El video 13 ha sido procesado
El video 14 ha sido procesado
El video 15 ha sido procesado
El video 16 ha sido procesado
El video 17 ha sido procesado
El video 18 ha sido procesado
El video 19 ha sido procesado
El video 20 ha sido procesado
El video 21 ha sido procesado
El video 22 ha sido procesado
El video 23 ha sido procesado
El video 24 ha sido procesado
El video 25 ha sido procesado
El video 26 ha sido procesado
El video 27 ha sido procesado
El video 28 ha sido procesado
El video 29 ha sido procesado
El video 30 ha sido procesado
El video 31 ha sido procesado
El video 32 ha sido procesado
El video 33 ha sido 

# Add and Check Augmentations

In [6]:
class PlotVideos:
    def __init__(self, input_path, total = 1000):
        super(PlotVideos, self).__init__()
        self.input_path = input_path
        with open(os.path.join(self.input_path, 'ohp_ssl_boxes_16.json'), 'r') as file_object:
            self.boxes = json.load(file_object)
        self.videos = []
        self.total = total
        self.get_list()

    def get_list(self):
        path = os.path.join(self.input_path,"Images")
        videos = os.listdir(path)
        videos.sort()
        for video_name in videos[:self.total]:
            self.videos.append(os.path.join(path,video_name))
    
    def denormalise(self, image):
        #image = image.numpy().transpose(1, 2, 0)  # PIL images have channel last
        mean = [0.485, 0.456, 0.406]
        stdd = [0.229, 0.224, 0.225]
        image = (image * stdd + mean).clip(0, 1)
        image = cv2.resize(image, (480,480), interpolation = cv2.INTER_AREA)
        return image
    
    def transform_boxes(self, boxes, crop_size=(225, 225), frame_width=480, frame_height=480, flip_horizontal = True):
        """Adjust box coordinates for a crop and an optional horizontal flip.

        Every box is cloned, then its even columns (treated as x values) and
        odd columns (treated as y values) are shifted by the crop size,
        optionally mirrored, and finally capped.

        NOTE(review): ``box.clone()`` and the ``[:, 0::2]`` indexing suggest
        2-D torch tensors - confirm against the caller; nothing in this file
        calls this method.

        Args:
            boxes: Iterable of boxes supporting ``.clone()`` and 2-D slicing.
            crop_size: ``(crop_width, crop_height)``.
            frame_width: Frame width used for the final cap on x values.
            frame_height: Frame height used for the final cap on y values.
            flip_horizontal: Whether to mirror the x values.

        Returns:
            List with one adjusted copy per input box.
        """
        adjusted_boxes = []
        for box in boxes:
            box = box.clone()  # Work on a copy so the original box is not modified
            crop_width, crop_height = crop_size

            # Shift the x and y values of the box for the RandomCrop (floored at 0)
            box[:, 0::2] = np.maximum(0, box[:, 0::2] - crop_width)
            box[:, 1::2] = np.maximum(0, box[:, 1::2] - crop_height)

            # flip_horizontal is assumed to be True when a horizontal flip was applied
            if flip_horizontal:
                box[:, 0::2] = np.maximum(0, crop_width - box[:, 0::2])  # Mirror about the vertical axis

            # frame_width and frame_height are assumed to be the original frame dimensions
            box[:, 0::2] = np.minimum(box[:, 0::2], frame_width - crop_width)
            box[:, 1::2] = np.minimum(box[:, 1::2], frame_height - crop_height)

            adjusted_boxes.append(box)
        return adjusted_boxes

    def plot_sync_videos(self, idx):
        f_h_anchor, f_h_positive, s_h_negative, box_anc, box_pos, box_neg = self.__getitem__(idx)
        #b1, b2, b3 = self.load_boxes(idx)
        h,w,_ = f_h_anchor[0].shape
        
        box_anc_ = []
        box_pos_ = []
        box_neg_ = []

        for i in range(16):
            box_anc_1 = [box_anc[i][0], box_anc[i][1], box_anc[i][4], box_anc[i][5]]
            box_pos_1 = [box_pos[i][0], box_pos[i][1], box_pos[i][4], box_pos[i][5]]
            box_neg_1 = [box_neg[i][0], box_neg[i][1], box_neg[i][4], box_neg[i][5]]
            box_anc_.append(box_anc_1)
            box_pos_.append(box_pos_1)
            box_neg_.append(box_neg_1)

        shift = np.random.randint(0, 25)
        f_h_anchor_aug, box_anc_ = self.add_video_transforms(copy.deepcopy(f_h_anchor), box_anc_, shift)
        s_h_negative_aug, box_pos_ = self.add_video_transforms(copy.deepcopy(s_h_negative), box_pos_, shift)
        f_h_positive_aug, box_neg_ = self.add_video_transforms(copy.deepcopy(f_h_positive), box_neg_, shift)
        
        #print(f_h_anchor)
        for i in range(16):
            #print(np.array(box_anc)[0])
            print(np.array(f_h_anchor_aug[0]).shape)
            #print(box_anc[i].x1)
            puntos1 = box_anc_[i]
            puntos2 = box_pos_[i]
            puntos3 = box_neg_[i]
            
            anc = np.ascontiguousarray(f_h_anchor_aug[i], dtype=np.uint8)
            neg = np.ascontiguousarray(s_h_negative_aug[i], dtype=np.uint8)
            pos = np.ascontiguousarray(f_h_positive_aug[i], dtype=np.uint8)

            

            #    img = np.ascontiguousarray(img, )

            
            cv2.rectangle(anc,(int(puntos1.x1), int(puntos1.y1)), (int(puntos1.x2), int(puntos1.y2)), (0,255,0), 2)
            cv2.rectangle(pos,(int(puntos2.x1), int(puntos2.y1)), (int(puntos2.x2), int(puntos2.y2)), (0,255,0), 2)
            cv2.rectangle(neg,(int(puntos3.x1), int(puntos3.y1)), (int(puntos3.x2), int(puntos3.y2)), (0,255,0), 2)

            #cv2.rectangle(anc,(int(puntos1[0]), int(puntos1[1])), (int(puntos1[2]), int(puntos1[3])), (0,255,0), 2)
            #cv2.rectangle(pos,(int(puntos2[0]), int(puntos2[1])), (int(puntos2[2]), int(puntos2[3])), (0,255,0), 2)
            #cv2.rectangle(neg,(int(puntos3[0]), int(puntos3[1])), (int(puntos3[2]), int(puntos3[3])), (0,255,0), 2)

            frame = np.hstack((anc, pos, neg))
            clear_output(wait=True)
            plt.imshow(frame)
            plt.show()
            time.sleep(1)

    def temporal_shift(self, frames, boxes, shift_amount=3):
        return frames[shift_amount:] + frames[:shift_amount], boxes[shift_amount:] + boxes[:shift_amount]
    
    def RandomHorizontalFlip(self, clip, bboxes):
        h,w,c = clip[0].shape
        points_2d = np.reshape(bboxes, (-1, 2))
        points_2d[:, 0] = w - points_2d[:, 0]
        adjusted_points = points_2d.flatten()
        return adjusted_points
        

    def apply_channel_shuffle(self,images, random_state, parents, hooks):
        num_channels = 3  # Suponiendo que tienes imágenes RGB con 3 canales (R, G, B)
        channel_permutation = np.random.permutation(num_channels)
        return [image[:, :, channel_permutation] for image in images]
    
    def keypoint_func(self,keypoints_on_images, random_state, parents, hooks):
        return keypoints_on_images

    def add_video_transforms(self, frames, boxes, shift_amount):
        """Temporally shift a clip and run it through an imgaug pipeline.

        After ``temporal_shift``, the first 16 boxes (given as
        ``[x1, y1, x2, y2]``) are wrapped in one ``BoundingBoxesOnImage`` and
        augmented together with the frames. Rotation angle, blur sigma,
        hue/saturation offset and x translation are drawn once per call.

        NOTE(review): all 16 boxes are attached to a single
        ``BoundingBoxesOnImage`` while ``images`` holds one entry per frame;
        confirm this is how imgaug is meant to pair boxes and frames here.

        Args:
            frames: List of frames; ``frames[0].shape`` is used as box canvas.
            boxes: List with at least 16 ``[x1, y1, x2, y2]`` entries.
            shift_amount: Forwarded to ``temporal_shift``.

        Returns:
            ``(augmented_frames, augmented_boxes)`` as returned by the augmenter.
        """
        frames, boxes = self.temporal_shift(frames, boxes, shift_amount)
        bbs_ = []
        for i in range(16):
            bbs_.append(BoundingBox(x1=boxes[i][0], y1=boxes[i][1], x2=boxes[i][2], y2=boxes[i][3]))
        bbs = BoundingBoxesOnImage(bbs_, shape=frames[0].shape)
        rotation_angle = np.random.randint(-25, 25)  # integer degrees in [-25, 25)
        sigma_value  = np.random.uniform(0, 1.0)  # Gaussian blur strength
        num_channels = 3  # images are assumed to have 3 channels (R, G, B)
        # Not used below (the Lambda step draws its own permutation), but the
        # call still consumes a draw from the global numpy RNG.
        channel_permutation = np.random.permutation(num_channels)
        saturation = np.random.randint(0,50)
        translate = np.random.randint(-20,20)  # horizontal shift in pixels
        scale_factor = 1.2

        video_augmenter = iaa.Sequential([
            #iaa.AssertShape((None, 480, 480, 3)),
            iaa.Fliplr(p=0.5),
            #iaa.ChannelShuffle(),
            iaa.CoarseDropout((0.1, 0.15), size_percent=(0.03, 0.03)),
            # NOTE(review): the offset named "saturation" is added to channel 0
            # inside WithHueAndSaturation; check whether that channel is hue.
            iaa.WithHueAndSaturation(
                iaa.WithChannels(0, iaa.Add(saturation))
            ),
            iaa.Lambda(self.apply_channel_shuffle, self.keypoint_func),  # Same channel permutation for all frames
            iaa.Affine(rotate=(rotation_angle), scale=scale_factor),  # Rotation by the angle drawn above, plus fixed scale
            iaa.TranslateX(px=translate),  # Horizontal translation
            iaa.GaussianBlur(sigma=sigma_value),  # Gaussian blur
             #Does not work
        ], random_order=False)

        # Apply the augmentations to all frames and bounding boxes
        video_, boxes_ = video_augmenter(images=frames, bounding_boxes=bbs)
        return video_, boxes_
    
    def load_boxes(self, idx):
        video_path = self.videos[idx]
        name = video_path.split('/')[-1]
        b1 = self.boxes[name+"_f_h"]
        b2 = self.boxes[name+"_s_h"]
        return b1,b2,b1
    
    def load_video(self,path, add_augmentations = False):
        transform = transforms.Compose([transforms.CenterCrop(H), transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                        std=[0.229, 0.224, 0.225])])
        files = os.listdir(path)
        files.sort(key=lambda x: int(x.split('.')[0]))
        #images = torch.zeros(num_frames, C, 480, 480)
        images = []
        for i, name in enumerate(files):
            path_to_image = path+"/"+name
            image = cv2.imread(path_to_image)
            im_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images.append(im_rgb)
        return images
    
    def __getitem__(self, idx):
        video_path = self.videos[idx]
        path_fh = os.path.join(video_path, "f_h")
        path_sh = os.path.join(video_path, "s_h")
        f1 = self.load_video(path_fh)
        f2 = self.load_video(path_sh)
        f3 = self.load_video(path_fh, add_augmentations=True)
        b1, b2, b3 = self.load_boxes(idx)
        #boxes1 = torch.tensor(b1)  # [16,8]
        #boxes2 = torch.tensor(b2)
        #boxes3 = torch.tensor(b3)
        
        boxes1 = list(b1)
        boxes2 = list(b2)
        boxes3 = list(b3)
        return f1, f3, f2, boxes1, boxes3, boxes2 

In [None]:
# Root folder expected to contain ohp_ssl_boxes_16.json and an Images/ sub-folder.
input_path = "./OHP_Unlabeled/"
transformations = PlotVideos(input_path)
# Visual check of the augmentations on the sample at index 30.
transformations.plot_sync_videos(30)