In [13]:
import json_tricks as json
import cv2
import numpy as np
import torch
from tqdm import tqdm
import os

from my_utils.gen_utils import parse_label_file, find_largest_exp_folder
from my_utils.postprocess import NMSPostprocess
from my_utils.ava_postprocessing import concatenate_results


In [14]:
# Run configuration: the model whose outputs are inspected and the clip they belong to.
model_name = 'VMAEv2'
video_path = '../input_dir/markt2_fight.mp4'

# Clip identifier: last path component, cut at its first dot.
video_name = video_path.rsplit('/', 1)[-1].partition('.')[0]


In [15]:
# Single table of the actions this notebook singles out:
# action id -> (short label, minimum score for the action to count).
_interesting_action_specs = {
    5: ('fall', 0.1),
    64: ('fight', 0.1),
    71: ('kick', 0.1),
    75: ('push', 0.1),
}

# The three containers used by the rest of the notebook, derived from the table.
interesting_actions_indices = list(_interesting_action_specs)
threshold_action = {
    action_id: min_score
    for action_id, (_, min_score) in _interesting_action_specs.items()
}
interesting_actions_labels = {
    action_id: label
    for action_id, (label, _) in _interesting_action_specs.items()
}

In [16]:
# Box colour per interesting action id. The visualisation cell draws on a frame
# that has already been converted to RGB, so the tuples are read as (R, G, B).
action_colors = {
    5: (0, 0, 255),      # fall
    64: (255, 0, 0),     # fight
    71: (255, 165, 0),   # kick
    75: (128, 0, 128),   # push
}


In [17]:
# Parent folder holding the experiment sub-folders for this clip / model pair.
output_directory = f'../output_dir/{video_name}/{model_name}/patch_batch'

In [18]:
# Pick the experiment sub-folder to read from. In the recorded run this resolved
# to '.../patch_batch/exp_2' (see the path displayed by the next cell).
# NOTE(review): presumably the highest-numbered exp_* folder — confirm in my_utils.gen_utils.
exp_output_directory = find_largest_exp_folder(output_directory)

In [20]:
# Raw results file inside the resolved experiment folder; the bare expression
# on the last line displays the path as the cell output.
all_results_json_path = os.path.join(exp_output_directory, 'result.json')
all_results_json_path

'../output_dir/markt2_fight/VMAEv2/patch_batch/exp_2/result.json'

In [21]:
# `json` is json_tricks here (see the imports cell), not the standard-library module.
# Each value is later passed to np.concatenate, i.e. it is a sequence of arrays
# keyed by the frame number as a string.
with open(all_results_json_path, 'r') as f:
    all_results_dict = json.load(f)

In [22]:
# NOTE(review): new_results_dict is only inspected in the next cell; the filtering
# loop further down iterates all_results_dict and concatenates each frame itself.
# Confirm which of the two representations the filter is meant to consume.
new_results_dict = concatenate_results(all_results_dict, top_k=5)

In [26]:
# Sanity check on one frame; the recorded output is (39, 16).
new_results_dict['124'].shape


(39, 16)

In [None]:
# Split every frame's detections into "interesting" actors (at least one of their
# top actions is in interesting_actions_indices with a score at or above that
# action's threshold) and all remaining actors.
#   interesting_results_dict[frame] -> rows of interesting actors, or None if there are none
#   other_results_dict[frame]       -> rows of the remaining actors
interesting_results_dict = {}
other_results_dict = {}

for cur_frame, results_frame in tqdm(all_results_dict.items(), total=len(all_results_dict)):
    # Default for frames without any qualifying actor.
    interesting_results_dict[cur_frame] = None
    other_results_dict[cur_frame] = None

    # np.concatenate raises ValueError on an empty sequence, so a frame with no
    # detections is recorded as None in both dicts instead of aborting the run.
    if len(results_frame) == 0:
        continue

    results_frame_np = np.concatenate(results_frame, axis=0)

    # Columns 5:10 hold the top action indices; +1 lines them up with the ids used
    # in interesting_actions_indices (presumably the stored indices are 0-based —
    # confirm). Columns 10: hold the matching scores.
    top_action_indices_frame = results_frame_np[:, 5:10].astype('int') + 1
    top_values_frame = results_frame_np[:, 10:]

    # One flag per actor (row) of this frame.
    interesting_mask = np.zeros(len(results_frame_np), dtype=bool)

    # looping over all actors detected on this frame
    for actor_index, top_action_indices in enumerate(top_action_indices_frame):
        # looping over all actions of one actor; the first qualifying action is enough
        for act_index, act in enumerate(top_action_indices):
            if (act in interesting_actions_indices
                    and top_values_frame[actor_index, act_index] >= threshold_action[act]):
                interesting_mask[actor_index] = True
                break

    if interesting_mask.any():
        interesting_results_dict[cur_frame] = results_frame_np[interesting_mask]
        other_results_dict[cur_frame] = results_frame_np[~interesting_mask]
    else:
        other_results_dict[cur_frame] = results_frame_np
        
            
            
            
        

In [None]:
# Store the filtered results next to the result.json they were derived from.
# The previous hard-coded '.../exp_1/' path put them in a different experiment
# folder than the one resolved by find_largest_exp_folder above.
interesting_json_path = os.path.join(exp_output_directory, 'interesting_result.json')

# Save the dictionary as a JSON file. `json` is json_tricks here (see the imports
# cell); the dict values are numpy arrays or None.
with open(interesting_json_path, 'w') as f:
    json.dump(interesting_results_dict, f, indent=4)


In [None]:
# From here on `output_directory` means the experiment folder itself (the frames
# below are written under it). Reuse the folder resolved earlier instead of a
# hard-coded '.../exp_1/' so the outputs land beside the results they come from.
# NOTE: this rebinds the name set in the configuration cells; re-running the
# find_largest_exp_folder cell after this one would start from the wrong folder.
output_directory = exp_output_directory
video_path = '../input_dir/markt2_fight.mp4'

In [None]:
from my_utils.gen_utils import parse_label_file

In [None]:
# Load the action labels. The visualisation cell indexes label_dict with the
# (+1 shifted) action ids and calls str methods on the values, so the mapping is
# action id -> label string.
file_path = 'labels.txt'  # Specify the path to your text file
label_dict = parse_label_file(file_path)

In [None]:

from my_utils.video_processing import get_frame_from_video

# Annotated frames are kept in memory as well as written to disk.
vis_iteresting_frames_list = []

output_directory_frames = os.path.join(output_directory, 'inter_frames')
os.makedirs(output_directory_frames, exist_ok=True)

# Fallback box colour (RGB) for an actor none of whose top actions is in
# interesting_actions_indices. Without it the rectangle call would reuse the
# previous actor's colour, or raise NameError for the very first actor.
default_bbox_color = (0, 255, 0)

with tqdm(total=len(interesting_results_dict)) as pbar:
    # loop over all frames
    for cur_frame, results_frame in interesting_results_dict.items():
        # Advance for every frame, including skipped ones, so the bar reaches its total.
        pbar.update(1)

        # Frames without interesting actors are stored as None.
        if results_frame is None:
            continue

        # Column layout as read by this notebook: 0 = detection score, 1:5 = box
        # corners, 5:10 = top action indices (+1 so they line up with the ids used
        # in interesting_actions_indices / label_dict), 10: = matching scores.
        obj_scores_frame = results_frame[:, :1]
        bboxes_frame = results_frame[:, 1:5]
        top_indices_frame = results_frame[:, 5:10].astype('int') + 1
        top_values_frame = results_frame[:, 10:]

        frame = get_frame_from_video(video_path, int(cur_frame))

        # Draw in RGB so the tuples in action_colors are interpreted as (R, G, B).
        vis_inter_frame = cv2.cvtColor(frame.copy(), cv2.COLOR_BGR2RGB)

        # loop over all actors in this frame
        for id_actor, (object_score, bbox, top_action_indices, top_action_scores) in enumerate(
                zip(obj_scores_frame, bboxes_frame, top_indices_frame, top_values_frame)):
            x1, y1, x2, y2 = bbox.astype(int)

            # Box colour = colour of the first top action that is in the interesting list.
            for act in top_action_indices:
                if act in interesting_actions_indices:
                    bbox_action_color = action_colors[int(act)]
                    break
            else:
                bbox_action_color = default_bbox_color

            # Text row: "<actor id>-<detection score>:<label>_<score>-<label>_<score>-..."
            long_text = '{}-{}:'.format(id_actor, np.round(object_score, 2))
            for act, score in zip(top_action_indices, top_action_scores):
                long_text += '{}_{}-'.format(label_dict[act].replace('(', '').replace(')', '').split('/')[0],
                                             (np.round(score, 2)))

            # plot the bbox of the interesting actor and its id just above the box
            cv2.rectangle(vis_inter_frame, (x1, y1), (x2, y2), bbox_action_color, 2)
            cv2.putText(vis_inter_frame, '{}'.format(id_actor), (x1+10, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 0, 0), 2)

            # add text containing all actions of the actor (trailing '-' stripped);
            # one row per actor, first row at y = 120
            cv2.putText(vis_inter_frame,
                        long_text[:-1],
                        (20, 100 + 20 * (id_actor + 1)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Frame number, drawn once per frame instead of once per actor.
        cv2.putText(vis_inter_frame, str(cur_frame), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 4)

        vis_iteresting_frames_list.append(vis_inter_frame)
        frame_path = os.path.join(output_directory_frames, f"int_frame_{cur_frame}.jpg")
        # cv2.imwrite expects BGR, so swap the channels back before saving.
        cv2.imwrite(frame_path, cv2.cvtColor(vis_inter_frame, cv2.COLOR_RGB2BGR))


        
    
    


In [None]:
# Debug leftover: shows whatever the loop variable `act` was last bound to by an
# earlier cell. Depends on kernel state; raises NameError on a fresh kernel.
act

In [None]:
# Inspect the top-action index columns (5:10, before the +1 shift) of frame '24'.
# Fails with TypeError if that frame has no interesting actors (entry is None).
interesting_results_dict['24'][:,5:10]

In [None]:
# Debug leftover: last value of the loop variable `act_index` from an earlier cell.
# Depends on kernel state.
act_index

In [None]:
# Dumps the whole per-frame dict as cell output (one entry per frame in the clip);
# consider inspecting a single key instead.
interesting_results_dict

In [None]:
# Project NMS helper, called further down as `rows, keep = my_nms(array)`.
# NOTE(review): presumably drops boxes whose IOU with a higher-scored box reaches
# 0.7 — confirm the exact semantics in my_utils.postprocess.
my_nms = NMSPostprocess(match_threshold=0.7, sort_metric="scores", match_metric="IOU")

In [None]:
# Per-frame NMS over the interesting actors. The original cell stopped right after
# the `if` header (a SyntaxError); the body below follows the call shape used
# later in this notebook, `output_nms, new_keep = my_nms(results_np2)`.
# NOTE(review): confirm this is the intended post-processing step.
interesting_results_nms_dict = {}

for cur_frame, results_frame in tqdm(interesting_results_dict.items(),
                                     total=len(interesting_results_dict)):
    if results_frame is not None and results_frame.shape[0] > 1:
        # More than one actor: suppress overlapping boxes.
        kept_rows, _ = my_nms(results_frame)
        interesting_results_nms_dict[cur_frame] = kept_rows
    else:
        # None or a single actor: nothing to suppress.
        interesting_results_nms_dict[cur_frame] = results_frame
            

In [None]:
# Scratch section: reload a raw result file to experiment with NMS on one frame.
# NOTE(review): hard-codes exp_1, whereas the cells at the top resolve the folder
# with find_largest_exp_folder (exp_2 in the recorded output).
json_path = '../output_dir/markt2_fight/VMAEv2/patch_batch/exp_1/result.json'

In [None]:
# `json` is json_tricks here (see the imports cell), not the standard-library module.
with open(json_path, 'r') as f:
    results_dict = json.load(f)

In [None]:
# Number of separate result arrays stored for frame '25'.
len(results_dict['25'])

In [None]:
# Join the arrays stored for frame '25' along axis 0 into a single array
# (one row per detection).
results_np = np.concatenate(results_dict['25'], axis=0)

In [None]:
# Second, independent array with the same content; fed to the project's
# NMSPostprocess further down, while results_np goes through the local nms().
results_np2 = np.concatenate(results_dict['25'], axis=0)

In [None]:
# (number of detections, number of columns) before NMS.
results_np.shape

In [None]:
# Tensor view for the torch-based nms() below; torch.from_numpy shares memory
# with results_np rather than copying it.
results_torch = torch.from_numpy(results_np)

In [None]:
def nms(
    predictions: torch.Tensor,
    match_metric: str = "IOU",
    match_threshold: float = 0.5,
):
    """
    Greedy non-maximum suppression over a set of scored boxes.

    Boxes are visited from highest to lowest score; each visited box is kept and
    every remaining box whose overlap with it reaches ``match_threshold`` is
    discarded.

    Args:
        predictions: (tensor) Shape [num_boxes, >=5]. Column 0 is the confidence
            score, columns 1-4 are x1, y1, x2, y2. Any further columns are ignored.
        match_metric: (str) "IOU" (intersection over union) or "IOS"
            (intersection over the smaller of the two box areas).
        match_threshold: (float) Boxes with a metric value strictly below this
            are retained; the rest are suppressed.
    Returns:
        A list of row indices (Python ints) of the kept boxes, ordered by
        descending score.
    Raises:
        ValueError: if ``match_metric`` is neither "IOU" nor "IOS". This is
            checked up front, so it is raised even for empty or single-box input.
    """
    if match_metric not in ("IOU", "IOS"):
        raise ValueError(
            "match_metric must be 'IOU' or 'IOS', got {!r}".format(match_metric)
        )

    # confidence scores and box corners of every prediction
    scores = predictions[:, 0]
    x1 = predictions[:, 1]
    y1 = predictions[:, 2]
    x2 = predictions[:, 3]
    y2 = predictions[:, 4]

    # area of every box
    areas = (x2 - x1) * (y2 - y1)

    # ascending by score, so the best remaining box is always order[-1]
    order = scores.argsort()

    keep = []

    while len(order) > 0:
        # take the highest-scoring remaining box S and keep it
        idx = order[-1]
        keep.append(idx.item())

        # remove S from the candidates
        order = order[:-1]
        if len(order) == 0:
            break

        # corners of the intersection of S with every remaining box
        xx1 = torch.max(x1[order], x1[idx])
        yy1 = torch.max(y1[order], y1[idx])
        xx2 = torch.min(x2[order], x2[idx])
        yy2 = torch.min(y2[order], y2[idx])

        # clamp at 0 so non-overlapping boxes yield an empty intersection
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        # areas of the remaining boxes, aligned with `order`
        rem_areas = areas[order]

        if match_metric == "IOU":
            # union of each remaining box with S (areas[idx] is the area of S)
            union = (rem_areas - inter) + areas[idx]
            match_metric_value = inter / union
        else:
            # IOS: intersection over the smaller of the two areas
            smaller = torch.min(rem_areas, areas[idx])
            match_metric_value = inter / smaller

        # keep only candidates that do not overlap S too much
        mask = match_metric_value < match_threshold
        order = order[mask]
    return keep

In [None]:
# Run the local nms() on frame '25' with the intersection-over-smaller metric
# and the default threshold of 0.5.
keep = nms(results_torch, match_metric='IOS')

In [None]:
# Number of boxes that survived NMS.
len(keep)

In [None]:
# Select the surviving rows; `keep` is ordered by descending score, so the rows are too.
results_pp = results_torch[keep]

In [None]:
# Back to numpy for slicing and drawing.
results_pp_np = results_pp.numpy()

In [None]:
# Shape after NMS ...
results_pp_np.shape

In [None]:
# ... compared with the shape before NMS.
results_np.shape

In [None]:
# Same frame through the project's NMSPostprocess, for comparison with the local nms().
# NOTE(review): judging by the names it returns (filtered rows, kept indices) — confirm.
output_nms, new_keep = my_nms(results_np2)

In [None]:
# Shape of the NMSPostprocess output.
output_nms.shape

In [None]:
# Second value returned by NMSPostprocess (presumably the kept indices — confirm).
new_keep

In [None]:
# Repeats the conversion done a few cells above; results_pp has not changed in between.
results_pp_np = results_pp.numpy()



In [None]:
# Split the post-NMS rows into the column groups used throughout this notebook:
# 0 = detection score, 1:5 = box corners, 5:10 = top action indices, 10: = their scores.
obj_scores_frame = results_pp_np[:, :1]
bboxes_frame = results_pp_np[:, 1:5]
# Same int cast and +1 shift as the filtering and visualisation cells apply to
# these columns, so the values can be looked up in label_dict / action_colors.
top_indices_frame = results_pp_np[:, 5:10].astype('int') + 1
top_values_frame = results_pp_np[:, 10:]
        


In [None]:
from my_utils.video_processing import get_frame_from_video


In [None]:
# Fetch frame 25 (the frame whose results were post-processed above) and convert
# it from BGR to RGB for drawing and for matplotlib display.
frame = get_frame_from_video('../input_dir/markt2_fight.mp4', 25)

vis_frame = cv2.cvtColor(frame.copy(), cv2.COLOR_BGR2RGB)

In [None]:
# Outline every box that survived NMS in green; only the box corners are needed here.
for x1, y1, x2, y2 in bboxes_frame.astype(int):
    cv2.rectangle(vis_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            

In [None]:
import matplotlib.pyplot as plt

In [None]:
# vis_frame is already RGB, which is the channel order imshow expects.
plt.imshow(vis_frame)

In [None]:
# Same fetch-and-convert as for frame 25, now for frame 110 and without any boxes drawn.
frame = get_frame_from_video('../input_dir/markt2_fight.mp4', 110)

vis_frame = cv2.cvtColor(frame.copy(), cv2.COLOR_BGR2RGB)

plt.imshow(vis_frame)

In [None]:
# vis_frame is RGB; cv2.imwrite expects BGR, so swap the channels back before saving.
# File name corrected from 'mark2_fight' to match the clip name 'markt2_fight'.
cv2.imwrite("../output_dir/markt2_fight_110.jpg", cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR))