# Extract clips of actions from full video
This notebook will take the VIA annotations and extract each annotation (action) into a video clip and place the clips into their respective class folder.  
We will also produce a csv version of the annotations here, that will be later used to measure the accuracy of the model (in predict_video_and_score.ipynb).  
After this notebook is done, run 02_preprocess_vid2img.py

## Setup

In [1]:
import json
import glob
import cv2
import os
from numpy import random
from pathlib import Path
from matplotlib import pyplot as plt
from env_vars import VIDEOS_DIR, PREPROCESSED_DATA_ROOT, CLIPS_DIR
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from collections import defaultdict
# Echo the three directories imported from env_vars as a quick sanity check.
print(f'{VIDEOS_DIR}\n{PREPROCESSED_DATA_ROOT}\n{CLIPS_DIR}')

C:\Users\User1\Desktop\projects\ITE_APAMS\ite_dataset\videos
C:\Users\User1\Desktop\projects\ITE_APAMS\ite_dataset
C:\Users\User1\Desktop\projects\ITE_APAMS\ite_dataset\clips


In [8]:
#make clips dir
# parents=True so this also works when CLIPS_DIR points at a nested location
# whose parent folders have not been created yet; exist_ok=True keeps the
# cell safe to re-run.
Path(CLIPS_DIR).mkdir(parents=True, exist_ok=True)

In [9]:
# Name of the attribute that holds the action label in your VIA project.
# e.g. if you named your action attribute "action_attribute", ATTRIBUTE_NAME should be "action_attribute"
ATTRIBUTE_NAME = 'action'

### Get paths of all video files

In [10]:
#get paths of all video files (expected layout: VIDEOS_DIR/<folder>/<folder>/<name>.MP4)
video_files_paths = glob.glob(os.path.join(VIDEOS_DIR, '*', '*', '*.MP4'))

# Map each video's base filename to its full path. The annotations are looked
# up by base filename later on, so two videos that share a name in different
# folders would silently shadow each other; report that instead of hiding it.
# The last path found still wins, as before.
vid_path_dict = {}
for path in video_files_paths:
    vid_filename = os.path.basename(path)
    if vid_filename in vid_path_dict:
        print(f'WARNING: duplicate video filename {vid_filename!r}: '
              f'{vid_path_dict[vid_filename]} is replaced by {path}')
    vid_path_dict[vid_filename] = path


In [11]:
# Display the video filename -> full path lookup for inspection.
vid_path_dict

{'VID00002.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210302\\A\\VID00002.MP4',
 'A_1.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\A\\A_1.MP4',
 'A_2.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\A\\A_2.MP4',
 'A_3.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\A\\A_3.MP4',
 'B1_1.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\B\\B1_1.MP4',
 'B1_2.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\B\\B1_2.MP4',
 'B2_1.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\B\\B2_1.MP4',
 'C1_1.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\CD\\C1_1.MP4',
 'C1_2.MP4': 'C:\\Users\\User1\\Desktop\\projects\\ITE_APAMS\\ite_dataset\\videos\\210304\\CD\\C1_2.MP4',
 'C1_3.MP4': 'C:\\Users\\User1\\Desktop\\projects\\

### Get all json file paths and json data

In [12]:
#get all json file paths (same two-level folder layout as the videos)
json_files = glob.glob(os.path.join(VIDEOS_DIR,'*', '*', '*.json'))


#get list of json filenames
filenames = [os.path.basename(json_file) for json_file in json_files]

#read json into list
# JSON files are UTF-8 by specification; pass the encoding explicitly so the
# platform default (e.g. cp1252 on Windows) cannot fail on or mis-decode
# non-ASCII characters in the annotation files.
data_list = []
for file_name in json_files:
    with open(file_name, encoding='utf-8') as file:
        data = json.load(file)
        data_list.append(data)


### Get timespans for each action on each video:
- (action_name, video_filename, annotation_id, z)
- z: `[start, end]` timespan of the action, in seconds

#### Key Details from Exploring the JSON
- data['metadata'] = annotations
- data['metadata'].keys() = annotation ids
- data['metadata'][annotation_id]['av'].keys() = attribute ids
- data['metadata'][annotation_id]['av'][attribute_id] = label

In [13]:
#ann_dict where each key is a video filename and the values are the annotations that pertain to that video
# Each stored annotation is a dict with keys 'action_name', 'annotation_id' and 'z'.
ann_dict = defaultdict(list)

#iterate through each json file
for data in data_list:
    # data['metadata'] is a dictionary of annotations like:
    # annotation_id: {annotation}
    for annotation_id, annotation_dict in data['metadata'].items():

        z = annotation_dict['z'] # time

        #skip if it's an annotation for an object
        # (only annotations whose z has exactly two entries, start and end, are kept)
        if len(z)!=2:
            continue

        # attribute values: each annotation has multiple attributes. Each attribute can either
        #have an object and action, or just an action
        av = annotation_dict['av']

        # Find the attribute id whose name is ATTRIBUTE_NAME. None (rather than
        # a dummy id) marks "not found", so a segment without an action label
        # is reported and skipped instead of failing with an opaque KeyError.
        action_attr_id = None
        for attribute_id in av.keys():
            attr_name = data['attribute'][attribute_id]['aname']
            if attr_name == ATTRIBUTE_NAME:
                action_attr_id = attribute_id # store the attribute_id for action

        if action_attr_id is None:
            print(f'WARNING: annotation {annotation_id} has no "{ATTRIBUTE_NAME}" attribute, skipping')
            continue

        video_id = annotation_dict['vid'] #id of video file for this annotation
        video_filename = data['file'][video_id]['fname']
        action_name = av[action_attr_id]

        print(action_name, video_filename, annotation_id, z)

        ann_dict[video_filename].append({'action_name':action_name, 'annotation_id':annotation_id, 'z':z})


switch_off_power_source VID00002.MP4 1_3RYVLAUg [176.175, 178.615]
place_anti_static_mat VID00002.MP4 1_ZxoZlxCY [190.955, 193.175]
disconnect_display_cable VID00002.MP4 1_2fACxd5g [220.892, 222.332]
wear_wrist_wrap VID00002.MP4 1_HIS8BxKg [251.104, 261.164]
wear_wrist_wrap VID00002.MP4 1_Dt4WCi33 [269.374, 271.303]
remove_hdd VID00002.MP4 1_lIKYJHrJ [372.676, 376.527]
disconnect_hdd_power_cable VID00002.MP4 1_9ELOuiqh [315.147, 319.43]
disconnect_odd_power_cable VID00002.MP4 1_DSEbHcOd [418.23, 424.343]
remove_odd VID00002.MP4 1_8WN61nUT [444.834, 449.174]
remove_vga VID00002.MP4 1_XpFLUMWs [483.702, 486.898]
remove_ram VID00002.MP4 1_rVFlLbxw [494.668, 496.337]
remove_ram VID00002.MP4 1_0vnDxNjw [499.736, 501.656]
insert_ram VID00002.MP4 1_BL2tuwpo [879.009, 888.897]
insert_odd VID00002.MP4 1_GCmCsfKD [905.381, 913.575]
connect_odd_power_cable VID00002.MP4 1_zk3GqD1M [955.527, 968.027]
connect_odd_data_cable VID00002.MP4 1_0UyEF6I2 [984.147, 998.182]
insert_hdd VID00002.MP4 1_BUbt32U

In [14]:
#total number of annotations across all videos
c = sum(len(annotations) for annotations in ann_dict.values())
print(c)

5873


In [16]:
#sort annotations by time so that the extraction of the no_action clips can be done correctly
def _segment_start(annotation):
    """Return the start time of an annotation (first entry of its 'z')."""
    return annotation['z'][0]


# Replace each video's annotation list with a copy ordered by start time.
for filename in list(ann_dict):
    ann_dict[filename] = annotations = sorted(ann_dict[filename], key=_segment_start)

### Extract clips from videos
- Note: a CSV file with the action timestamps is also written here for every full video, next to the video as `<video path>.csv`.

In [17]:
#extract clips from videos
# For every annotation this cell
#   1. cuts the annotated action into CLIPS_DIR/<action>/<annotation_id>_<video filename>
#   2. cuts one "no_action" clip from the idle gap in front of that action (when
#      the gap is large enough) into CLIPS_DIR/no_action/
#   3. logs both time spans to <video path>.csv (columns: action,z_start,z_end)
# Annotations must already be sorted by start time.
random.seed(112)
buffer = 0.5 # buffer between no_action and action

no_action_dir = os.path.join(CLIPS_DIR, 'no_action')

for filename, annotations in ann_dict.items():
    print(filename)
    video_path = vid_path_dict[filename]
    prev_end = 0 # latest end time (secs) of any action seen so far in this video
    with open(video_path+'.csv', 'w') as csv_file:
        csv_file.write('action,z_start,z_end\n')
        for annotation in annotations:
            action = annotation['action_name']
            z_start, z_end = annotation['z'] #z is in seconds
            annotation_id = annotation['annotation_id']

            #extract action clip
            target_dir = os.path.join(CLIPS_DIR, action)
            Path(target_dir).mkdir(exist_ok=True)
            target_filepath = os.path.join(target_dir, f'{annotation_id}_{filename}')
            ffmpeg_extract_subclip(video_path, z_start, z_end, targetname=target_filepath)
            csv_file.write(f'{action},{z_start},{z_end}\n')

            #extract no action clip
            # Drawn for every annotation (even if the gap is skipped below) so
            # the random stream is consumed the same way as before.
            duration = random.uniform(3, 10)

            # Idle window in front of this action, shrunk by the buffer on both sides.
            gap_start = prev_end + buffer
            gap_end = z_start - buffer

            # Advance the marker BEFORE any skip. If it were only updated after
            # a clip was written, a skipped gap would leave it pointing at an
            # older action and the next no_action window could cover this
            # action. max() also keeps it from moving backwards when
            # annotations overlap.
            prev_end = max(prev_end, z_end)

            #skip if gap is too small / overlap
            if gap_end - gap_start < 0.3:
                continue

            if gap_end - gap_start < duration: # if gap between action is too short, take whole gap as no action
                na_start = gap_start
                na_end = gap_end
            else:
                na_start = random.uniform(gap_start, gap_end-duration)
                na_end = na_start + duration

            Path(no_action_dir).mkdir(exist_ok=True)
            target_filepath = os.path.join(no_action_dir, f'{annotation_id}_no_action_{filename}')
            ffmpeg_extract_subclip(video_path, na_start, na_end, targetname=target_filepath)
            csv_file.write(f'no_action,{na_start},{na_end}\n')

VID00002.MP4
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)


KeyboardInterrupt: 