In [1]:
import pandas as pd
import os
import os.path as osp
from glob import glob
from decord import VideoReader
import cv2
from tqdm import tqdm

In [2]:


# Dataset split to process; switch to 'train' to handle the training split.
dataset = 'validate'

# Skeleton-data directory for the chosen split, assembled component by
# component so the result uses the separators of the current OS.
path_to_data = osp.join(
    '..',
    '..',
    'smg',
    'smg_data_phase1',
    f'smg_skeleton_{dataset}',
)


In [7]:
def extract_and_save_frames(video_path, start_indices, end_indices, output_folder, debug=False, verbose=False, margin=0, overwrite_flag=False):
    """Cut a video into one ``.mp4`` clip per (start, end) frame range.

    Args:
        video_path: Path of the source video, opened with ``VideoReader``.
        start_indices: Iterable of first-frame indices, one per clip.
        end_indices: Iterable of last-frame indices (inclusive), paired with
            ``start_indices`` via ``zip``.
        output_folder: Directory receiving the clips; created if missing.
        debug: When true, only the output paths are computed; nothing is
            decoded or written.
        verbose: Show a per-clip progress bar and print a final message.
        margin: Number of extra frames added before and after every range.
            The widened range is clamped to ``[0, len(video) - 1]``.
        overwrite_flag: When false, a clip whose output file already exists
            is skipped.

    Returns:
        list[str]: One output path per (start, end) pair, in input order.
        Clips are named ``<video stem>_<NNNN>.mp4`` with ``NNNN`` counted
        from 1. A path is returned even when the clip was skipped.

    Raises:
        IOError: If the video writer cannot be opened for an output file.
    """
    import warnings

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Load the video using VideoReader
    vr = VideoReader(video_path)
    len_vr = len(vr)
    stem = os.path.basename(video_path).split('.')[0]

    # Encoder settings are identical for every clip, so compute them once.
    fps = fourcc = None
    if not debug:
        fps = vr.get_avg_fps()
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    list_of_paths = []
    for i, (start_ind, end_ind) in enumerate(zip(start_indices, end_indices), start=1):
        # Widen the range by `margin` on both sides without leaving the video.
        start_ind = max(0, start_ind - margin)
        end_ind = min(end_ind + margin, len_vr - 1)

        output_filename = os.path.join(output_folder, f"{stem}_{i:04d}.mp4")
        list_of_paths.append(output_filename)

        if debug or (osp.exists(output_filename) and not overwrite_flag):
            continue

        if end_ind < start_ind:
            # The range lies entirely past the end of the video: there is
            # nothing to decode, so skip it instead of failing on an empty batch.
            warnings.warn(
                f"Skipping clip {i} of {video_path}: frame range "
                f"{start_ind}-{end_ind} is outside the video ({len_vr} frames)."
            )
            continue

        # Extract frames between start and end indices (inclusive)
        frames = vr.get_batch(list(range(start_ind, end_ind + 1))).asnumpy()
        _write_clip(frames, output_filename, fourcc, fps, verbose=verbose)

    if verbose:
        print("Frames extracted and saved successfully.")
    return list_of_paths


def _write_clip(frames, output_filename, fourcc, fps, verbose=False):
    """Encode RGB ``frames`` into ``output_filename``, leaving no partial file on failure."""
    height, width, _ = frames[0].shape
    out = cv2.VideoWriter(output_filename, fourcc, fps, (width, height))
    finished = False
    try:
        if not out.isOpened():
            raise IOError(f"Could not open video writer for {output_filename}")
        for frame in tqdm(frames, total=len(frames), leave=False, disable=not verbose):
            # The writer expects BGR channel order; decoded frames are RGB.
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        finished = True
    finally:
        # Always release the writer, even when encoding was interrupted.
        out.release()
        # A truncated file would be mistaken for a finished clip by the
        # "skip if exists" check on the next run, so remove it.
        if not finished and osp.exists(output_filename):
            os.remove(output_filename)

In [8]:
def clip_files(path_to_data, save_folder, debug=False, verbose=False, margin=0, overwrite_flag=False, dataset_name=None):
    """Split every labelled recording under ``path_to_data`` into per-segment clips.

    Each ``<path_to_data>/<sample>/*labels.csv`` file is read as a header-less
    table with columns ``label, start_frame, end_frame``. The matching RGB
    video is looked up at
    ``<root>/SMG_RGB_Phase1/smg_rgb_<split>/<sample>/<sample>_color.mp4``,
    where ``<root>`` is the directory two levels above ``path_to_data``, and
    is cut with ``extract_and_save_frames``.

    Args:
        path_to_data: Directory containing one sub-folder per sample.
        save_folder: Directory that receives the clips.
        debug: Forwarded to ``extract_and_save_frames``.
        verbose: Forwarded to ``extract_and_save_frames``.
        margin: Forwarded to ``extract_and_save_frames``.
        overwrite_flag: Forwarded to ``extract_and_save_frames``.
        dataset_name: Split name used in the ``smg_rgb_<split>`` folder.
            Defaults to the notebook-level ``dataset`` variable.

    Returns:
        list[pandas.DataFrame]: One frame per labels file (in sorted path
        order) with the CSV columns plus ``paths`` (clip file per row),
        ``basename`` (sample folder name) and ``durations``
        (``end_frame - start_frame``).
    """
    # Fall back to the notebook global only when no split is given explicitly.
    split_name = dataset if dataset_name is None else dataset_name
    column_names = ["label", "start_frame", "end_frame"]

    # glob returns files in arbitrary order; sort so reruns give the same row order.
    all_csv_files = sorted(glob(osp.join(path_to_data, '*', "*labels.csv")))

    df_list = []
    for csv_file in tqdm(all_csv_files, total=len(all_csv_files)):
        labels_df = pd.read_csv(csv_file, header=None, names=column_names)

        sample_dir = osp.dirname(csv_file)
        sample_name = osp.basename(sample_dir)
        # Three levels above the sample folder. dirname (unlike split + join)
        # keeps the leading separator of absolute paths and copes with
        # shallow relative paths.
        data_root = osp.dirname(osp.dirname(osp.dirname(sample_dir)))
        video_path = osp.join(
            data_root,
            'SMG_RGB_Phase1',
            f'smg_rgb_{split_name}',
            sample_name,
            sample_name + '_color.mp4',
        )

        labels_df['paths'] = extract_and_save_frames(
            video_path=video_path,
            start_indices=labels_df.start_frame,
            end_indices=labels_df.end_frame,
            output_folder=save_folder,
            debug=debug,
            verbose=verbose,
            margin=margin,
            overwrite_flag=overwrite_flag,
        )
        labels_df['basename'] = sample_name
        labels_df['durations'] = labels_df.end_frame - labels_df.start_frame
        df_list.append(labels_df)
    return df_list

In [9]:
# Output directory for the clips: path_to_data with its last two components
# removed, followed by smg_split_files/train.
# NOTE(review): the sub-folder is hard-coded to 'train' regardless of the value
# of `dataset` — confirm that writing every split into this folder is intended.
save_folder = osp.join(osp.join(*path_to_data.split(os.sep)[:-2]),'smg_split_files', 'train')
os.makedirs(save_folder, exist_ok=True)

In [10]:
# Cut every labelled segment into its own clip, widening each range by 5 frames
# on both sides; the result is one DataFrame per labels CSV.
df_list = clip_files(path_to_data, save_folder, verbose=False, margin=5)


100%|██████████| 5/5 [00:01<00:00,  4.09it/s]


In [11]:
# Stack the per-recording tables into one frame. Each table keeps its own row
# index, so index values repeat across recordings.
df_ret = pd.concat(df_list)
# Save the combined table one level above save_folder, named after the split;
# the (repeating) row index is written as the first, unnamed CSV column.
df_ret.to_csv(osp.join(save_folder,'..',f'{dataset}.csv'))

In [12]:
# Show the combined table as the cell output.
df_ret

Unnamed: 0,label,start_frame,end_frame,paths,basename,durations
0,5,940,1029,..\..\smg\smg_split_files\train\Sample0031_col...,Sample0031,89
1,5,1245,1329,..\..\smg\smg_split_files\train\Sample0031_col...,Sample0031,84
2,5,1482,1587,..\..\smg\smg_split_files\train\Sample0031_col...,Sample0031,105
3,5,1744,1826,..\..\smg\smg_split_files\train\Sample0031_col...,Sample0031,82
4,5,1865,1933,..\..\smg\smg_split_files\train\Sample0031_col...,Sample0031,68
...,...,...,...,...,...,...
99,13,23616,23675,..\..\smg\smg_split_files\train\Sample0035_col...,Sample0035,59
100,9,23991,24062,..\..\smg\smg_split_files\train\Sample0035_col...,Sample0035,71
101,5,24361,24454,..\..\smg\smg_split_files\train\Sample0035_col...,Sample0035,93
102,5,24478,24539,..\..\smg\smg_split_files\train\Sample0035_col...,Sample0035,61
