In [21]:
import os, torch

In [22]:
max_frame = 12 * 30 # 12s * 30 fps
frame_skip = 5
frame_size = 256
parted = True
print("Total frames:", max_frame / frame_skip)

Total frames: 72.0


In [23]:
import cv2

def load_video(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video {video_path}")

    frames = []
    while True:
        ret, frame = cap.read()
        
        # Ensure that it is in RGB and resize to frame_sizexframe_size
        if ret:
            # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (frame_size, frame_size))
        else:
            break

        frames.append(frame)

    cap.release()
    return frames

def load_video_tensor(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video {video_path}")

    frames = []
    while True:
        ret, frame = cap.read()
        
        # Ensure that it is in RGB and resize to frame_sizexframe_size
        if ret:
            # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (frame_size, frame_size))
        else:
            break
        
        frame_tensor = torch.tensor(frame, dtype=torch.uint8)
        frames.append(frame_tensor)

    cap.release()
    frames = torch.stack(frames)
    return frames

In [24]:
tensor = False
input_folder = "./jigsaws"
output_folder = f"jigsaws_frameskip_{frame_skip}_{max_frame//30}s{'_tensor' if tensor else ''}{'' if parted else '_unparted'}_{frame_size}size"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
else:
    if os.listdir(output_folder):
        print(f"Output folder {output_folder} is not empty. Continue? (y/n)")
        if input().lower() != 'y':
            exit()
os.listdir(input_folder)

['Experimental_setup', 'Knot_Tying', 'Needle_Passing', 'Suturing']

In [25]:
from concurrent.futures import ThreadPoolExecutor

metadata = []

def task(microsurgery_class, sub_microsurgery_class, video_name_file):
    frames = load_video(f"{input_folder}/"+microsurgery_class+"/"+sub_microsurgery_class+"/"+video_name_file)
    metadata.append({
        "path": f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class+"/"+video_name_file,
        "frames": len(frames)
    })
    
    if os.path.exists(f"{output_folder}/"+microsurgery_class) == False:
        os.mkdir(f"{output_folder}/"+microsurgery_class)

    if os.path.exists(f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class) == False:
        os.mkdir(f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class)

    total_parts = len(frames) // max_frame
    # total_parts = 1
    for index, part in enumerate(range(1, total_parts+1)):
        if os.path.exists(f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class+"/"+video_name_file+"_"+str(part)) == False:
            os.mkdir(f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class+"/"+video_name_file+"_"+str(part))

        for index, frame in enumerate(frames[index * max_frame: max_frame * part: frame_skip]):
            cv2.imwrite(f"{output_folder}/"+microsurgery_class+"/"+sub_microsurgery_class+"/"+video_name_file+"_"+str(part)+"/image_"+str(index)+".jpg", frame)

############################################################################
def no_subclass(microsurgery_class, video_name_file):
    frames = load_video(f"{input_folder}/"+microsurgery_class+"/"+video_name_file)
    metadata.append({
        "path": f"{output_folder}/"+microsurgery_class+"/"+video_name_file,
        "frames": len(frames)
    })
    if os.path.exists(f"{output_folder}/"+microsurgery_class) == False:
        os.mkdir(f"{output_folder}/"+microsurgery_class)

    total_parts = len(frames) // max_frame
    # total_parts = 1
    for index, part in enumerate(range(1, total_parts+1)):
        if os.path.exists(f"{output_folder}/"+microsurgery_class+"/"+video_name_file+"_"+str(part)) == False:
            os.mkdir(f"{output_folder}/"+microsurgery_class+"/"+video_name_file+"_"+str(part))

        for index, frame in enumerate(frames[index * max_frame: max_frame * part: frame_skip]):
            cv2.imwrite(f"{output_folder}/"+microsurgery_class+"/"+video_name_file+"_"+str(part)+"/image_"+str(index)+".jpg", frame)

############################################################################
def no_subclass_no_parting(microsurgery_class, video_name_file):
    frames = load_video(f"{input_folder}/"+microsurgery_class+"/"+video_name_file)
    metadata.append({
        "path": f"{output_folder}/"+microsurgery_class+"/"+video_name_file,
        "frames": len(frames)
    })
    if os.path.exists(f"{output_folder}/"+microsurgery_class) == False:
        os.mkdir(f"{output_folder}/"+microsurgery_class)

    total_parts = len(frames) // max_frame
    # total_parts = 1
    for index, part in enumerate(range(1, total_parts+1)):
        if os.path.exists(f"{output_folder}/"+microsurgery_class+"/"+video_name_file) == False:
            os.mkdir(f"{output_folder}/"+microsurgery_class+"/"+video_name_file)

        for idx, frame in enumerate(frames[index * max_frame: max_frame * part: frame_skip], start=max_frame * (part-1) // frame_skip):
            cv2.imwrite(f"{output_folder}/"+microsurgery_class+"/"+video_name_file+"/image_"+str(idx)+".jpg", frame)

############################################################################
def as_tensor(microsurgery_class, video_name_file):
    frames = load_video_tensor(f"{input_folder}/"+microsurgery_class+"/"+video_name_file)
    if os.path.exists(f"{output_folder}/"+microsurgery_class) == False:
        os.mkdir(f"{output_folder}/"+microsurgery_class)

    total_parts = len(frames) // max_frame
    if total_parts == 0:
        total_parts = 1
    # total_parts = 1
    for index, part in enumerate(range(1, total_parts+1)):
        torch.save(frames[index * max_frame: max_frame * part: frame_skip], f"{output_folder}/"+microsurgery_class+"/"+video_name_file+"_"+str(part)+".pt")

############################################################################
def handle_jigsaws(laparo_class, video_name_file, expertise_class):
    frames = load_video(f"{input_folder}/"+laparo_class+"/video/"+video_name_file)
    metadata.append({
        "path": f"{output_folder}/"+laparo_class + "_" + expertise_class +"/"+video_name_file,
        "frames": len(frames)
    })
    if os.path.exists(f"{output_folder}/"+laparo_class + "_" + expertise_class ) == False:
        os.mkdir(f"{output_folder}/"+laparo_class + "_" + expertise_class )

    total_parts = len(frames) // max_frame
    # total_parts = 1
    for index, part in enumerate(range(1, total_parts+1)):
        if os.path.exists(f"{output_folder}/"+laparo_class+f"_{expertise_class}"+"/"+video_name_file+"_"+str(part)) == False:
            os.mkdir(f"{output_folder}/"+laparo_class+f"_{expertise_class}"+"/"+video_name_file+"_"+str(part))

        for index, frame in enumerate(frames[index * max_frame: max_frame * part: frame_skip]):
            cv2.imwrite(f"{output_folder}/"+laparo_class+f"_{expertise_class}"+"/"+video_name_file+"_"+str(part)+"/image_"+str(index)+".jpg", frame)

In [26]:
from tqdm import tqdm
import pandas as pd

# job_list = []
# with ThreadPoolExecutor(10) as executor:
#     for microsurgery_class in os.listdir(f"{input_folder}"):
#         print(microsurgery_class)
#         for sub_microsurgery_class in os.listdir(f"{input_folder}/"+microsurgery_class):
#             for video_name_file in os.listdir(f"{input_folder}/"+microsurgery_class+"/"+sub_microsurgery_class):
#                 job = executor.submit(task, microsurgery_class, sub_microsurgery_class, video_name_file)
#                 job_list.append(job)

# job_list = []
# with ThreadPoolExecutor() as executor:
#     for microsurgery_class in os.listdir(f"{input_folder}"):
#         print(microsurgery_class)
#         for video_name_file in os.listdir(f"{input_folder}/"+microsurgery_class):
#             if tensor:
#                 job = executor.submit(as_tensor, microsurgery_class, video_name_file)
#             else: 
#                 if parted:
#                     job = executor.submit(no_subclass, microsurgery_class, video_name_file)
#                 else:
#                     job = executor.submit(no_subclass_no_parting, microsurgery_class, video_name_file)
#             job_list.append(job)

job_list = []
with ThreadPoolExecutor() as executor:
    for laparo_class in os.listdir(f"{input_folder}"):
        if laparo_class == 'Experimental_setup':
            continue
        video_metadata = pd.read_csv(f"{input_folder}/{laparo_class}/meta_file_{laparo_class}.txt", sep='\t', header=None)
        video_metadata.columns = ['video_name', 'idk', 'class'] + ['idk'] * 8
        video_metadata['video_name'] += '_capture2'

        for video_name_file in os.listdir(f"{input_folder}/{laparo_class}/video"):
            # Only use capture2
            if 'capture2' not in video_name_file:
                continue
            expertise_class = video_metadata[video_metadata['video_name'] == video_name_file.split('.')[0]]['class'].item()
            job = executor.submit(handle_jigsaws, laparo_class, video_name_file, expertise_class)
            job_list.append(job)


for job in tqdm(job_list):
    job.result() 

100%|██████████| 103/103 [00:00<00:00, 102982.91it/s]


In [27]:
import pandas as pd

df = pd.DataFrame(metadata)
df

Unnamed: 0,path,frames
0,jigsaws_frameskip_5_12s_256size/Knot_Tying_I/K...,922
1,jigsaws_frameskip_5_12s_256size/Knot_Tying_I/K...,1014
2,jigsaws_frameskip_5_12s_256size/Knot_Tying_I/K...,1071
3,jigsaws_frameskip_5_12s_256size/Knot_Tying_E/K...,1052
4,jigsaws_frameskip_5_12s_256size/Knot_Tying_I/K...,1075
...,...,...
98,jigsaws_frameskip_5_12s_256size/Suturing_N/Sut...,4286
99,jigsaws_frameskip_5_12s_256size/Suturing_N/Sut...,4415
100,jigsaws_frameskip_5_12s_256size/Suturing_N/Sut...,5213
101,jigsaws_frameskip_5_12s_256size/Suturing_N/Sut...,5656


In [28]:
df.to_csv(f"jigsaws_metadata.csv", index=False)

In [29]:
import matplotlib
df[df["frames"] > 20 * 30].count()

path      103
frames    103
dtype: int64