# 02 - Preprocessing A2C Echocardiographic Videos

This notebook prepares the A2C (Apical 2-Chamber) video data for deep learning by:
- Labeling videos as MI or non-MI based on filenames
- Stratified splitting into training, validation, and test sets
- Copying video files into split folders
- Extracting a fixed number of frames per video
- Saving extracted frames to structured directories

The output of this notebook is used as input to the model training phase.


In [5]:
import os
import shutil
from sklearn.model_selection import StratifiedShuffleSplit
import myutils  

In [6]:
# Dataset root plus the names of the raw-video folder and of the tag that
# replaces it when building the extracted-frame paths.
path2data = "../data"
sub_folder = "A2C"
sub_folder_jpg = "A2C_jpg"

# Folder that holds the original .avi files.
path2aCatgs = os.path.join(path2data, sub_folder)

# One destination folder per split, all located directly under the data root.
train_folder, val_folder, test_folder = (
    os.path.join(path2data, split_dir)
    for split_dir in ("A2C_training", "A2C_validation", "A2C_test")
)

# Create the split folders up front; exist_ok keeps the cell re-runnable.
for folder in (train_folder, val_folder, test_folder):
    os.makedirs(folder, exist_ok=True)


In [7]:
# Collect every .avi file below the raw A2C folder together with a binary label.
#
# The traversal is sorted (sub-directories and file names) because os.walk
# yields entries in arbitrary, filesystem-dependent order. The later split is
# index-based with a fixed random_state, so a stable ordering here is what makes
# the train/validation/test membership reproducible across machines.
videos, labels = [], []
for root, dirs, files in os.walk(path2aCatgs):
    dirs.sort()  # in-place sort steers the order in which os.walk descends
    for file in sorted(files):
        if file.endswith(".avi"):
            videos.append(os.path.join(root, file))
            # Label 0 when the lower-cased file name contains the letter "n",
            # otherwise 1 (0 = non-MI, 1 = MI, matching count_videos).
            # NOTE(review): this matches an "n" anywhere in the name, so it
            # relies on the dataset's naming convention - confirm.
            labels.append(0 if "n" in file.lower() else 1)

print(f"Found {len(videos)} videos.")


Found 148 videos.


In [8]:
# Stage 1: stratified 80/20 split of all videos into training and a held-out pool.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
train_idx, test_idx = next(sss.split(videos, labels))

train_videos, train_labels = [], []
for idx in train_idx:
    train_videos.append(videos[idx])
    train_labels.append(labels[idx])

test_videos, test_labels = [], []
for idx in test_idx:
    test_videos.append(videos[idx])
    test_labels.append(labels[idx])

# Stage 2: stratified 50/50 split of the held-out pool into validation and
# final test. The indices returned here are positions within test_videos.
sss_val = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
val_idx, final_test_idx = next(sss_val.split(test_videos, test_labels))

val_videos, val_labels = [], []
for idx in val_idx:
    val_videos.append(test_videos[idx])
    val_labels.append(test_labels[idx])

final_test_videos, final_test_labels = [], []
for idx in final_test_idx:
    final_test_videos.append(test_videos[idx])
    final_test_labels.append(test_labels[idx])

# Report the size of each resulting split.
for split_title, split_videos in (
    ("Training", train_videos),
    ("Validation", val_videos),
    ("Final Test", final_test_videos),
):
    print(f"{split_title} set: {len(split_videos)} videos")


Training set: 118 videos
Validation set: 15 videos
Final Test set: 15 videos


In [9]:
def move_videos(video_list, dest_folder):
    """Copy every video in ``video_list`` into ``dest_folder``.

    Despite the name, the files are copied with ``shutil.copy``; the sources
    stay where they are. Each copy keeps the base name of its source path.

    Args:
        video_list: Iterable of paths to the video files to copy.
        dest_folder: Path of the directory that receives the copies.
    """
    for src_path in video_list:
        file_name = os.path.basename(src_path)
        target_path = os.path.join(dest_folder, file_name)
        shutil.copy(src_path, target_path)

# Populate each split folder with copies of the videos assigned to that split.
for split_videos, split_folder in (
    (train_videos, train_folder),
    (val_videos, val_folder),
    (final_test_videos, test_folder),
):
    move_videos(split_videos, split_folder)


In [10]:
# Number of frames requested from myutils.get_frames for every video.
# NOTE(review): the recorded totals (1888 frames for 118 training videos, 240
# for 15) work out to 16 frames per video, so get_frames apparently returns
# n_frames + 1 frames - confirm in myutils.
n_frames = 15

# Split tag -> folder holding that split's videos.
split_folders = {
    "train": train_folder,
    "val": val_folder,
    "final_test": test_folder,
}
# Running number of extracted frames per split.
frame_totals = {split: 0 for split in split_folders}

for split, videos_folder in split_folders.items():
    # Sorted for a stable processing order between runs.
    for video in sorted(os.listdir(videos_folder)):
        # Only .avi entries are videos (same filter as count_videos); anything
        # else in the folder, e.g. .DS_Store, must not reach get_frames.
        if not video.endswith(".avi"):
            continue

        video_path = os.path.join(videos_folder, video)
        frames, _ = myutils.get_frames(video_path, n_frames=n_frames)

        # Frame directory: the video path with sub_folder swapped for
        # sub_folder_jpg and the ".avi" extension dropped.
        # NOTE(review): str.replace rewrites every occurrence in the path, so a
        # file name containing the sub_folder text is renamed too - confirm
        # that downstream code expects this.
        path2store = video_path.replace(sub_folder, sub_folder_jpg).replace(".avi", "")
        os.makedirs(path2store, exist_ok=True)
        myutils.store_frames(frames, path2store)

        frame_totals[split] += len(frames)

# Keep the original per-split counter names available to later cells.
train_frame_count = frame_totals["train"]
val_frame_count = frame_totals["val"]
test_frame_count = frame_totals["final_test"]

print(f"Total frames in training set: {train_frame_count}")
print(f"Total frames in validation set: {val_frame_count}")
print(f"Total frames in final test set: {test_frame_count}")


Total frames in training set: 1888
Total frames in validation set: 240
Total frames in final test set: 240


In [11]:
def count_videos(folder):
    """Count the MI and non-MI ``.avi`` files directly inside ``folder``.

    A file counts as non-MI when its lower-cased name contains the letter
    ``"n"``; every other ``.avi`` file counts as MI. Entries that do not end
    with ``.avi`` are ignored.

    Args:
        folder: Path of the directory to inspect (not searched recursively).

    Returns:
        A ``(mi_count, non_mi_count)`` tuple.
    """
    avi_names = [name for name in os.listdir(folder) if name.endswith(".avi")]
    non_mi_count = sum(1 for name in avi_names if "n" in name.lower())
    mi_count = len(avi_names) - non_mi_count
    return mi_count, non_mi_count

# Print the MI / non-MI class balance of each split folder.
split_dirs = (
    ("Training", train_folder),
    ("Validation", val_folder),
    ("Test", test_folder),
)
for name, path in split_dirs:
    mi, non_mi = count_videos(path)
    print(f"{name}: {mi} MI videos, {non_mi} non-MI videos")


Training: 63 MI videos, 55 non-MI videos
Validation: 8 MI videos, 7 non-MI videos
Test: 8 MI videos, 7 non-MI videos
