##### Use this notebook with 'e6691-2024spring-project-DGJM-dg3370-jm5679/data/ecg_abnormal_cropped' and 'e6691-2024spring-project-DGJM-dg3370-jm5679/data/ecg_normal_cropped' 
##### Each of these two directories contains one subdirectory per ECG image, and each subdirectory holds the 12 cropped images for that ECG

In [1]:
import os
import shutil
from sklearn.model_selection import KFold

In [2]:
def rename_and_copy(src, dst_folder, prefix):
    """
    Copy the regular files directly inside ``src`` into ``dst_folder``,
    naming each copy ``"{prefix}-{original name}"``.

    Args:
        src: Directory whose files are copied (not traversed recursively).
        dst_folder: Destination directory; created if it does not exist.
        prefix: String prepended (followed by ``-``) to every copied file name.

    Returns:
        None. The source files are left in place.
    """
    os.makedirs(dst_folder, exist_ok=True)

    # Sorted so the copy order does not depend on the filesystem's listing order.
    for img in sorted(os.listdir(src)):
        old_path = os.path.join(src, img)
        # shutil.copy2 only handles files; a nested directory such as
        # '.ipynb_checkpoints' would otherwise abort the whole copy.
        if not os.path.isfile(old_path):
            continue
        new_path = os.path.join(dst_folder, f"{prefix}-{img}")
        shutil.copy2(old_path, new_path)
        
def organize_dataset(root_dir, classes, k_folds=5):
    """
    Build k-fold train/val/test copies of a per-class folder dataset.

    For every class, the sub-directories of ``root_dir/<class_name>`` are the
    samples. They are split with a shuffled, seeded ``KFold`` into a test part
    and a train+val part; the train+val part is then split again (first of
    five unshuffled folds held out) into roughly 80% train and 20% val.

    Each sample folder is copied to
    ``root_dir/fold_<k>/<class_name>/<train|val|test>/<folder_name>/`` and every
    file in it is renamed to ``<folder_name>-<original name>``.

    Args:
        root_dir: Directory containing one sub-directory per class.
        classes: Iterable of class directory names under ``root_dir``.
        k_folds: Number of outer folds (default 5).

    Returns:
        None. Progress is printed once per (class, fold).

    Note:
        ``KFold`` needs at least as many samples as splits, so each class must
        have enough sample folders for both the outer and the inner 5-way split.
    """
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    for class_name in classes:
        class_dir = os.path.join(root_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # seeded KFold assignment reproducible across machines and reruns.
        all_folders = sorted(
            os.path.join(class_dir, f)
            for f in os.listdir(class_dir)
            if os.path.isdir(os.path.join(class_dir, f))
        )

        for fold, (train_val_idx, test_idx) in enumerate(kf.split(all_folders)):
            print(f"Processing fold {fold+1} for class {class_name}")

            fold_dir = os.path.join(root_dir, f'fold_{fold+1}', class_name)

            # Inner split yields positions into train_val_idx, not into
            # all_folders, so map them back through train_val_idx.
            train_pos, val_pos = next(KFold(n_splits=5).split(train_val_idx))  # 80-20 split

            split_members = (
                ('test', [all_folders[i] for i in test_idx]),
                ('train', [all_folders[train_val_idx[p]] for p in train_pos]),
                ('val', [all_folders[train_val_idx[p]] for p in val_pos]),
            )

            for split_name, folders in split_members:
                split_dir = os.path.join(fold_dir, split_name)
                # Created up front so the split directory exists even when empty.
                os.makedirs(split_dir, exist_ok=True)

                for folder in folders:
                    folder_name = os.path.basename(folder)
                    dest_path = os.path.join(split_dir, folder_name)
                    os.makedirs(dest_path, exist_ok=True)
                    rename_and_copy(folder, dest_path, folder_name)


In [3]:
def remove_ipynb_checkpoints(root_dir):
    """
    Delete every directory named '.ipynb_checkpoints' found under ``root_dir``
    (including ``root_dir``'s own sub-tree at any depth) and print each removal.
    """
    checkpoint_name = '.ipynb_checkpoints'
    # Bottom-up walk: a directory is only visited after everything below it.
    for current_dir, _subdirs, _files in os.walk(root_dir, topdown=False):
        if os.path.basename(current_dir) != checkpoint_name:
            continue
        shutil.rmtree(current_dir)
        print(f"Removed: {current_dir}")

# Usage: delete any '.ipynb_checkpoints' folders under the normal-class image tree.
# NOTE: `root_dir` is a notebook-level name that later cells rebind to other paths.
root_dir = './data/ecg_normal_cropped/'
remove_ipynb_checkpoints(root_dir)

In [4]:
# NOTE(review): this redefines the function already defined in the previous
# cell; it is kept here only so this cell can be run by itself.
def remove_ipynb_checkpoints(root_dir):
    """
    Walk ``root_dir`` bottom-up and remove each directory whose name is
    '.ipynb_checkpoints', printing the path of every directory removed.
    """
    for visited, *_ in os.walk(root_dir, topdown=False):
        is_checkpoint_dir = os.path.basename(visited) == '.ipynb_checkpoints'
        if is_checkpoint_dir:
            shutil.rmtree(visited)
            print(f"Removed: {visited}")

# Usage: delete any '.ipynb_checkpoints' folders under the abnormal-class image tree.
# NOTE: rebinds the notebook-level name `root_dir` set by the previous cell.
root_dir = './data/ecg_abnormal_cropped/'
remove_ipynb_checkpoints(root_dir)

In [5]:
# Usage: build the fold directories for both classes under ./data.
# k_folds is left at its default of 5, so ./data/fold_1 ... ./data/fold_5 are
# written, each holding <class>/{train,val,test}/<sample folder>/ copies.
root_dir = './data'
classes = ['ecg_abnormal_cropped', 'ecg_normal_cropped']
organize_dataset(root_dir, classes)

Processing fold 1 for class ecg_abnormal_cropped
Processing fold 2 for class ecg_abnormal_cropped
Processing fold 3 for class ecg_abnormal_cropped
Processing fold 4 for class ecg_abnormal_cropped
Processing fold 5 for class ecg_abnormal_cropped
Processing fold 1 for class ecg_normal_cropped
Processing fold 2 for class ecg_normal_cropped
Processing fold 3 for class ecg_normal_cropped
Processing fold 4 for class ecg_normal_cropped
Processing fold 5 for class ecg_normal_cropped


In [6]:
import os
import shutil

def restructure_and_remove_directories(base_dir):
    """
    Flatten each fold from ``<class dir>/<section>/...`` into ``<section>/<class>``.

    For every directory named ``fold_*`` directly under ``base_dir`` and for
    each class in ('normal', 'abnormal'), every PNG found anywhere below
    ``<fold>/ecg_<class>_cropped/<section>`` (section in train/test/val) is
    moved into ``<fold>/<section>/<class>``. Target folders are created even
    when there is nothing to move.

    WARNING: this is destructive. After the move, ``<fold>/ecg_<class>_cropped``
    is deleted together with everything still inside it, i.e. any file that is
    not a PNG.

    Args:
        base_dir: Directory that contains the ``fold_*`` directories.

    Returns:
        None. One line is printed per class directory removed.
    """
    sections = ['train', 'test', 'val']
    classes = ['normal', 'abnormal']

    # Keep only real directories: a stray file such as 'fold_1.zip' would make
    # os.makedirs below fail. Sorted so processing order is deterministic.
    folds = sorted(
        entry for entry in os.listdir(base_dir)
        if entry.startswith('fold_') and os.path.isdir(os.path.join(base_dir, entry))
    )

    for fold in folds:
        fold_path = os.path.join(base_dir, fold)
        for cls in classes:
            # Top-level directory to remove after moving all images
            class_top_dir = os.path.join(fold_path, f'ecg_{cls}_cropped')

            for section in sections:
                target_dir = os.path.join(fold_path, section, cls)
                os.makedirs(target_dir, exist_ok=True)

                original_section_dir = os.path.join(class_top_dir, section)
                if not os.path.exists(original_section_dir):
                    continue

                # All nesting below the section is discarded; this assumes file
                # names are unique within a section (TODO confirm for new data).
                for root, _dirs, files in os.walk(original_section_dir):
                    for file in files:
                        # Case-insensitive so '.PNG' images are moved too rather
                        # than being wiped by the rmtree below.
                        if file.lower().endswith('.png'):
                            src_path = os.path.join(root, file)
                            dst_path = os.path.join(target_dir, file)
                            shutil.move(src_path, dst_path)

            # Remove the top-level class directory after all sections are processed
            if os.path.exists(class_top_dir):
                shutil.rmtree(class_top_dir)
                print(f"Removed directory and all contents: {class_top_dir}")

# Usage: flatten every ./data/fold_* tree into <section>/<class> folders.
# Destructive: each fold's 'ecg_*_cropped' directory is deleted afterwards.
base_dir = './data'
restructure_and_remove_directories(base_dir)


Removed directory and all contents: ./data/fold_4/ecg_normal_cropped
Removed directory and all contents: ./data/fold_4/ecg_abnormal_cropped
Removed directory and all contents: ./data/fold_3/ecg_normal_cropped
Removed directory and all contents: ./data/fold_3/ecg_abnormal_cropped
Removed directory and all contents: ./data/fold_1/ecg_normal_cropped
Removed directory and all contents: ./data/fold_1/ecg_abnormal_cropped
Removed directory and all contents: ./data/fold_5/ecg_normal_cropped
Removed directory and all contents: ./data/fold_5/ecg_abnormal_cropped
Removed directory and all contents: ./data/fold_2/ecg_normal_cropped
Removed directory and all contents: ./data/fold_2/ecg_abnormal_cropped
