In [54]:
import os
import pandas as pd
import shutil
from pathlib import Path

image_path = 'data/ptb-xl_images_multiprocessed'


In [55]:
mi_ischemia_labels = [
    'AMI', 'IMI', 'PMI', 'LMI', 'ALMI', 'ASMI', 'ILMI', 'IPMI', 'IPLMI',
    'ISC', 'ISCAL', 'ISCAN', 'ISCIL', 'ISCLA', 'ISCIN', 'ISCAS',
    'INJAS', 'INJAL', 'INJIL', 'INJLA', 'INJIN'
]

conduction_labels = [
    '1AVB', '2AVB', '3AVB',
    'CRBBB', 'IRBBB', 'CLBBB', 'ILBBB',
    'LAFB', 'LPFB', 'IVCD'
]

arrhythmia_labels = [
    'AFIB', 'AFLT', 'PSVT',
    'PAC', 'PVC',
    'SVARR', 'SARRH',
    'SBRAD', 'WPW', 'PACE', 'SR'
]

hypertrophy_labels = [
    'LVH', 'RVH',
    'LAO-LAE', 'RAO-RAE'
]

morphology_labels = [
    'INVT', 'LOWT', 'NDT', 'NST',
    'HVOLT', 'LVOLT', 'LNGQT',
    'PRC(S)', 'ABQRS', 'LPR', 'QWAVE'
]

other_labels = [
    'ANEUR', 'DIG', 'SEHYP', 'BIGU', 'EL'
]

normal_labels = ['NORM']

all_labels = (
    mi_ischemia_labels +
    conduction_labels +
    arrhythmia_labels +
    hypertrophy_labels +
    morphology_labels +
    other_labels +
    normal_labels
)

label_to_category = {}

for label in mi_ischemia_labels:
    label_to_category[label] = 'MI'
for label in conduction_labels:
    label_to_category[label] = 'Conduction'
for label in arrhythmia_labels:
    label_to_category[label] = 'Arrhythmia'
for label in hypertrophy_labels:
    label_to_category[label] = 'Hypertrophy'
for label in morphology_labels:
    label_to_category[label] = 'Morphology'
for label in other_labels:
    label_to_category[label] = 'Other'
for label in normal_labels:
    label_to_category[label] = 'Normal'




In [56]:
original_names = []
filenames = []
for filename in os.listdir(image_path):
    original_names.append(filename)
    filenames.append(filename.split('_')[1].split('.')[0])
df = pd.DataFrame({'filename': original_names, 'extract': filenames})
unique_names = df['extract'].unique()
print(unique_names)
for name in unique_names:
    shape = df[df['extract'] == name].shape
    if shape[0] > 1000:
        print(f"{name}: ", shape)

df['label'] = df['extract'].map(label_to_category)
df['label'].value_counts()

['ISCAL' 'INVT' 'CRBBB' 'NDT' 'NORM' 'ISC' 'ASMI' 'AFIB' '1AVB' 'IVCD'
 'IRBBB' 'ABQRS' 'LAFB' 'ANEUR' 'IMI' 'HVOLT' 'SR' 'EL' 'AFLT' 'LOWT'
 'AMI' 'LVH' 'LAO-LAE' 'LVOLT' 'NST' 'CLBBB' 'LMI' 'ISCIN' 'INJAS' 'SBRAD'
 '2AVB' 'DIG' 'ALMI' 'LPR' 'BIGU' 'PMI' 'ILMI' 'INJAL' '3AVB' 'PACE'
 'IPMI' 'ISCLA' 'LPFB' 'IPLMI' 'ISCAS' 'SARRH' 'ILBBB' 'ISCAN' 'ISCIL'
 'LNGQT' 'PRC(S)' 'PSVT' 'INJIL' 'RAO-RAE' 'RVH' 'INJLA' 'PAC' 'SVARR'
 'WPW' 'QWAVE' 'PVC' 'SEHYP' 'INJIN']
NDT:  (1003, 2)
NORM:  (8637, 2)
AFIB:  (1196, 2)
ABQRS:  (3145, 2)


label
Normal         8637
Morphology     4901
Conduction     2945
MI             2923
Arrhythmia     1572
Hypertrophy     595
Other           225
Name: count, dtype: int64

In [57]:
df.head()

Unnamed: 0,filename,extract,label
0,10001_ISCAL.png,ISCAL,MI
1,10002_INVT.png,INVT,Morphology
2,10003_CRBBB.png,CRBBB,Conduction
3,10004_NDT.png,NDT,Morphology
4,10005_NORM.png,NORM,Normal


In [58]:
INPUT_BASE = 'data/ptb-xl_images_multiprocessed/'
OUTPUT_BASE = 'data/sorted_images/'
os.makedirs(OUTPUT_BASE, exist_ok=True)

label_counters = {}

for _, row in df.iterrows():
    src_path = f"{INPUT_BASE}{row['filename']}"
    label = row['label']

    if pd.isna(label):
        continue

    label_folder = f"{OUTPUT_BASE}{label}/"
    os.makedirs(label_folder, exist_ok=True)

    count = label_counters.get(label, 0) + 1
    label_counters[label] = count
    new_filename = f"{label}_{count:05d}{Path(row['filename']).suffix}"

    dst_path = f"{label_folder}{new_filename}"
    print(src_path)
    print(dst_path)

    shutil.move(src_path, dst_path)

data/ptb-xl_images_multiprocessed/10001_ISCAL.png
data/sorted_images/MI/MI_00001.png
data/ptb-xl_images_multiprocessed/10002_INVT.png
data/sorted_images/Morphology/Morphology_00001.png
data/ptb-xl_images_multiprocessed/10003_CRBBB.png
data/sorted_images/Conduction/Conduction_00001.png
data/ptb-xl_images_multiprocessed/10004_NDT.png
data/sorted_images/Morphology/Morphology_00002.png
data/ptb-xl_images_multiprocessed/10005_NORM.png
data/sorted_images/Normal/Normal_00001.png
data/ptb-xl_images_multiprocessed/10006_ISC_.png
data/sorted_images/MI/MI_00002.png
data/ptb-xl_images_multiprocessed/10007_NORM.png
data/sorted_images/Normal/Normal_00002.png
data/ptb-xl_images_multiprocessed/10008_NORM.png
data/sorted_images/Normal/Normal_00003.png
data/ptb-xl_images_multiprocessed/10009_ASMI.png
data/sorted_images/MI/MI_00003.png
data/ptb-xl_images_multiprocessed/1000_NORM.png
data/sorted_images/Normal/Normal_00004.png
data/ptb-xl_images_multiprocessed/10010_AFIB.png
data/sorted_images/Arrhythmia/A