In [1]:
import cv2
import os
import h5py
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [2]:
# Path to your folder
folder_path = 'images-224/images-224'

In [3]:
df = pd.read_csv('cardiomegaly.csv')

In [4]:
df.head()

Unnamed: 0,Image Index,Cardiomegaly
0,00004445_001.png,1
1,00013593_002.png,0
2,00001301_042.png,1
3,00016433_000.png,0
4,00007718_008.png,0


In [5]:
df.shape

(5544, 2)

In [7]:
image_names = df['Image Index'].tolist()

def preprocess_xray(img_path, target_size=(224, 224)):
    # Load grayscale
    img = cv2.imread(img_path, 0)
    if img is None:
        return None
    
    # Resize
    img = cv2.resize(img, target_size)
    
    # Apply CLAHE for contrast enhancement
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    img = clahe.apply(img)
    
    # Optional: Gaussian blur for noise reduction
    img = cv2.GaussianBlur(img, (3,3), 0)
    
    # Normalize to [0,1] then standardize
    img = img.astype(np.float32) / 255.0
    
    # Keep as 2D array with channel dimension for CNN
    img = np.expand_dims(img, axis=-1)  # Shape: (224, 224, 1)
    
    return img

# Modified processing loop
processed_images = []
labels = []

for i, filename in enumerate(image_names):
    img_path = os.path.join(folder_path, filename)
    if os.path.exists(img_path):
        img = preprocess_xray(img_path)
        
        if img is not None:
            processed_images.append(img)
            # Extract label from your df
            label = df[df['Image Index'] == filename]['Cardiomegaly'].values[0]
            labels.append(label)
            print(f"Processed: {filename}")

# Convert to numpy arrays (don't flatten!)
X = np.array(processed_images)  # Shape: (n_samples, 224, 224, 1)
y = np.array(labels)

X = (X - X.mean(axis=(1,2), keepdims=True)) / (X.std(axis=(1,2), keepdims=True) + 1e-8)

Processed: 00004445_001.png
Processed: 00013593_002.png
Processed: 00001301_042.png
Processed: 00016433_000.png
Processed: 00007718_008.png
Processed: 00009047_000.png
Processed: 00004526_009.png
Processed: 00018802_000.png
Processed: 00005641_000.png
Processed: 00017698_004.png
Processed: 00005060_000.png
Processed: 00003973_013.png
Processed: 00012364_004.png
Processed: 00006875_003.png
Processed: 00015914_008.png
Processed: 00009647_003.png
Processed: 00000143_003.png
Processed: 00017448_006.png
Processed: 00004344_038.png
Processed: 00002704_013.png
Processed: 00012990_000.png
Processed: 00025860_000.png
Processed: 00009734_000.png
Processed: 00025895_002.png
Processed: 00022566_007.png
Processed: 00003129_011.png
Processed: 00005699_004.png
Processed: 00010007_043.png
Processed: 00016800_009.png
Processed: 00005066_049.png
Processed: 00028216_001.png
Processed: 00015693_002.png
Processed: 00011098_000.png
Processed: 00000615_000.png
Processed: 00012161_012.png
Processed: 00020107_

Processed: 00022068_000.png
Processed: 00010711_006.png
Processed: 00016934_041.png
Processed: 00016034_004.png
Processed: 00000887_000.png
Processed: 00019367_000.png
Processed: 00007453_002.png
Processed: 00018253_004.png
Processed: 00005064_007.png
Processed: 00017445_002.png
Processed: 00006712_002.png
Processed: 00012276_002.png
Processed: 00000001_000.png
Processed: 00015009_011.png
Processed: 00019006_001.png
Processed: 00001373_035.png
Processed: 00009996_013.png
Processed: 00000032_040.png
Processed: 00003501_001.png
Processed: 00019508_013.png
Processed: 00027292_001.png
Processed: 00022391_000.png
Processed: 00005689_000.png
Processed: 00021918_000.png
Processed: 00025851_005.png
Processed: 00004344_036.png
Processed: 00025238_007.png
Processed: 00016743_010.png
Processed: 00025845_000.png
Processed: 00010693_026.png
Processed: 00016361_000.png
Processed: 00028598_000.png
Processed: 00007050_002.png
Processed: 00021618_015.png
Processed: 00005532_027.png
Processed: 00004069_

Processed: 00007510_000.png
Processed: 00004533_018.png
Processed: 00014338_000.png
Processed: 00010936_022.png
Processed: 00000116_037.png
Processed: 00010020_007.png
Processed: 00001373_031.png
Processed: 00006060_000.png
Processed: 00009841_000.png
Processed: 00025444_000.png
Processed: 00006808_012.png
Processed: 00016175_011.png
Processed: 00013615_022.png
Processed: 00017980_007.png
Processed: 00006220_008.png
Processed: 00029291_000.png
Processed: 00013670_150.png
Processed: 00009526_000.png
Processed: 00015770_004.png
Processed: 00015316_000.png
Processed: 00006431_003.png
Processed: 00004809_008.png
Processed: 00002435_001.png
Processed: 00010437_002.png
Processed: 00017524_033.png
Processed: 00027938_000.png
Processed: 00010766_000.png
Processed: 00007018_012.png
Processed: 00014528_005.png
Processed: 00020945_025.png
Processed: 00008519_000.png
Processed: 00009407_000.png
Processed: 00001182_006.png
Processed: 00018845_004.png
Processed: 00000857_000.png
Processed: 00029102_

Processed: 00004533_013.png
Processed: 00012470_012.png
Processed: 00016400_000.png
Processed: 00014822_026.png
Processed: 00017070_006.png
Processed: 00007526_004.png
Processed: 00013306_000.png
Processed: 00012646_002.png
Processed: 00022925_002.png
Processed: 00028593_003.png
Processed: 00011448_010.png
Processed: 00001848_000.png
Processed: 00019051_000.png
Processed: 00029950_001.png
Processed: 00010644_002.png
Processed: 00026715_002.png
Processed: 00009365_003.png
Processed: 00009293_009.png
Processed: 00028827_000.png
Processed: 00009096_000.png
Processed: 00027415_035.png
Processed: 00029472_000.png
Processed: 00027705_000.png
Processed: 00013310_039.png
Processed: 00015023_001.png
Processed: 00002256_012.png
Processed: 00023487_003.png
Processed: 00003406_004.png
Processed: 00019961_005.png
Processed: 00001855_019.png
Processed: 00013257_000.png
Processed: 00011619_002.png
Processed: 00006160_014.png
Processed: 00021006_011.png
Processed: 00005870_000.png
Processed: 00005556_

Processed: 00018629_003.png
Processed: 00011695_002.png
Processed: 00022126_000.png
Processed: 00017156_000.png
Processed: 00013594_052.png
Processed: 00003541_001.png
Processed: 00006459_000.png
Processed: 00028016_000.png
Processed: 00000798_001.png
Processed: 00013150_000.png
Processed: 00010199_000.png
Processed: 00020597_000.png
Processed: 00005532_026.png
Processed: 00003989_013.png
Processed: 00012952_008.png
Processed: 00021260_005.png
Processed: 00001855_001.png
Processed: 00026050_004.png
Processed: 00028495_000.png
Processed: 00017138_036.png
Processed: 00006875_007.png
Processed: 00004479_000.png
Processed: 00006450_001.png
Processed: 00005127_000.png
Processed: 00023093_009.png
Processed: 00019363_023.png
Processed: 00011237_031.png
Processed: 00002233_000.png
Processed: 00015736_000.png
Processed: 00007735_040.png
Processed: 00019889_003.png
Processed: 00007018_050.png
Processed: 00018867_004.png
Processed: 00025840_001.png
Processed: 00022515_001.png
Processed: 00011842_

Processed: 00019924_008.png
Processed: 00018741_000.png
Processed: 00020748_000.png
Processed: 00025290_018.png
Processed: 00015768_004.png
Processed: 00015314_003.png
Processed: 00023647_000.png
Processed: 00012565_000.png
Processed: 00019045_004.png
Processed: 00001641_002.png
Processed: 00025382_004.png
Processed: 00028611_004.png
Processed: 00013937_006.png
Processed: 00026371_001.png
Processed: 00014933_006.png
Processed: 00023860_000.png
Processed: 00011925_002.png
Processed: 00005201_003.png
Processed: 00002350_002.png
Processed: 00011780_002.png
Processed: 00001373_042.png
Processed: 00003939_000.png
Processed: 00001249_009.png
Processed: 00020703_003.png
Processed: 00016607_028.png
Processed: 00025110_002.png
Processed: 00004808_034.png
Processed: 00022815_078.png
Processed: 00012176_013.png
Processed: 00019479_006.png
Processed: 00013249_027.png
Processed: 00016484_025.png
Processed: 00015442_025.png
Processed: 00004605_002.png
Processed: 00016785_002.png
Processed: 00021105_

Processed: 00023768_000.png
Processed: 00018944_010.png
Processed: 00011877_001.png
Processed: 00006875_031.png
Processed: 00002442_000.png
Processed: 00022393_000.png
Processed: 00010159_001.png
Processed: 00000116_016.png
Processed: 00000798_006.png
Processed: 00005275_000.png
Processed: 00007399_000.png
Processed: 00025529_018.png
Processed: 00005778_006.png
Processed: 00013682_000.png
Processed: 00008006_001.png
Processed: 00020042_006.png
Processed: 00028131_011.png
Processed: 00004526_016.png
Processed: 00000294_000.png
Processed: 00009629_000.png
Processed: 00004755_015.png
Processed: 00022572_060.png
Processed: 00027436_000.png
Processed: 00027357_014.png
Processed: 00018992_001.png
Processed: 00009001_007.png
Processed: 00010478_012.png
Processed: 00029513_004.png
Processed: 00003973_001.png
Processed: 00001157_001.png
Processed: 00004775_000.png
Processed: 00004381_044.png
Processed: 00011140_015.png
Processed: 00006483_000.png
Processed: 00011018_000.png
Processed: 00007061_

Processed: 00028534_001.png
Processed: 00017335_000.png
Processed: 00012741_004.png
Processed: 00015542_000.png
Processed: 00000800_001.png
Processed: 00016934_009.png
Processed: 00022458_002.png
Processed: 00008309_009.png
Processed: 00028690_000.png
Processed: 00020629_014.png
Processed: 00013546_000.png
Processed: 00013249_033.png
Processed: 00010625_014.png
Processed: 00005022_000.png
Processed: 00013601_004.png
Processed: 00012259_000.png
Processed: 00004883_002.png
Processed: 00021024_004.png
Processed: 00029940_007.png
Processed: 00011322_002.png
Processed: 00020185_012.png
Processed: 00007055_005.png
Processed: 00014365_000.png
Processed: 00001616_010.png
Processed: 00018750_000.png
Processed: 00011322_001.png
Processed: 00019863_013.png
Processed: 00008930_004.png
Processed: 00000131_002.png
Processed: 00029225_000.png
Processed: 00016414_002.png
Processed: 00019396_000.png
Processed: 00008394_001.png
Processed: 00026635_000.png
Processed: 00014365_001.png
Processed: 00017216_

Processed: 00013962_006.png
Processed: 00007688_000.png
Processed: 00006481_032.png
Processed: 00010936_001.png
Processed: 00006674_001.png
Processed: 00013615_050.png
Processed: 00014335_000.png
Processed: 00015587_000.png
Processed: 00029914_002.png
Processed: 00022815_009.png
Processed: 00027415_074.png
Processed: 00011849_000.png
Processed: 00001096_001.png
Processed: 00029379_000.png
Processed: 00014771_012.png
Processed: 00014650_001.png
Processed: 00014470_004.png
Processed: 00027296_003.png
Processed: 00011911_000.png
Processed: 00012215_003.png
Processed: 00011424_006.png
Processed: 00022283_037.png
Processed: 00018278_001.png
Processed: 00010436_004.png
Processed: 00016414_000.png
Processed: 00018062_004.png
Processed: 00007442_017.png
Processed: 00011857_002.png
Processed: 00025144_000.png
Processed: 00014706_023.png
Processed: 00026492_000.png
Processed: 00023823_000.png
Processed: 00005413_001.png
Processed: 00015770_034.png
Processed: 00023325_028.png
Processed: 00013992_

Processed: 00019982_001.png
Processed: 00004470_008.png
Processed: 00022834_001.png
Processed: 00016175_007.png
Processed: 00017232_000.png
Processed: 00008822_001.png
Processed: 00004381_029.png
Processed: 00029347_000.png
Processed: 00027221_001.png
Processed: 00016841_002.png
Processed: 00009611_004.png
Processed: 00024323_000.png
Processed: 00020640_000.png
Processed: 00012491_001.png
Processed: 00015882_000.png
Processed: 00013520_022.png
Processed: 00019024_001.png
Processed: 00028509_016.png
Processed: 00025869_000.png
Processed: 00017138_022.png
Processed: 00014626_029.png
Processed: 00022312_012.png
Processed: 00006481_012.png
Processed: 00024210_000.png
Processed: 00016205_009.png
Processed: 00018233_016.png
Processed: 00027685_000.png
Processed: 00028871_004.png
Processed: 00000796_008.png
Processed: 00028876_017.png
Processed: 00008676_000.png
Processed: 00007558_010.png
Processed: 00020703_033.png
Processed: 00010994_004.png
Processed: 00006993_001.png
Processed: 00004533_

Processed: 00007045_002.png
Processed: 00000711_002.png
Processed: 00020106_007.png
Processed: 00001708_000.png
Processed: 00027645_000.png
Processed: 00007274_000.png
Processed: 00011827_014.png
Processed: 00003111_000.png
Processed: 00021506_001.png
Processed: 00009365_002.png
Processed: 00005066_016.png
Processed: 00006436_004.png
Processed: 00002889_000.png
Processed: 00016414_001.png
Processed: 00020374_003.png
Processed: 00001517_013.png
Processed: 00024800_000.png
Processed: 00012628_020.png
Processed: 00007082_000.png
Processed: 00001582_023.png
Processed: 00020582_000.png
Processed: 00020038_000.png
Processed: 00025068_000.png
Processed: 00002320_004.png
Processed: 00004824_011.png
Processed: 00016406_005.png
Processed: 00027585_000.png
Processed: 00024439_000.png
Processed: 00016673_000.png
Processed: 00015401_007.png
Processed: 00028020_009.png
Processed: 00023924_000.png
Processed: 00006851_025.png
Processed: 00016291_017.png
Processed: 00022672_004.png
Processed: 00021201_

Processed: 00009811_005.png
Processed: 00004006_068.png
Processed: 00010389_000.png
Processed: 00012532_007.png
Processed: 00007098_001.png
Processed: 00006875_028.png
Processed: 00006304_007.png
Processed: 00027484_000.png
Processed: 00022832_023.png
Processed: 00026318_002.png
Processed: 00014827_003.png
Processed: 00021212_010.png
Processed: 00020699_018.png
Processed: 00007551_018.png
Processed: 00003087_000.png
Processed: 00001831_000.png
Processed: 00009636_001.png
Processed: 00004858_022.png
Processed: 00015799_016.png
Processed: 00012987_004.png
Processed: 00017635_007.png
Processed: 00022186_000.png
Processed: 00020158_009.png
Processed: 00003534_002.png
Processed: 00014121_000.png
Processed: 00022741_000.png
Processed: 00028890_007.png
Processed: 00029919_002.png
Processed: 00005681_033.png
Processed: 00003973_006.png
Processed: 00020213_027.png
Processed: 00014772_000.png
Processed: 00002742_000.png
Processed: 00020883_001.png
Processed: 00029915_003.png
Processed: 00019682_

Processed: 00018462_000.png
Processed: 00023128_012.png
Processed: 00008309_005.png
Processed: 00013741_000.png
Processed: 00009863_001.png
Processed: 00023325_032.png
Processed: 00015658_006.png
Processed: 00008251_010.png
Processed: 00027074_000.png
Processed: 00006441_000.png
Processed: 00011741_001.png
Processed: 00001203_011.png
Processed: 00003291_002.png
Processed: 00019750_025.png
Processed: 00010437_000.png
Processed: 00015383_001.png
Processed: 00022528_021.png
Processed: 00007321_012.png
Processed: 00010535_014.png
Processed: 00005742_006.png
Processed: 00004847_000.png
Processed: 00017882_000.png
Processed: 00018949_006.png
Processed: 00000013_029.png
Processed: 00011074_000.png
Processed: 00009330_001.png
Processed: 00011370_005.png
Processed: 00000116_032.png
Processed: 00013662_030.png
Processed: 00014626_009.png
Processed: 00025479_002.png
Processed: 00011456_004.png
Processed: 00023719_001.png
Processed: 00009323_004.png
Processed: 00003226_000.png
Processed: 00022282_

Processed: 00010949_002.png
Processed: 00017972_003.png
Processed: 00013175_017.png
Processed: 00028871_002.png
Processed: 00015558_003.png
Processed: 00005015_006.png
Processed: 00011507_002.png
Processed: 00003989_024.png
Processed: 00010508_001.png
Processed: 00000096_000.png
Processed: 00006725_005.png
Processed: 00005066_002.png
Processed: 00011003_011.png
Processed: 00010563_017.png
Processed: 00013601_022.png
Processed: 00019706_014.png
Processed: 00028044_006.png
Processed: 00025746_002.png
Processed: 00004746_004.png
Processed: 00029150_000.png
Processed: 00011212_001.png
Processed: 00011956_000.png
Processed: 00026779_000.png
Processed: 00007735_048.png
Processed: 00020642_001.png
Processed: 00013555_004.png
Processed: 00020400_000.png
Processed: 00011140_005.png
Processed: 00021772_008.png
Processed: 00005066_060.png
Processed: 00008305_000.png
Processed: 00023066_001.png
Processed: 00014424_001.png
Processed: 00023325_013.png
Processed: 00008404_000.png
Processed: 00014018_

Processed: 00013062_002.png
Processed: 00018530_009.png
Processed: 00017425_006.png
Processed: 00013625_032.png
Processed: 00020241_000.png
Processed: 00011215_000.png
Processed: 00005009_000.png
Processed: 00019042_000.png
Processed: 00006935_000.png
Processed: 00011119_000.png
Processed: 00000579_001.png
Processed: 00011507_000.png
Processed: 00017955_000.png
Processed: 00006481_022.png
Processed: 00007627_004.png
Processed: 00001385_012.png
Processed: 00003875_014.png
Processed: 00014996_018.png
Processed: 00012184_000.png
Processed: 00021189_000.png
Processed: 00012455_001.png
Processed: 00010756_002.png
Processed: 00017511_010.png
Processed: 00003028_057.png
Processed: 00013467_023.png
Processed: 00007498_001.png
Processed: 00005398_002.png
Processed: 00004342_054.png
Processed: 00013370_002.png
Processed: 00020945_024.png
Processed: 00015255_008.png
Processed: 00019070_006.png
Processed: 00002704_017.png
Processed: 00009458_006.png
Processed: 00027260_017.png
Processed: 00015401_

Processed: 00013358_000.png
Processed: 00006375_002.png
Processed: 00006218_001.png
Processed: 00003028_027.png
Processed: 00006549_000.png
Processed: 00028640_008.png
Processed: 00013670_157.png
Processed: 00008436_002.png
Processed: 00007581_000.png
Processed: 00021796_004.png
Processed: 00016333_007.png
Processed: 00007018_042.png
Processed: 00028509_008.png
Processed: 00008012_001.png
Processed: 00003787_000.png
Processed: 00009191_000.png
Processed: 00004132_008.png
Processed: 00006875_001.png
Processed: 00005641_001.png
Processed: 00010815_003.png
Processed: 00021685_000.png
Processed: 00004843_004.png
Processed: 00028400_001.png
Processed: 00027867_001.png
Processed: 00005532_019.png
Processed: 00015509_000.png
Processed: 00029391_000.png
Processed: 00006534_000.png
Processed: 00020312_026.png
Processed: 00004725_000.png
Processed: 00020703_022.png
Processed: 00016291_034.png
Processed: 00013236_000.png
Processed: 00018187_010.png
Processed: 00004893_061.png
Processed: 00027261_

Processed: 00005641_003.png
Processed: 00014995_000.png
Processed: 00022282_002.png
Processed: 00000662_004.png
Processed: 00001836_058.png
Processed: 00004342_030.png
Processed: 00023890_000.png
Processed: 00005066_057.png
Processed: 00029336_003.png
Processed: 00006166_000.png
Processed: 00015714_002.png
Processed: 00002524_030.png
Processed: 00003989_000.png
Processed: 00028974_017.png
Processed: 00004206_000.png
Processed: 00015497_000.png
Processed: 00012931_016.png
Processed: 00014420_000.png
Processed: 00014706_020.png
Processed: 00000013_045.png
Processed: 00000032_001.png
Processed: 00015376_019.png
Processed: 00012646_008.png
Processed: 00015692_000.png
Processed: 00003610_015.png
Processed: 00005666_004.png
Processed: 00005089_042.png
Processed: 00004533_014.png
Processed: 00009826_001.png
Processed: 00011424_004.png
Processed: 00017671_000.png
Processed: 00012158_020.png
Processed: 00001384_001.png
Processed: 00017818_000.png
Processed: 00019706_010.png
Processed: 00007735_

Processed: 00025382_000.png
Processed: 00004553_002.png
Processed: 00001917_002.png
Processed: 00022322_000.png
Processed: 00002359_007.png
Processed: 00010767_014.png
Processed: 00028948_001.png
Processed: 00016651_000.png
Processed: 00009257_000.png
Processed: 00023080_007.png
Processed: 00028871_000.png
Processed: 00029510_000.png
Processed: 00017686_000.png
Processed: 00007735_025.png
Processed: 00003797_000.png
Processed: 00004634_002.png
Processed: 00002552_001.png
Processed: 00028368_001.png
Processed: 00000627_001.png
Processed: 00028632_001.png
Processed: 00014287_000.png
Processed: 00010165_001.png
Processed: 00017511_007.png
Processed: 00016052_021.png
Processed: 00015414_019.png
Processed: 00016484_002.png
Processed: 00026018_000.png
Processed: 00021886_001.png
Processed: 00001373_002.png
Processed: 00012184_003.png
Processed: 00011904_001.png
Processed: 00022215_010.png
Processed: 00027706_019.png
Processed: 00021860_005.png
Processed: 00006751_000.png
Processed: 00000823_

Processed: 00004533_022.png
Processed: 00027976_000.png
Processed: 00016273_000.png
Processed: 00012628_058.png
Processed: 00015173_010.png
Processed: 00005365_028.png
Processed: 00019961_001.png
Processed: 00020968_000.png
Processed: 00017138_046.png
Processed: 00027931_000.png
Processed: 00008316_000.png
Processed: 00007525_000.png
Processed: 00021107_001.png
Processed: 00017524_023.png
Processed: 00010007_011.png
Processed: 00028861_007.png
Processed: 00004381_045.png
Processed: 00025494_000.png
Processed: 00018644_003.png
Processed: 00017138_038.png
Processed: 00019390_001.png
Processed: 00009600_009.png
Processed: 00002395_028.png
Processed: 00009798_015.png
Processed: 00000511_001.png
Processed: 00015646_026.png
Processed: 00000360_007.png
Processed: 00025078_000.png
Processed: 00014223_001.png
Processed: 00029007_002.png
Processed: 00012845_001.png
Processed: 00011995_000.png
Processed: 00016302_000.png
Processed: 00010092_024.png
Processed: 00017915_000.png
Processed: 00015770_

In [8]:
# Save preprocessed images
with h5py.File('preprocessed_xrays.h5', 'w') as hf:
    hf.create_dataset('images', data=X, compression='gzip')
    hf.create_dataset('labels', data=y)
    # Store filenames as strings
    dt = h5py.string_dtype(encoding='utf-8')
    hf.create_dataset('filenames', data=image_names, dtype=dt)

print("Data saved to HDF5 file")

Data saved to HDF5 file
