In [None]:
# Display the first 10 original images from the dataset folder in a grid for visual inspection
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import zipfile

# Here's the original dataset: https://www.kaggle.com/datasets/bjoernjostein/ptb-xl-ecg-image-gmc2024
# The dataset contains ~21,671 images split into folders like 00000, 01000, etc.,
# with each folder holding around 1,000 images for easier organization and access.
# Folder previewed by this cell (one of the dataset's numbered subfolders).
folder_path = '/kaggle/input/ptb-xl-ecg-image-gmc2024/00000'

# Match extensions case-insensitively (e.g. '.PNG'), the same way the later
# cells filter files, and keep only the first 10 names in sorted order.
image_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)[:10]

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, axes = plt.subplots(2, 5, figsize=(20, 8))

for ax, img_file in zip(axes.flat, image_files):
    img_path = os.path.join(folder_path, img_file)
    # The context manager releases the file handle as soon as the image is drawn.
    with Image.open(img_path) as img:
        ax.imshow(img, cmap='gray')  # cmap is only used for single-channel images
    ax.set_title(img_file)

# Hide the axes on every panel, including panels left empty when the folder
# contains fewer than 10 images.
for ax in axes.flat:
    ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# The cropped images have a size of 2200 x 1265 to focus on the signal.
# Initially, cropping was done using a mobile editor with a 16:9 ratio,
# but several adjustments were made to ensure the signal was fully captured.
# The final size of 2200 x 1265 was found to be the optimal crop dimension.
crop_width, crop_height = 2200, 1265

folder_path = '/kaggle/input/ptb-xl-ecg-image-gmc2024/00000'

# Case-insensitive extension match, first 10 files in sorted order.
image_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)[:10]

fig, axes = plt.subplots(2, 5, figsize=(20, 8))

for ax, img_file in zip(axes.flat, image_files):
    img_path = os.path.join(folder_path, img_file)
    # The context manager closes the source file once the crop has been drawn.
    with Image.open(img_path) as img:
        width, height = img.size

        # Bottom-aligned, horizontally centred crop window -- the same placement
        # the standardization cell uses -- clamped to the image bounds so images
        # smaller than the window are not padded.
        # When width == 2200 this is identical to a left-aligned crop.
        left = max((width - crop_width) // 2, 0)
        top = max(height - crop_height, 0)
        right = min(left + crop_width, width)
        bottom = height

        cropped_img = img.crop((left, top, right, bottom))
        ax.imshow(cropped_img, cmap='gray')  # cmap is only used for single-channel images
    ax.set_title(img_file)

# Hide the axes on every panel, including any left empty.
for ax in axes.flat:
    ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# Code to calculate average brightness and contrast
# Brightness = mean gray level of an image; contrast = standard deviation of
# its gray levels. Both are averaged over every image under the dataset root.
folder_path = '/kaggle/input/ptb-xl-ecg-image-gmc2024'
brightness_list = []
contrast_list = []

# Get all image files from all subfolders (case-insensitive extension match)
image_paths = [
    os.path.join(root, file)
    for root, _, files in os.walk(folder_path)
    for file in files
    if file.lower().endswith(('.png', '.jpg', '.jpeg'))
]

# os.walk yields nothing for a missing or empty directory; fail loudly here
# instead of letting np.mean([]) print "nan" further down.
if not image_paths:
    raise FileNotFoundError(f"No image files found under {folder_path!r}")

# Loop through all images
for path in tqdm(image_paths, desc='Processing images'):
    with Image.open(path) as img:
        gray = img.convert('L')  # Convert image to grayscale
        arr = np.array(gray, dtype=np.float32)

        brightness_list.append(arr.mean())  # Per-image average brightness
        contrast_list.append(arr.std())     # Per-image standard deviation (contrast)

# Calculate average brightness and contrast for the entire dataset
# (unweighted mean of the per-image values)
mean_brightness = np.mean(brightness_list)
mean_contrast = np.mean(contrast_list)

print(f"Average brightness: {mean_brightness:.2f}")
print(f"Average contrast: {mean_contrast:.2f}")

In [None]:
# Target brightness and contrast
# (target mean gray level and target gray-level standard deviation of each
# output image; presumably chosen from the dataset averages -- TODO confirm)
target_brightness = 200
target_contrast = 15.0

# These paths are for processing the 00000 folder.
# To standardize the other folders (01000, 02000, etc.), change only
# `folder_name` and re-run: the input folder and the ZIP name are both derived
# from it, so they cannot drift out of sync.
dataset_root = '/kaggle/input/ptb-xl-ecg-image-gmc2024'
folder_name = '00000'

input_folder = os.path.join(dataset_root, folder_name)
temp_output_folder = '/kaggle/working/temp_images'
final_zip_path = os.path.join('/kaggle/working', f'{folder_name}.zip')

os.makedirs(temp_output_folder, exist_ok=True)

def adjust_brightness_contrast(img_array, target_brightness, target_contrast):
    """Shift and scale pixel values to a target mean and standard deviation.

    Args:
        img_array: numpy array of pixel values.
        target_brightness: desired mean of the output values.
        target_contrast: desired standard deviation of the output values.

    Returns:
        A ``uint8`` array of the same shape. Values are clipped to [0, 255]
        before the cast, so the achieved mean/std can deviate from the targets
        when clipping occurs. If the input has zero standard deviation it is
        returned unchanged apart from the cast to ``uint8``.
    """
    current_std = img_array.std()
    if current_std == 0:
        # A constant image cannot be rescaled; pass it through as-is.
        return img_array.astype(np.uint8)

    current_mean = img_array.mean()
    gain = target_contrast / current_std
    rescaled = (img_array - current_mean) * gain + target_brightness
    # Clip first so the uint8 cast cannot wrap around.
    return np.clip(rescaled, 0, 255).astype(np.uint8)

# Crop window (pixels) taken from the bottom of each image, and the output size.
crop_width, crop_height = 2200, 1265
output_size = (224, 224)

# Sorted so the processing order (and the ZIP member order) is reproducible.
image_files = sorted(
    f for f in os.listdir(input_folder)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Names written during THIS run. The temp folder is reused between runs, so
# only these files are archived; leftovers from another folder are ignored.
written_files = []

for file in tqdm(image_files, desc='Processing images'):
    path = os.path.join(input_folder, file)
    with Image.open(path) as img:
        width, height = img.size

        # Bottom-aligned, horizontally centred crop, clamped to the image bounds.
        # Without clamping, an image smaller than the window produces negative
        # coordinates and Pillow pads the crop, which would distort the
        # brightness/contrast statistics computed below.
        crop_left = max((width - crop_width) // 2, 0)
        crop_top = max(height - crop_height, 0)
        crop_right = min(crop_left + crop_width, width)
        cropped = img.crop((crop_left, crop_top, crop_right, height))

        # Note: resizing to a square does not preserve the crop's aspect ratio.
        resized = cropped.resize(output_size, resample=Image.LANCZOS)

        gray = resized.convert('L')
        arr = np.array(gray, dtype=np.float32)

        adjusted_arr = adjust_brightness_contrast(arr, target_brightness, target_contrast)
        adjusted_img = Image.fromarray(adjusted_arr)
        adjusted_img.save(os.path.join(temp_output_folder, file))
        written_files.append(file)

# Create a ZIP file containing exactly the images produced above
with zipfile.ZipFile(final_zip_path, 'w') as zipf:
    for file in written_files:
        # Store each image at the archive root under its original file name.
        zipf.write(os.path.join(temp_output_folder, file), file)

print("Done! All images have been standardized for brightness and contrast, and saved in a ZIP file.")