In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os

def advanced_preprocessing_method(image_path, min_area=50):
    """
    Binarize an image and strip small specks using edge-aware filtering.

    Pipeline: grayscale load -> bilateral filter -> Otsu threshold ->
    connected-component area filter -> morphological closing -> inversion.

    Args:
        image_path: Path of the image file to load.
        min_area: Components of the dark foreground whose pixel area is not
            strictly greater than this value are discarded. Defaults to 50.

    Returns:
        A single-channel image in which the kept components are 0 (black)
        and everything else is 255 (white).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file exists but OpenCV could not decode it.
    """
    # Load image in grayscale; cv2.imread signals failure by returning None
    # instead of raising, so check explicitly to give a readable error.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")
        raise ValueError(f"Could not read image: {image_path}")

    # Bilateral filtering to preserve edges while removing noise
    # (neighbourhood diameter 9, sigmaColor 75, sigmaSpace 75).
    filtered = cv2.bilateralFilter(img, 9, 75, 75)

    # Otsu picks the threshold itself; the 0 passed as threshold is ignored.
    _, binary = cv2.threshold(filtered, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Connected components are labelled on the inverted image so that the
    # dark pixels of `binary` are the foreground being analysed.
    _, labels, stats, _ = cv2.connectedComponentsWithStats(
        cv2.bitwise_not(binary), connectivity=8)

    # Build a per-label keep/discard table, then map it over the label image
    # in one pass instead of rescanning the image once per component.
    keep = stats[:, cv2.CC_STAT_AREA] > min_area
    keep[0] = False  # label 0 is the background
    filtered_img = np.where(keep[labels], 255, 0).astype(binary.dtype)

    # Morphological closing to smooth lines
    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
    closed = cv2.morphologyEx(filtered_img, cv2.MORPH_CLOSE, kernel_close)

    # Invert so kept components end up dark on a white background
    return cv2.bitwise_not(closed)


def process_single_image(input_path, output_path):
    """
    Preprocess one image and write the result to disk.

    Args:
        input_path: Path of the image to preprocess.
        output_path: Destination path for the processed image.

    Raises:
        OSError: If OpenCV reports that the output could not be written.
    """
    processed = advanced_preprocessing_method(input_path)

    # cv2.imwrite reports failure through its boolean return value; without
    # this check a failed write would still be announced as processed.
    if not cv2.imwrite(output_path, processed):
        raise OSError(f"Failed to write image: {output_path}")

    print(f"Processed: {input_path} -> {output_path}")


def process_all_datasets(base_path='Dataset', output_base='Preprocessed_Dataset_advanced'):
    """
    Process all ECG images in the dataset folders using advanced method

    For each of the four known class folders under ``base_path``, every image
    file is run through ``advanced_preprocessing_method`` and saved under the
    same folder and file name inside ``output_base``. Missing class folders
    are reported and skipped; a failure on one image is reported and counted
    without stopping the batch. A summary is printed at the end.

    Args:
        base_path: Directory containing the class folders.
        output_base: Directory the processed images are written to; created
            if it does not exist.
    """
    # Define folders
    folders = (
        'ECG Images of Myocardial Infarction Patients',
        'ECG Images of Patient that have abnormal heartbeat',
        'ECG Images of Patient that have History of MI',
        'Normal Person ECG Images',
    )

    # Recognised image suffixes (compared case-insensitively)
    image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff'}

    # Create output directory
    os.makedirs(output_base, exist_ok=True)

    total_processed = 0
    total_failed = 0

    for folder in folders:
        input_folder = os.path.join(base_path, folder)
        output_folder = os.path.join(output_base, folder)

        # isdir rather than exists: a plain file with the folder's name would
        # otherwise make os.listdir raise outside the per-image try block.
        if not os.path.isdir(input_folder):
            print(f"Warning: Folder not found - {input_folder}")
            continue

        # Create output folder
        os.makedirs(output_folder, exist_ok=True)

        # Sorted so the processing order does not depend on the filesystem
        image_files = sorted(
            name for name in os.listdir(input_folder)
            if os.path.splitext(name)[1].lower() in image_extensions
        )
        image_count = len(image_files)

        print(f"\nProcessing: {folder}")
        print(f"Found {image_count} images")

        # Process each image
        for idx, img_file in enumerate(image_files, 1):
            input_path = os.path.join(input_folder, img_file)
            output_path = os.path.join(output_folder, img_file)

            try:
                # Apply preprocessing
                processed = advanced_preprocessing_method(input_path)

                # cv2.imwrite returns False on failure instead of raising;
                # turn that into an error so it is counted as a failure.
                if not cv2.imwrite(output_path, processed):
                    raise OSError(f"could not write {output_path}")
            except Exception as e:
                # Batch boundary: report, count, and move on to the next file
                print(f"\nError processing {img_file}: {str(e)}")
                total_failed += 1
                continue

            total_processed += 1

            # Print progress every 10 images
            if idx % 10 == 0 or idx == image_count:
                print(f"  Progress: {idx}/{image_count} images processed")

    print(f"\n{'='*60}")
    print(f"Processing Complete!")
    print(f"Total images processed: {total_processed}")
    print(f"Total images failed: {total_failed}")
    print(f"Output saved to: {output_base}")
    print(f"{'='*60}")


def visualize_result(image_path, save_path='advanced_result.png'):
    """
    Visualize original and processed image side by side

    The figure is saved to ``save_path`` and then shown.

    Args:
        image_path: Path of the image to load and preprocess.
        save_path: File the comparison figure is written to. Defaults to
            'advanced_result.png'.

    Raises:
        ValueError: If OpenCV could not read ``image_path``.
    """
    # Load original; cv2.imread returns None rather than raising on failure
    original = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if original is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Process
    processed = advanced_preprocessing_method(image_path)

    # Display both panels with identical styling
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    panels = (
        (original, 'Original Image'),
        (processed, 'Advanced Method Result'),
    )
    for ax, (image, title) in zip(axes, panels):
        ax.imshow(image, cmap='gray')
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Visualization saved as '{save_path}'")


# Usage examples
if __name__ == "__main__":
    # Example 1: Process a single image
    # process_single_image('path/to/image.png', 'output.png')

    # Example 2: Visualize one image
    # visualize_result('path/to/image.png')

    # Example 3: Process all datasets
    # Read the cropped images and write the cleaned copies to a sibling
    # directory.
    cropped_root = '../Datasets/Dataset_cropped'
    cleaned_root = '../Datasets/Dataset_without_background'
    process_all_datasets(base_path=cropped_root, output_base=cleaned_root)


Processing: ECG Images of Myocardial Infarction Patients
Found 239 images
  Progress: 10/239 images processed
  Progress: 20/239 images processed
  Progress: 30/239 images processed
  Progress: 40/239 images processed
  Progress: 50/239 images processed
  Progress: 60/239 images processed
  Progress: 70/239 images processed
  Progress: 80/239 images processed
  Progress: 90/239 images processed
  Progress: 100/239 images processed
  Progress: 110/239 images processed
  Progress: 120/239 images processed
  Progress: 130/239 images processed
  Progress: 140/239 images processed
  Progress: 150/239 images processed
  Progress: 160/239 images processed
  Progress: 170/239 images processed
  Progress: 180/239 images processed
  Progress: 190/239 images processed
  Progress: 200/239 images processed
  Progress: 210/239 images processed
  Progress: 220/239 images processed
  Progress: 230/239 images processed
  Progress: 239/239 images processed

Processing: ECG Images of Patient that have a