In [2]:
# emnist_denoise_compare.py
import os
import random
import json
import numpy as np
import cv2
import torch
from torchvision.datasets import EMNIST
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from pathlib import Path

def set_seed(seed=42):
    """Seed the global random generators of Python, NumPy and PyTorch.

    Args:
        seed: Value handed unchanged to ``random.seed``, ``np.random.seed``
            and ``torch.manual_seed``, in that order.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

def median_filter(img, k=3):
    """Median-blur a float image with OpenCV and return it rescaled.

    Args:
        img: NumPy array that is multiplied by 255 and cast to ``uint8``
            (the cast truncates) before filtering; callers in this file
            pass values in [0, 1].
        k: Aperture size forwarded to ``cv2.medianBlur``.

    Returns:
        The blurred 8-bit image divided by 255.0.
    """
    scaled = img * 255
    as_bytes = scaled.astype(np.uint8)
    blurred = cv2.medianBlur(as_bytes, k)
    return blurred / 255.0

def adaptive_threshold(img):
    """Binarize a float image with OpenCV's Gaussian adaptive threshold.

    Args:
        img: NumPy array that is multiplied by 255 and cast to ``uint8``
            (the cast truncates) before thresholding; callers in this file
            pass values in [0, 1].

    Returns:
        The thresholded 8-bit image divided by 255.0, so pixels that were
        set to the maximum value 255 come back as 1.0.
    """
    max_value = 255
    block_size = 11  # neighbourhood size argument
    offset = 2       # constant argument following the block size
    as_bytes = (img * 255).astype(np.uint8)
    binary = cv2.adaptiveThreshold(
        as_bytes,
        max_value,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
    return binary / 255.0

def morphological_opening(img, k=3):
    """Apply a morphological opening with a k-by-k rectangular element.

    Args:
        img: NumPy array that is multiplied by 255 and cast to ``uint8``
            (the cast truncates) before filtering; callers in this file
            pass values in [0, 1].
        k: Side length of the rectangular structuring element.

    Returns:
        The opened 8-bit image divided by 255.0.
    """
    as_bytes = (img * 255).astype(np.uint8)
    rect = cv2.getStructuringElement(cv2.MORPH_RECT, (k, k))
    return cv2.morphologyEx(as_bytes, cv2.MORPH_OPEN, rect) / 255.0

def clahe_equalize(img):
    """Equalize a float image with OpenCV's CLAHE and return it rescaled.

    Args:
        img: NumPy array that is multiplied by 255 and cast to ``uint8``
            (the cast truncates) before equalization; callers in this file
            pass values in [0, 1].

    Returns:
        The equalized 8-bit image divided by 255.0.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    as_bytes = (img * 255).astype(np.uint8)
    equalized = equalizer.apply(as_bytes)
    return equalized / 255.0

def non_local_means(img):
    """Denoise a float image with OpenCV's fast non-local means.

    Args:
        img: NumPy array that is multiplied by 255 and cast to ``uint8``
            (the cast truncates) before denoising; callers in this file
            pass values in [0, 1].

    Returns:
        The denoised 8-bit image divided by 255.0.
    """
    as_bytes = (img * 255).astype(np.uint8)
    denoised = cv2.fastNlMeansDenoising(
        as_bytes,
        None,
        h=10,
        templateWindowSize=7,
        searchWindowSize=21,
    )
    return denoised / 255.0

def l2_error(a, b):
    """Return the mean squared difference between two arrays.

    Despite the name this is the mean of the squared element-wise
    difference (MSE), not a Euclidean norm.

    Both inputs are converted to float64 before subtracting so that
    unsigned-integer images do not wrap around, and the result is a builtin
    ``float`` so it can be passed straight to ``json.dump`` regardless of
    the input dtypes.

    Args:
        a: Array-like of numbers.
        b: Array-like of numbers, broadcastable against ``a``.

    Returns:
        The mean of ``(a - b) ** 2`` as a Python ``float``.
    """
    diff = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return float(np.mean(diff ** 2))

def show_and_save_grid(imgs, titles, out_path, cmap='gray'):
    """Render images side by side in one row and save the figure to disk.

    The figure is always closed, even if drawing or saving fails, so
    repeated calls do not accumulate open matplotlib figures.

    Args:
        imgs: Sequence of 2-D arrays; each is drawn with the colour range
            fixed to [0, 1].
        titles: Sequence of titles paired with ``imgs`` by position. Pairing
            stops at the shorter of the two sequences.
        out_path: Destination passed to ``Figure.savefig``.
        cmap: Matplotlib colormap name used for every panel.

    Raises:
        ValueError: If ``imgs`` is empty.
    """
    n = len(imgs)
    if n == 0:
        raise ValueError("imgs must contain at least one image")

    # squeeze=False always returns a 2-D array of axes, so a single image
    # needs no special handling.
    fig, axs = plt.subplots(1, n, figsize=(3 * n, 3), squeeze=False)
    try:
        for ax, img, title in zip(axs[0], imgs, titles):
            ax.imshow(img, cmap=cmap, vmin=0, vmax=1)
            ax.set_title(title)
            ax.axis('off')
        # Operate on this figure explicitly rather than on pyplot's
        # implicit "current figure".
        fig.tight_layout()
        fig.savefig(out_path, dpi=150)
    finally:
        plt.close(fig)

def main(num_samples=5, out_dir="denoise_comparison_outputs", seed=123):
    """Compare classical filters on random EMNIST test images.

    For each sampled image every filter is applied, the mean squared
    difference between the original and the filtered image is recorded, and
    a side-by-side visualization is saved. A ``summary.json`` with all
    errors is written to the output directory at the end.

    Args:
        num_samples: Number of distinct test images to sample.
        out_dir: Directory for the PNG grids and ``summary.json``; created
            if missing.
        seed: Seed passed to ``set_seed`` before sampling.
    """
    set_seed(seed)
    out_dir = Path(out_dir)
    out_dir.mkdir(exist_ok=True, parents=True)

    transform = transforms.Compose([transforms.ToTensor()])
    ds = EMNIST(root='data', split='balanced', train=False, download=True, transform=transform)

    # Sample without replacement so no image is processed twice.
    idxs = np.random.choice(len(ds), size=num_samples, replace=False)
    summary = {}

    # One row per method: (key in the summary, panel title, filter function).
    methods = [
        ('Median', 'Median', median_filter),
        ('AdaptiveTh', 'AdaptiveTh', adaptive_threshold),
        ('Opening', 'Opening', morphological_opening),
        ('CLAHE', 'CLAHE', clahe_equalize),
        ('NLM', 'Non-localMeans', non_local_means),
    ]

    for i, idx in enumerate(idxs):
        idx = int(idx)
        img_tensor, label = ds[idx]
        label = int(label)
        # NOTE(review): torchvision's EMNIST images are reported to be stored
        # transposed; confirm whether the panels should be shown as img.T.
        img = img_tensor.squeeze(0).numpy()  # [H,W], in [0,1]

        # Apply every method once and reuse the result for both the error
        # and the visualization.
        outputs = [apply(img) for _, _, apply in methods]

        # Builtin floats keep the summary JSON-serializable and the log line
        # independent of the NumPy scalar type returned by l2_error.
        errors = {
            key: float(l2_error(img, out))
            for (key, _, _), out in zip(methods, outputs)
        }
        summary[f'sample_{i}'] = {
            'index': idx,
            'label': label,
            'errors': errors
        }

        imgs = [img] + outputs
        titles = ['Original'] + [title for _, title, _ in methods]
        out_path = out_dir / f"sample_{i}_label_{label}.png"
        show_and_save_grid(imgs, titles, out_path)
        print(f"[{i}] idx={idx} label={label} errors={errors} saved visualization to {out_path}")

    # Dump the per-sample errors for later inspection.
    with open(out_dir / "summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)
    print(f"Saved summary to {out_dir / 'summary.json'}")

# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


[0] idx=1626 label=30 errors={'Median': 0.0028126349464996404, 'AdaptiveTh': 0.18940185825344155, 'Opening': 0.007214454737865688, 'CLAHE': 0.018098494208127418, 'NLM': 1.412330098576953e-06} saved visualization to denoise_comparison_outputs/sample_0_label_30.png
[1] idx=15177 label=2 errors={'Median': 0.0024084536721603184, 'AdaptiveTh': 0.18653928156257088, 'Opening': 0.022668734705964585, 'CLAHE': 0.015944711142068028, 'NLM': 7.453955260327664e-07} saved visualization to denoise_comparison_outputs/sample_1_label_2.png
[2] idx=2755 label=4 errors={'Median': 0.0037604846898375193, 'AdaptiveTh': 0.32814706228050483, 'Opening': 0.01426349009268466, 'CLAHE': 0.01635985754806527, 'NLM': 3.923134844544271e-08} saved visualization to denoise_comparison_outputs/sample_2_label_4.png
[3] idx=14243 label=2 errors={'Median': 0.0026396049077055464, 'AdaptiveTh': 0.17394381245859813, 'Opening': 0.0050411931069168445, 'CLAHE': 0.015315479108874553, 'NLM': 1.0788628259811316e-05} saved visualization