In [1]:
import os
import cv2
import numpy as np

In [10]:
def extract_color_histogram(patch, bins=8):
    """Build a normalised intensity histogram for each of the first three channels.

    Parameters
    ----------
    patch : np.ndarray
        Array indexed as ``patch[row, col, channel]``; channels 0, 1 and 2
        are read.
    bins : int or sequence, optional
        Passed straight to ``np.histogram``; values are binned over the
        half-open range ``[0, 256)`` (the last bin includes its right edge).

    Returns
    -------
    np.ndarray
        1-D float array holding the three per-channel histograms back to
        back, each divided by the number of pixels in the patch
        (``rows * cols``).
    """
    pixel_count = patch.shape[0] * patch.shape[1]
    per_channel = [
        np.histogram(patch[:, :, ch], bins=bins, range=(0, 256))[0].astype("float")
        / pixel_count
        for ch in range(3)
    ]
    # Channel histograms are laid out consecutively: ch0 bins, ch1 bins, ch2 bins.
    return np.concatenate(per_channel)

In [11]:
def extract_greyscale_histogram(patch, bins=8):
    """Build a normalised intensity histogram for a single-channel patch.

    Parameters
    ----------
    patch : np.ndarray
        Array with at least two dimensions; every value in it is binned.
    bins : int or sequence, optional
        Passed straight to ``np.histogram``; values are binned over the
        range ``[0, 256)`` (the last bin includes its right edge).

    Returns
    -------
    np.ndarray
        1-D float array of bin counts divided by ``rows * cols`` of the patch.
    """
    n_pixels = patch.shape[0] * patch.shape[1]
    counts = np.histogram(patch[:, :], bins=bins, range=(0, 256))[0]
    normalised = counts.astype("float") / n_pixels
    return np.array(list(normalised))

In [12]:
def pad_image_to_multiple(img, patch_size=32, is_color=False):
    """Zero-pad an image so that its height and width are multiples of ``patch_size``.

    The original pixels stay in the top-left corner; padding is added on the
    bottom and right edges only.

    Parameters
    ----------
    img : np.ndarray
        ``(h, w, c)`` array when ``is_color`` is true, ``(h, w)`` otherwise.
    patch_size : int, optional
        Side length the padded dimensions must be divisible by.
    is_color : bool, optional
        Selects whether ``img`` is unpacked as a 3-D or a 2-D array.

    Returns
    -------
    np.ndarray
        New array with the same dtype as ``img`` (and the same number of
        channels for color input), zero-filled outside the original image.

    Raises
    ------
    ValueError
        If the number of dimensions of ``img`` does not match ``is_color``
        (raised by the shape unpacking).
    """
    if is_color:
        h, w, c = img.shape
        # Keep the image's real channel count rather than assuming 3, so
        # e.g. 4-channel input is padded instead of failing on assignment.
        trailing = (c,)
    else:
        h, w = img.shape
        trailing = ()

    # Round each spatial dimension up to the next multiple of patch_size.
    padded_h = ((h + patch_size - 1) // patch_size) * patch_size
    padded_w = ((w + patch_size - 1) // patch_size) * patch_size

    new_img = np.zeros((padded_h, padded_w) + trailing, dtype=img.dtype)
    new_img[:h, :w] = img
    return new_img

In [13]:
def process_image(image_path, patch_size=32):
    """Read an image and compute one histogram feature vector per patch.

    The image is zero-padded so both sides are multiples of ``patch_size``,
    then cut into non-overlapping ``patch_size`` x ``patch_size`` tiles that
    are visited row by row (top to bottom, left to right within a row).

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread`` with ``cv2.IMREAD_UNCHANGED``.
    patch_size : int, optional
        Side length of each square patch.

    Returns
    -------
    np.ndarray
        2-D array with one row per patch. Single-channel images use
        ``extract_greyscale_histogram``; all other images are converted with
        ``cv2.COLOR_BGR2RGB`` and use ``extract_color_histogram``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    is_gray = img.ndim == 2 or (img.ndim == 3 and img.shape[2] == 1)
    if is_gray:
        if img.ndim == 3:
            # Drop the singleton channel axis so the 2-D padding path applies.
            img = img[:, :, 0]
        # The helper in this notebook is spelled "greyscale"; the previous
        # "grayscale" spelling referred to a name that was never defined.
        extract = extract_greyscale_histogram
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        extract = extract_color_histogram

    img = pad_image_to_multiple(img, patch_size=patch_size, is_color=not is_gray)

    features = [
        extract(img[y:y + patch_size, x:x + patch_size])
        for y in range(0, img.shape[0], patch_size)
        for x in range(0, img.shape[1], patch_size)
    ]
    return np.stack(features)

In [25]:
def process_dataset(input_dir, patch_size=32):
    """Compute patch histogram features for every image in every class folder.

    Each sub-directory of ``input_dir`` is treated as one class. For every
    regular file directly inside a class folder, ``process_image`` is called
    and the resulting array is saved as ``<class>/processed/<stem>.npy``.

    Parameters
    ----------
    input_dir : str
        Directory whose sub-directories are the class folders.
    patch_size : int, optional
        Forwarded to ``process_image``.

    Notes
    -----
    Errors raised by ``process_image`` (e.g. for a file that cannot be read
    as an image) are not caught and stop the run.
    """
    # Sorted listings give a reproducible processing/log order across runs.
    for class_name in sorted(os.listdir(input_dir)):
        class_path = os.path.join(input_dir, class_name)
        # Stray files next to the class folders are not classes; trying to
        # create "<file>/processed" for them would fail.
        if not os.path.isdir(class_path):
            continue

        processed_folder = os.path.join(class_path, "processed")
        os.makedirs(processed_folder, exist_ok=True)

        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            # Skips sub-directories, including the "processed" output folder.
            if not os.path.isfile(image_path):
                continue
            features = process_image(image_path, patch_size)
            output_file = os.path.join(
                processed_folder, os.path.splitext(image_name)[0] + ".npy"
            )
            np.save(output_file, features)
            print(f"Processed : {output_file}")
    
# Write per-patch histogram features for each class folder under the train directory.
process_dataset(r"../Group04-SUN397/group04/train/")

Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_aaepnczurcpxcpgk.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_acpggluskildlivv.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_acvolvfbonmmvewo.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_ajbuwfqzqpcursof.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_akypvrysfcrbtaeu.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_arcajpcsxcgmnmkb.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_axlbraofdeijprwh.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_ayfxrgkfvqdtwwvm.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_backlwgvjateapur.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\sun_bbokfmmphonmpkxr.npy
Processed : ../Group04-SUN397/group04/train/atrium_public\processed\su

Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bsozvhsdghtqjzec.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bswykbxmrypzjuhi.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_btimhillnztiwbtg.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_btjmibqgdplvnnnl.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bumysmhhawnxxdbq.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bvyyptcxqcokckfp.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bwtocflsmiliiaae.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bxktoineveycirme.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_bxthyvborkgqckxd.npy
Processed : ../Group04-SUN397/group04/train/general_store_outdoor\processed\sun_byukyxqgfqj

In [26]:
# Same histogram feature extraction, applied to the test directory.
process_dataset(r"../Group04-SUN397/group04/test/")

Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_ajwxcjuvsuivtizh.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_aljgsphxyjqrnifw.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_aurxzcbkrewmblcd.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_ausmncqifigawxdy.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_avayfwkofwkboktt.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_avzfxfdrlltkuepc.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_aymllcxypikztcgg.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_aywekxidnqhyqwmk.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_badwqvsdrmyxsqgs.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_baoobknwttwgizdc.npy
Processed : ../Group04-SUN397/group04/test/atrium_public\processed\sun_batbgslaf

Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_bobdvgpuvtimandb.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_boreohelddwowcin.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_bpivwlsvnwgrtclf.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_brhpupiowlvjhbal.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_brmmkazxnlvxglwr.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_brrmcyvcniksoblh.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_brudcnzseuwwfuqn.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_btjtipciedkytgic.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_btpxrvpihwkowgnl.npy
Processed : ../Group04-SUN397/group04/test/general_store_outdoor\processed\sun_bvekhoelbwrkubps.npy


In [27]:
def extract_mean_std_features(image_path, patch_size=7, step=1):
    """Compute the mean and standard deviation of every sliding window of an image.

    The image is read as grayscale and scanned with a
    ``patch_size`` x ``patch_size`` window moved ``step`` pixels at a time,
    row by row. Only windows that fit entirely inside the image are used.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread`` with ``cv2.IMREAD_GRAYSCALE``.
    patch_size : int, optional
        Side length of the square window.
    step : int, optional
        Stride of the window in both directions.

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_windows, 2)`` with columns
        ``[mean, std]``. When no window fits (image smaller than
        ``patch_size``) the result has shape ``(0, 2)``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")
    h, w = img.shape
    features = []

    for y in range(0, h - patch_size + 1, step):
        for x in range(0, w - patch_size + 1, step):
            patch = img[y:y + patch_size, x:x + patch_size]
            features.append([np.mean(patch), np.std(patch)])

    # reshape keeps the two-column layout even when the list is empty, so
    # results from different images can always be stacked together.
    return np.array(features, dtype=np.float64).reshape(-1, 2)

In [29]:
def process_dataset(input_dir, patch_size=7, step=1):
    """Save sliding-window mean/std features for every file directly in ``input_dir``.

    For each regular file in ``input_dir``, ``extract_mean_std_features`` is
    called and its result is written to ``<input_dir>/processed/<stem>.npy``.

    Note: this reuses the name of the ``process_dataset`` defined earlier in
    the notebook, so once this cell runs the earlier definition is replaced.

    Parameters
    ----------
    input_dir : str
        Directory containing the image files.
    patch_size : int, optional
        Forwarded to ``extract_mean_std_features``.
    step : int, optional
        Forwarded to ``extract_mean_std_features``.
    """
    processed_folder = os.path.join(input_dir, "processed")
    os.makedirs(processed_folder, exist_ok=True)

    for image_name in os.listdir(input_dir):
        image_path = os.path.join(input_dir, image_name)
        # Only regular files are processed; this also skips "processed" itself.
        if not os.path.isfile(image_path):
            continue

        features = extract_mean_std_features(image_path, patch_size, step)
        stem, _extension = os.path.splitext(image_name)
        output_file = os.path.join(processed_folder, stem + ".npy")
        np.save(output_file, features)
        print(f"Processed: {output_file}")

# Write sliding-window mean/std features for the files in the Train directory.
process_dataset(r"../Group04-cervical_cytology_images/group04/Train/")

Processed: ../Group04-cervical_cytology_images/group04/Train/processed\106.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\107.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\108.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\109.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\110.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\111.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\112.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\113.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\114.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\115.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\116.npy
Processed: ../Group04-cervical_cytology_images/group04/Train/processed\117.npy
Processed: ../Group04-cervical_cytology_images/group

In [30]:
# Same mean/std feature extraction, applied to the Test directory.
process_dataset(r"../Group04-cervical_cytology_images/group04/Test/")

Processed: ../Group04-cervical_cytology_images/group04/Test/processed\14.npy
Processed: ../Group04-cervical_cytology_images/group04/Test/processed\49.npy
Processed: ../Group04-cervical_cytology_images/group04/Test/processed\54.npy
