In [3]:
import cv2
import numpy as np
import json
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import os
import csv

In [5]:
def rgb_to_cmyk(rgb):
    """Convert RGB colours on the 0-255 scale to CMYK percentages.

    Parameters
    ----------
    rgb : array-like
        RGB values with the three channels on the last axis. A single
        colour of shape ``(3,)``, a table of colours of shape ``(n, 3)``
        and plain nested lists are all accepted.

    Returns
    -------
    numpy.ndarray
        Float array with the same leading shape as ``rgb`` and a last axis
        of size 4 holding ``(C, M, Y, K)``, each expressed in percent
        (0-100).
    """
    # asarray lets callers pass lists/tuples; float64 avoids integer division.
    rgb_normalized = np.asarray(rgb, dtype=np.float64) / 255.0

    # K is the amount of black: the complement of the brightest channel.
    K = 1 - np.max(rgb_normalized, axis=-1)

    # The epsilon keeps pure black (K == 1) from dividing by zero; the
    # numerator is 0 in that case, so C, M and Y come out as 0.
    denominator = 1 - K + 1e-10
    C = (1 - rgb_normalized[..., 0] - K) / denominator
    M = (1 - rgb_normalized[..., 1] - K) / denominator
    Y = (1 - rgb_normalized[..., 2] - K) / denominator

    CMYK = np.stack((C, M, Y, K), axis=-1) * 100  # fractions -> percent
    return CMYK

def extract_dominant_colors(image_path, annotations, num_colors=6):
    """Cluster the colours inside the first usable annotated polygon of an image.

    Parameters
    ----------
    image_path : str
        Path of the image file; its base name is the key looked up in
        ``annotations``.
    annotations : dict
        Mapping from image file name to a record with a ``"regions"`` entry.
        Each region must provide ``"shape_attributes"`` containing
        ``"all_points_x"`` and ``"all_points_y"`` (looks like a VGG Image
        Annotator export -- TODO confirm). ``"regions"`` may be a dict or a
        list of regions.
    num_colors : int, optional
        Maximum number of k-means clusters.

    Returns
    -------
    tuple
        ``(colors, WAY, avg_L, avg_A, avg_B)`` where ``colors`` is an integer
        array of RGB cluster centres, ``WAY`` is the mean CMYK yellow
        percentage of the clusters weighted by cluster size, and
        ``avg_L``/``avg_A``/``avg_B`` are the mean CIE Lab components of the
        region. When nothing can be extracted the result is
        ``(np.array([]), 0, 0, 0, 0)`` -- always five items, so callers can
        unpack every outcome the same way.
    """
    filename = os.path.basename(image_path)
    image_annotations = annotations.get(filename, None)

    if not image_annotations:
        print(f"No annotations found for image {filename}.")
        return np.array([]), 0, 0, 0, 0

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"Could not read image {filename}.")
        return np.array([]), 0, 0, 0, 0
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    regions = image_annotations.get("regions", {}) or {}
    region_list = list(regions.values()) if isinstance(regions, dict) else list(regions)

    for region in region_list:
        segmentation = region.get("shape_attributes", {})
        xs = segmentation.get("all_points_x") or []
        ys = segmentation.get("all_points_y") or []
        if len(xs) == 0 or len(ys) == 0:
            # Region without polygon vertices: nothing to rasterise.
            continue
        polygon = np.array(list(zip(xs, ys)), dtype=np.int32)

        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [polygon], 255)

        # Select by the mask itself so genuinely black pixels inside the
        # polygon are kept (filtering on "!= [0, 0, 0]" would drop them).
        pixels = image_rgb[mask > 0]
        if len(pixels) == 0:
            # Empty region: try the next one instead of giving up.
            continue

        # Convert only the selected pixels; cvtColor expects an image-shaped
        # array, hence the temporary (n, 1, 3) view.
        pixels_lab = cv2.cvtColor(
            pixels.reshape(-1, 1, 3), cv2.COLOR_RGB2LAB
        ).reshape(-1, 3)
        # Go to float BEFORE removing the offset: uint8 - 128 wraps around
        # for values below 128 and would corrupt negative a*/b* values.
        pixels_lab = pixels_lab.astype(np.float64)

        # Undo OpenCV's 8-bit Lab encoding (L scaled to 0-255, a/b offset by 128).
        L = pixels_lab[:, 0] * (100 / 255.0)
        A = pixels_lab[:, 1] - 128
        B = pixels_lab[:, 2] - 128

        avg_L = np.mean(L)
        avg_A = np.mean(A)
        avg_B = np.mean(B)

        n_clusters = min(num_colors, len(pixels))
        # Fixed seed so repeated runs give the same clusters.
        kmeans = KMeans(n_clusters=n_clusters, random_state=0)
        kmeans.fit(pixels)
        colors = kmeans.cluster_centers_.astype(int)
        colors_cmyk = rgb_to_cmyk(colors)
        # The centres printed here are RGB triples, so label them as such.
        print("Cluster Colors (RGB):", colors)

        # minlength keeps one count per cluster even if a cluster is empty.
        counts = np.bincount(kmeans.labels_, minlength=n_clusters)
        cluster_proportions = counts / np.sum(counts)

        yellowness_values = colors_cmyk[:, 2]
        WAY = np.dot(cluster_proportions, yellowness_values)

        return colors, WAY, avg_L, avg_A, avg_B

    # No region produced any pixels.
    return np.array([]), 0, 0, 0, 0


def plot_color_wheel(colors):
    """Show the given colours as equal-width bars arranged around a polar axis.

    Parameters
    ----------
    colors : numpy.ndarray
        Array of integer RGB triples, one row per colour. When the array is
        empty a message is printed and nothing is drawn.

    Returns
    -------
    None
    """
    if colors.size == 0:
        print("No colors detected in the specified region.")
        return

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))

    wedge_count = len(colors)
    # One evenly spaced angle per colour around the full circle.
    wedge_angles = np.linspace(0, 2 * np.pi, wedge_count, endpoint=False).tolist()
    hex_codes = ['#%02x%02x%02x' % tuple(rgb) for rgb in colors]

    for theta, hex_code in zip(wedge_angles, hex_codes):
        ax.bar(theta, 1, width=2 * np.pi / wedge_count, color=hex_code)

    ax.set_axis_off()
    plt.show()

def process_images_in_folder(folder_path, annotations_path, output_csv, num_colors=6):
    """Write the dominant cluster colours of every image in a folder to a CSV.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for image files.
    annotations_path : str
        JSON file passed, once loaded, to ``extract_dominant_colors``.
    output_csv : str
        Destination CSV; it is overwritten. Columns are ``Image``,
        ``Cluster 1`` .. ``Cluster <num_colors>`` and ``Classification``.
    num_colors : int, optional
        Number of cluster columns and the cluster count requested per image.

    Notes
    -----
    Every written row is labelled ``"Healthy"``. Images for which no colours
    could be extracted are reported on stdout and skipped.
    """
    # Leading dots so only real extensions match (not e.g. "notes_png").
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    with open(annotations_path) as f:
        annotations = json.load(f)

    with open(output_csv, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        writer.writerow(['Image'] + [f'Cluster {i + 1}' for i in range(num_colors)] + ["Classification"])

        # os.listdir order is arbitrary; sorting keeps the CSV reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(folder_path, filename)
            # Only the cluster colours are written out, so take the first
            # item instead of unpacking a fixed number of values; this keeps
            # working whatever the length of the returned tuple.
            result = extract_dominant_colors(image_path, annotations, num_colors)
            dominant_colors = result[0] if result is not None else None

            if dominant_colors is None or dominant_colors.size == 0:
                print(f"Skipping image {filename} due to no valid regions or colors.")
                continue

            clusters = [f'{color[0]}-{color[1]}-{color[2]}' for color in dominant_colors]
            # Pad short rows so "Classification" stays in its own column.
            clusters += [''] * (num_colors - len(clusters))
            writer.writerow([filename] + clusters + ["Healthy"])

# Script entry point: build the feature CSV for the healthy training images.
if __name__ == "__main__":
    folder_path, annotations_path, output_csv = (
        "./train/train N",
        "./anno/annotations_healthy.json",
        "healthy_features.csv",
    )

    process_images_in_folder(
        folder_path,
        annotations_path,
        output_csv,
        num_colors=6,
    )


Cluster Colors (CMYK): [[167 146 136]
 [204 190 177]
 [107  82  73]
 [185 170 158]
 [143 120 110]
 [208 203 191]]
Cluster Colors (CMYK): [[153 131 120]
 [207 191 178]
 [124  97  87]
 [179 159 146]
 [195 178 164]
 [208 203 192]]
Cluster Colors (CMYK): [[210 200 189]
 [143 121 110]
 [185 168 155]
 [114  88  82]
 [167 146 135]
 [203 187 174]]
Cluster Colors (CMYK): [[209 201 190]
 [148 121 111]
 [170 147 137]
 [188 169 156]
 [118  87  78]
 [207 188 174]]
Cluster Colors (CMYK): [[205 188 174]
 [167 144 133]
 [208 201 189]
 [187 168 155]
 [118  92  82]
 [141 118 109]]
Cluster Colors (CMYK): [[211 193 185]
 [111  91  83]
 [249 244 237]
 [155 140 132]
 [188 167 159]
 [224 215 210]]
Cluster Colors (CMYK): [[207 189 181]
 [ 91  73  66]
 [250 243 237]
 [133 116 110]
 [175 158 149]
 [223 216 209]]
Cluster Colors (CMYK): [[224 209 205]
 [153 121 118]
 [178 152 153]
 [122  81  74]
 [205 184 182]
 [247 234 230]]
Cluster Colors (CMYK): [[138  81  69]
 [173 121 113]
 [125  65  53]
 [153 104  90]
 [182

In [7]:
import json
import os
import numpy as np
import cv2
from sklearn.cluster import KMeans

def rgb_to_cmyk(rgb):
    """Convert RGB colours (0-255) into CMYK values expressed in percent.

    Parameters
    ----------
    rgb : array-like
        RGB values whose last axis holds the three channels, e.g. shape
        ``(n, 3)`` for a table of colours or ``(3,)`` for a single colour.
        Lists and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``rgb`` except that the last axis has size 4
        and holds ``(C, M, Y, K)`` in the range 0-100.
    """
    # Coerce to a float array so list input works and division is exact.
    rgb_normalized = np.asarray(rgb, dtype=np.float64) / 255.0

    # Black component: how far the brightest channel is from full intensity.
    K = 1 - np.max(rgb_normalized, axis=-1)

    # 1e-10 guards the K == 1 (pure black) case, where the numerators are 0
    # and the chromatic components therefore evaluate to 0.
    scale = 1 - K + 1e-10
    C = (1 - rgb_normalized[..., 0] - K) / scale
    M = (1 - rgb_normalized[..., 1] - K) / scale
    Y = (1 - rgb_normalized[..., 2] - K) / scale

    CMYK = np.stack((C, M, Y, K), axis=-1) * 100  # Convert to percentage
    return CMYK

def extract_dominant_colors(image_path, annotations, num_colors=6):
    """Cluster the colours inside the first usable annotated object of an image.

    Parameters
    ----------
    image_path : str
        Path of the image file; its base name is matched against the
        ``file_name`` of the entries in ``annotations['images']``.
    annotations : dict
        Dictionary with an ``'images'`` list (entries with ``'file_name'``
        and ``'id'``) and an ``'annotations'`` list (entries with
        ``'image_id'`` and ``'segmentation'``). This looks like a COCO-style
        export -- TODO confirm. Only segmentations given as a list of flat
        ``[x1, y1, x2, y2, ...]`` coordinate lists are used.
    num_colors : int, optional
        Maximum number of k-means clusters.

    Returns
    -------
    tuple
        ``(colors, WAY, avg_L, avg_A, avg_B, 0)``: ``colors`` is an integer
        array of RGB cluster centres, ``WAY`` is the mean CMYK yellow
        percentage of the clusters weighted by cluster size, and
        ``avg_L``/``avg_A``/``avg_B`` are the mean CIE Lab components of the
        region. The trailing ``0`` is a placeholder kept so callers can keep
        unpacking six values. When nothing can be extracted the result is
        ``(np.array([]), 0, 0, 0, 0, 0)``.
    """
    filename = os.path.basename(image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"Could not read image {filename}.")
        return np.array([]), 0, 0, 0, 0, 0
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image_id = None
    for img in annotations['images']:
        if img['file_name'] == filename:
            image_id = img['id']
            break

    if image_id is None:
        print(f"No image found in annotations for {filename}.")
        return np.array([]), 0, 0, 0, 0, 0

    # Find the annotations related to this image
    image_annotations = [ann for ann in annotations['annotations'] if ann['image_id'] == image_id]

    if not image_annotations:
        print(f"No annotations found for image {filename}.")
        return np.array([]), 0, 0, 0, 0, 0

    for annotation in image_annotations:
        segmentation = annotation['segmentation']

        # Collect every polygon part of this annotation. Anything that is
        # not a list of flat coordinate lists (e.g. a dict, or an empty
        # list) yields no polygon and the annotation is skipped, rather
        # than reusing a polygon left over from a previous iteration.
        polygons = []
        if isinstance(segmentation, list):
            for part in segmentation:
                if isinstance(part, list) and len(part) >= 2 and len(part) % 2 == 0:
                    polygons.append(np.array(part).reshape(-1, 2).astype(np.int32))
        if not polygons:
            continue

        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        # Fill parts one at a time so overlapping parts are unioned.
        for polygon in polygons:
            cv2.fillPoly(mask, [polygon], 255)

        # Select by the mask itself so genuinely black pixels inside the
        # polygon are kept (filtering on "!= [0, 0, 0]" would drop them).
        pixels = image_rgb[mask > 0]
        if len(pixels) == 0:
            # Empty region: try the next annotation instead of giving up.
            continue

        # Convert only the selected pixels; cvtColor expects an image-shaped
        # array, hence the temporary (n, 1, 3) view.
        pixels_lab = cv2.cvtColor(
            pixels.reshape(-1, 1, 3), cv2.COLOR_RGB2LAB
        ).reshape(-1, 3)
        # Go to float BEFORE removing the offset: uint8 - 128 wraps around
        # for values below 128 and would corrupt negative a*/b* values.
        pixels_lab = pixels_lab.astype(np.float64)

        # Undo OpenCV's 8-bit Lab encoding (L scaled to 0-255, a/b offset by 128).
        L = pixels_lab[:, 0] * (100 / 255.0)
        A = pixels_lab[:, 1] - 128
        B = pixels_lab[:, 2] - 128

        avg_L = np.mean(L)
        avg_A = np.mean(A)
        avg_B = np.mean(B)

        n_clusters = min(num_colors, len(pixels))
        # Fixed seed so repeated runs give the same clusters.
        kmeans = KMeans(n_clusters=n_clusters, random_state=0)
        kmeans.fit(pixels)
        colors = kmeans.cluster_centers_.astype(int)
        colors_cmyk = rgb_to_cmyk(colors)
        # The centres printed here are RGB triples, so label them as such.
        print("Cluster Colors (RGB):", colors)

        # minlength keeps one count per cluster even if a cluster is empty.
        counts = np.bincount(kmeans.labels_, minlength=n_clusters)
        cluster_proportions = counts / np.sum(counts)

        yellowness_values = colors_cmyk[:, 2]
        WAY = np.dot(cluster_proportions, yellowness_values)

        return colors, WAY, avg_L, avg_A, avg_B, 0

    # No annotation produced any pixels.
    return np.array([]), 0, 0, 0, 0, 0

def process_images_in_folder(folder_path, annotations_path, output_csv, num_colors=6):
    """Write the dominant cluster colours of every image in a folder to a CSV.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for image files.
    annotations_path : str
        JSON file passed, once loaded, to ``extract_dominant_colors``.
    output_csv : str
        Destination CSV; it is overwritten. Columns are ``Filename``,
        ``Cluster 1`` .. ``Cluster <num_colors>`` and ``Classification``.
    num_colors : int, optional
        Number of cluster columns and the cluster count requested per image.

    Notes
    -----
    Every written row is labelled ``"Jaundiced"``. Images for which no
    colours could be extracted are reported on stdout and skipped.
    """
    # This cell's own import list does not include csv; import it here so
    # the function does not depend on an earlier cell having been run.
    import csv

    # Leading dots so only real extensions match (not e.g. "notes_png").
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    with open(annotations_path) as f:
        annotations = json.load(f)

    with open(output_csv, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Filename'] + [f'Cluster {i + 1}' for i in range(num_colors)] + ["Classification"])

        # os.listdir order is arbitrary; sorting keeps the CSV reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(folder_path, filename)
            # Only the cluster colours are written out, so take the first
            # item of the returned tuple and ignore the other statistics.
            result = extract_dominant_colors(image_path, annotations, num_colors)
            dominant_colors = result[0] if result is not None else None

            if dominant_colors is None or dominant_colors.size == 0:
                print(f"Skipping image {filename} due to no valid regions or colors.")
                continue

            clusters = [f'{color[0]}-{color[1]}-{color[2]}' for color in dominant_colors]
            # Pad short rows so "Classification" stays in its own column.
            clusters += [''] * (num_colors - len(clusters))
            writer.writerow([filename] + clusters + ["Jaundiced"])

# Script entry point: build the feature CSV for the jaundiced training images.
if __name__ == "__main__":
    folder_path, annotations_path, output_csv = (
        "./train/train J",
        "./anno/annotations_Jaundice.json",
        "jaundiced_features.csv",
    )

    process_images_in_folder(
        folder_path,
        annotations_path,
        output_csv,
        num_colors=6,
    )


Cluster Colors (CMYK): [[154  75  19]
 [225 185 122]
 [188 130  61]
 [172 105  38]
 [214 169 107]
 [203 151  83]]
Cluster Colors (CMYK): [[123  54  18]
 [205 155  90]
 [167 105  41]
 [189 131  63]
 [218 178 116]
 [153  76  22]]
Cluster Colors (CMYK): [[250 246 191]
 [136 108  72]
 [212 191 142]
 [ 94  62  30]
 [237 220 175]
 [175 156 109]]
Cluster Colors (CMYK): [[248 239 183]
 [212 192 142]
 [135 100  61]
 [183 159 110]
 [235 217 167]
 [252 250 192]]
Cluster Colors (CMYK): [[190 131  61]
 [222 179 115]
 [154  78  20]
 [207 155  87]
 [102  58  21]
 [165 105  43]]
Cluster Colors (CMYK): [[177 112  43]
 [211 161  94]
 [195 139  69]
 [221 179 117]
 [125  76  31]
 [158  75  17]]
Cluster Colors (CMYK): [[225 184 119]
 [172 106  38]
 [189 130  60]
 [154  74  17]
 [213 166 103]
 [201 149  81]]
Cluster Colors (CMYK): [[222 177 101]
 [231 203 149]
 [154 119  89]
 [243 221 171]
 [203 152  92]
 [231 190 117]]
Cluster Colors (CMYK): [[235 198 129]
 [160 115  74]
 [243 219 169]
 [200 149  92]
 [225