In [None]:
# Import libraries
# Standard library
import glob
import os
import random

# Third-party
import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import seaborn as sns
import skimage.morphology
import tensorflow
from google.colab.patches import cv2_imshow
from PIL import Image, ImageOps, ImageFilter
from scipy import ndimage
from scipy.ndimage import morphology,measurements,sum
from skimage import io, color, exposure, filters,segmentation
from skimage import measure
from skimage.morphology import disk
from sklearn.preprocessing import normalize

%matplotlib inline

In [None]:
def otsu_thresholding(image):
    """Binarize an intensity image with Otsu's method.

    Every integer level ``t`` in 0..255 is tried as a threshold that splits
    the pixels into ``image < t`` and ``image >= t``; the level maximizing the
    between-class variance of the two groups is selected.

    Parameters
    ----------
    image : numpy.ndarray
        Intensity image. The histogram is built with 256 bins over the range
        0..256, so 8-bit intensities are expected.

    Returns
    -------
    numpy.ndarray
        Integer array shaped like ``image``: 0 where ``image`` is below the
        selected threshold and 1 elsewhere. When no threshold separates the
        pixels (constant image) the threshold stays at 0, so every pixel
        maps to 1. An empty image yields an empty array.
    """
    image = np.asarray(image)
    if image.size == 0:
        return np.zeros(image.shape, dtype=int)

    # Probability of each intensity level.
    histogram, _ = np.histogram(image.ravel(), bins=256, range=(0, 256))
    probabilities = histogram / image.size

    # Loop invariants: the intensity levels and the global mean.
    levels = np.arange(256)
    global_mean = np.mean(image)

    # Start from a defined threshold so a constant image (variance 0 for
    # every candidate) no longer leaves the result unbound.
    best_threshold = 0
    best_variance = 0

    for t in range(256):
        # Weight (probability mass) and mean intensity of each class.
        weight1 = np.sum(probabilities[:t])
        weight2 = np.sum(probabilities[t:])
        mean1 = np.sum(levels[:t] * probabilities[:t]) / weight1 if weight1 > 0 else 0
        mean2 = np.sum(levels[t:] * probabilities[t:]) / weight2 if weight2 > 0 else 0

        # Between-class variance for threshold t.
        between_class_variance = (
            weight1 * (mean1 - global_mean) ** 2
            + weight2 * (mean2 - global_mean) ** 2
        )

        # Strict comparison keeps the lowest threshold among ties.
        if between_class_variance > best_variance:
            best_variance = between_class_variance
            best_threshold = t

    return np.where(image < best_threshold, 0, 1)

In [None]:
def apply_morphological_opening(thresholded_image, SE_size=5):
    """Apply a binary morphological opening with a square structuring element.

    Parameters
    ----------
    thresholded_image : numpy.ndarray
        Binary image; non-zero elements are treated as foreground.
    SE_size : int, optional
        Side length of the square structuring element (default 5).

    Returns
    -------
    numpy.ndarray
        Boolean array with the opened image.
    """
    SE = np.ones((SE_size, SE_size))  # Square structuring element

    # An opening is an erosion followed by a dilation with the same element.
    # Calling it through the public scipy.ndimage namespace avoids the
    # deprecated scipy.ndimage.morphology module.
    return ndimage.binary_opening(thresholded_image, structure=SE)

In [None]:
def calculate_mcr(opened_image):
    """Return the share of the image covered by its largest blob (MCR).

    Blobs are the connected components found by ``scipy.ndimage.label`` with
    its default structuring element. A blob's size is the sum of the
    ``opened_image`` values inside it, i.e. its pixel count for a 0/1 mask.

    Parameters
    ----------
    opened_image : numpy.ndarray
        Binary mask; non-zero elements are foreground.

    Returns
    -------
    float
        Largest blob size divided by the total number of elements, or 0.0
        when the mask is empty or contains no foreground.
    """
    opened_image = np.asarray(opened_image)
    total_pixels = opened_image.size
    if total_pixels == 0:
        return 0.0

    # Label connected components (blobs)
    labeled_array, num_features = ndimage.label(opened_image)
    if num_features == 0:
        # No foreground: np.max over an empty size list would raise.
        return 0.0

    # Size of each blob, for labels 1..num_features
    blob_sizes = ndimage.sum_labels(
        opened_image, labeled_array, index=np.arange(1, num_features + 1)
    )

    return np.max(blob_sizes) / total_pixels

In [None]:
def remove_isolated_components(opened_image, mcr_threshold=0.8):
    """Erase blobs that are small compared with the largest blob.

    Blobs are labelled with 8-connectivity in 2-D (diagonal neighbours belong
    to the same blob). A blob is erased when its size divided by the size of
    the largest blob is below ``mcr_threshold``. A blob's size is the sum of
    the ``opened_image`` values inside it, i.e. its pixel count for a 0/1
    mask.

    Parameters
    ----------
    opened_image : numpy.ndarray
        Binary mask; non-zero elements are foreground.
    mcr_threshold : float, optional
        Minimum size ratio, relative to the largest blob, that a blob needs
        in order to be kept (default 0.8).

    Returns
    -------
    numpy.ndarray
        Array shaped like ``opened_image`` with the small blobs set to 0 and
        all other elements copied from ``opened_image``. A mask without any
        foreground is returned with nothing removed.
    """
    opened_image = np.asarray(opened_image)

    # Connectivity 2 links pixels that share an edge or a corner in 2-D.
    structure = ndimage.generate_binary_structure(opened_image.ndim, 2)
    labeled_array, num_features = ndimage.label(opened_image, structure=structure)

    if num_features == 0:
        # Nothing to compare against; np.max over no blobs would raise.
        isolated_labels = np.array([], dtype=int)
    else:
        # Size of each blob, for labels 1..num_features
        blob_sizes = ndimage.sum_labels(
            opened_image, labeled_array, index=np.arange(1, num_features + 1)
        )
        max_blob_size = np.max(blob_sizes)

        # Positions are 0-based while labels start at 1, hence the + 1.
        isolated_labels = np.flatnonzero(blob_sizes / max_blob_size < mcr_threshold) + 1

    # Remove isolated components from the image
    return np.where(np.isin(labeled_array, isolated_labels), 0, opened_image)

In [None]:
def detect_wbc(img):
    """Segment the dominant blob of an image and return a square crop around it.

    Steps: grayscale conversion, Otsu thresholding, morphological opening,
    removal of blobs that are small relative to the largest one, then a crop
    of the original image centred on the centroid of the remaining mask.

    Parameters
    ----------
    img : PIL.Image.Image
        Input image.

    Returns
    -------
    numpy.ndarray
        Region of ``np.array(img)`` inside the bounding box, clipped to the
        image borders.

    Raises
    ------
    ValueError
        If no foreground pixel is left after segmentation.
    """
    # PIL delivers RGB data, so cv2.COLOR_BGR2GRAY would swap the red and
    # blue weights; PIL's own 'L' conversion also copes with any input mode.
    img_gray = np.array(img.convert('L'))

    # Apply Otsu's thresholding method
    thresholded_image = otsu_thresholding(img_gray)

    # Remove small particles and noise
    opening = apply_morphological_opening(thresholded_image)

    # The MCR of the opened mask is used as the size-ratio threshold below.
    mcr = calculate_mcr(opening)
    filtered_image = remove_isolated_components(opening, mcr)

    # Row/column coordinates of the remaining foreground pixels.
    rows, cols = np.nonzero(filtered_image > 0)
    if rows.size == 0:
        raise ValueError("detect_wbc: no foreground pixels left after segmentation")

    # Centroid of the mask: x runs along columns, y along rows.
    x = int(cols.mean())
    y = int(rows.mean())

    # Distance of the farthest foreground pixel from the centroid. Rows are
    # compared with y and columns with x.
    max_distance = np.hypot(rows - y, cols - x).max()
    # NOTE(review): half of the farthest distance is used as the radius, as
    # before; confirm this is the intended crop size.
    radius = max_distance / 2

    # Bounding box around the circle, clipped to the image borders
    min_x = max(0, int(x - radius))
    max_x = min(filtered_image.shape[1], int(x + radius))
    min_y = max(0, int(y - radius))
    max_y = min(filtered_image.shape[0], int(y + radius))

    # Crop the region of interest (ROI) from the original image
    roi = np.array(img)[min_y:max_y, min_x:max_x]

    return roi


In [None]:
# Source directory with the images and destination for the cropped ROIs
directory_path = '/content/drive/MyDrive/Datasets/images/'
gray_roi_dir = '/content/drive/MyDrive/Datasets/gray_roi/'

# Only files with these extensions are handed to PIL.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

# NOTE(review): these lists are never filled in this cell; kept in case
# another cell refers to them.
file_paths = []
file_names = []

# Saving fails if the destination directory is missing.
os.makedirs(gray_roi_dir, exist_ok=True)

# Walk the directory tree, crop every image and save the ROI
for root, directories, files in os.walk(directory_path):
    for filename in files:
        stem, extension = os.path.splitext(filename)
        if extension.lower() not in image_extensions:
            continue  # skip anything that is not an image file

        file_path = os.path.join(root, filename)
        # The context manager closes the file handle once the ROI is computed.
        with Image.open(file_path) as image:
            roi = detect_wbc(image)

        # Output name: the last three characters of the stem as an integer
        # (presumably the image number, e.g. '...00012' -> 12).
        name = int(stem[-3:])
        Image.fromarray(roi).save(f'{gray_roi_dir}{name}.jpg')

  opened_image = morphology.binary_dilation(
  morphology.binary_erosion(thresholded_image, SE), SE
  labeled_array, num_features = measurements.label(opened_image)
  blob_sizes = measurements.sum(opened_image, labeled_array, index=range(1, num_features + 1))


In [None]:
# Directory that holds the source images
image_dir = '/content/drive/MyDrive/Datasets/images/'

# Table read from train.csv
train_data = pd.read_csv('/content/drive/MyDrive/Datasets/train.csv')

# Crop the detected region of each listed image and save it to disk
def preprocess_images(image_names):
    """Crop the detected region of every listed image and save it as a JPEG.

    Parameters
    ----------
    image_names : iterable of int
        Image numbers; each is formatted with ``:05d`` into the file name
        ``BloodImage_<number>.jpg`` inside ``image_dir``.

    Returns
    -------
    list of numpy.ndarray
        The cropped regions, in the order of ``image_names``.
    """
    processed_images = []
    for image_name in image_names:
        image_path = f'{image_dir}BloodImage_{image_name:05d}.jpg'
        # The context manager closes the file handle once the ROI is computed.
        with Image.open(image_path) as image:
            roi = detect_wbc(image)

        # Image.SAVE is PIL's registry of format writers, not a callable;
        # the ROI array has to be wrapped in an Image and saved through it.
        Image.fromarray(roi).save(f'/content/drive/MyDrive/Datasets/c_roi/{image_name}.jpg')
        processed_images.append(roi)

    return processed_images


