In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from PIL import Image

In [None]:
# Function to extract features from a single image
def extract_features(image, patch_size=7, stride=1):
    """Compute [mean, variance] intensity features over sliding square patches.

    Args:
        image: Array whose first two axes are (height, width).
        patch_size: Side length of the square patch (default 7).
        stride: Step between consecutive patch origins along both axes (default 1).

    Returns:
        A list with one ``[mean_intensity, variance_intensity]`` pair per patch,
        ordered row by row. The list is empty when the image is smaller than
        ``patch_size`` along either axis.
    """
    features = []
    height, width = image.shape[:2]

    # Only patches that fit entirely inside the image are visited.
    for y in range(0, height - patch_size + 1, stride):
        for x in range(0, width - patch_size + 1, stride):
            patch = image[y:y + patch_size, x:x + patch_size]
            features.append([np.mean(patch), np.var(patch)])

    return features

# Function to process a set of images
def process_images(data_dir):
    """Collect patch features from every .jpg/.png image found under ``data_dir``.

    The directory tree is walked recursively in sorted order so the resulting
    feature order is the same on every run and platform. Files that OpenCV
    cannot decode are reported and skipped instead of crashing the run.

    Args:
        data_dir: Root directory to scan.

    Returns:
        A flat list of the feature rows produced by ``extract_features`` for all
        readable images, concatenated in traversal order.
    """
    feature_vectors = []

    for root, dirs, files in os.walk(data_dir):
        dirs.sort()  # sorting in place fixes the order in which os.walk descends
        for file in sorted(files):
            if not file.endswith(('.jpg', '.png')):
                continue
            image_path = os.path.join(root, file)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread returns None (no exception) for unreadable files.
                print(f"Skipping unreadable image: {image_path}")
                continue

            # Extract features from the image
            feature_vectors.extend(extract_features(image))

    return feature_vectors

# Define paths to your dataset (training and test data)
# These directories are passed to process_images, which scans them recursively.
train_data_dir = './cell_input/Train'
test_data_dir_1 = './cell_input/Test1'
test_data_dir_2 = './cell_input/Test2'
test_data_dir_3 = './cell_input/Test3'

# Define the output directory for saving feature vectors
# The trailing slash matters: later cells build file paths as `output_dir + name`.
output_dir = './cell_output/'
# exist_ok=True keeps this cell safe to re-run once the directory exists.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# # Process for segmentation and feature extraction

# train_feature_vectors = process_images(train_data_dir)
# train_feature_vectors = np.array(train_feature_vectors)

# test1_feature_vectors = process_images(test_data_dir_1)
# test1_feature_vectors = np.array(test1_feature_vectors)

# test2_feature_vectors = process_images(test_data_dir_2)
# test2_feature_vectors = np.array(test2_feature_vectors)

# test3_feature_vectors = process_images(test_data_dir_3)
# test3_feature_vectors = np.array(test3_feature_vectors)

In [None]:
# # Save train feature vectors to a text file
# np.savetxt(output_dir + 'train_feature_vectors.txt', train_feature_vectors)

# # Save test feature vectors to a text file
# np.savetxt(output_dir + 'test1_feature_vectors.txt', test1_feature_vectors)
# np.savetxt(output_dir + 'test2_feature_vectors.txt', test2_feature_vectors)
# np.savetxt(output_dir + 'test3_feature_vectors.txt', test3_feature_vectors)

In [None]:
# Load the cached patch features; when a cache file is missing, rebuild it from
# the images and save it, so a fresh "Restart & Run All" does not fail here.
# NOTE: this relies on the patch-based process_images defined above; a later
# cell redefines process_images for raw pixels, so run this cell before it.
def load_or_extract_features(cache_name, data_dir):
    """Return the feature array cached as ``output_dir + cache_name``.

    If the cache file does not exist, the features are computed with
    ``process_images(data_dir)``, written with ``np.savetxt`` and returned.
    """
    cache_path = output_dir + cache_name
    if os.path.exists(cache_path):
        return np.loadtxt(cache_path)
    features = np.array(process_images(data_dir))
    np.savetxt(cache_path, features)
    return features

# Read train feature vectors from the text file and convert to a NumPy array
train_feature_vectors = load_or_extract_features('train_feature_vectors.txt', train_data_dir)

# Read test feature vectors from text files and convert to NumPy arrays
test1_feature_vectors = load_or_extract_features('test1_feature_vectors.txt', test_data_dir_1)
test2_feature_vectors = load_or_extract_features('test2_feature_vectors.txt', test_data_dir_2)
test3_feature_vectors = load_or_extract_features('test3_feature_vectors.txt', test_data_dir_3)

In [None]:
# Report shape and type of each loaded feature array, labelled with the
# variable it actually describes.
for feature_name, feature_array in (
    ("train_feature_vectors", train_feature_vectors),
    ("test1_feature_vectors", test1_feature_vectors),
    ("test2_feature_vectors", test2_feature_vectors),
    ("test3_feature_vectors", test3_feature_vectors),
):
    print(f"Shape of {feature_name}:", feature_array.shape)
    print(f"Data type of {feature_name}:", type(feature_array))

In [None]:
def initialize_centers(data, num_clusters, rng=None):
    """Pick ``num_clusters`` distinct rows of ``data`` as initial cluster centers.

    ``data`` is left untouched, so labels computed afterwards stay aligned with
    the caller's row order.

    Args:
        data: 2-D array with one sample per row.
        num_clusters: Number of centers to draw.
        rng: Optional ``numpy.random.Generator``. When omitted, NumPy's global
            random state is used, so ``np.random.seed`` still controls the result.

    Returns:
        A new ``(num_clusters, n_features)`` array of sampled rows.

    Raises:
        ValueError: If ``num_clusters`` exceeds the number of rows in ``data``.
    """
    sampler = np.random if rng is None else rng
    # Sampling indices without replacement avoids reordering the data itself.
    chosen_rows = sampler.choice(data.shape[0], size=num_clusters, replace=False)
    return data[chosen_rows]

def assign_to_clusters(data, centers):
    """Return, for every row of ``data``, the index of its nearest center (Euclidean)."""
    # Inserting an axis lets each point broadcast against all centers at once.
    offsets = data[:, np.newaxis] - centers
    return np.linalg.norm(offsets, axis=2).argmin(axis=1)

def update_centers(data, cluster_assignments, num_clusters):
    """Recompute each center as the mean of the rows currently assigned to it.

    Clusters without any assigned row keep an all-zero center.
    """
    centers = np.zeros((num_clusters, data.shape[1]))
    for k in range(num_clusters):
        members = data[cluster_assignments == k]
        if members.shape[0] == 0:
            continue  # nothing assigned: the row stays at zero
        centers[k] = members.mean(axis=0)
    return centers

def k_means(data, num_clusters, max_iterations):
    """Cluster ``data`` with Lloyd's K-means algorithm.

    Args:
        data: 2-D array with one sample per row. It is not modified.
        num_clusters: Number of clusters.
        max_iterations: Upper bound on center updates; 0 returns the initial
            centers together with their assignments.

    Returns:
        ``(centers, cluster_assignments)`` where ``cluster_assignments[i]`` is
        the index of the returned center nearest to ``data[i]``.
    """
    # initialize_centers may shuffle its argument in place; handing it a copy
    # keeps the row order of `data`, and therefore of the returned labels, intact.
    centers = initialize_centers(data.copy(), num_clusters)
    cluster_assignments = assign_to_clusters(data, centers)
    for iteration in range(max_iterations):
        new_centers = update_centers(data, cluster_assignments, num_clusters)
        if np.array_equal(centers, new_centers):
            # Converged: report how many updates were needed.
            print(iteration)
            break
        centers = new_centers
        # Re-assign right after the update so labels always match `centers`.
        cluster_assignments = assign_to_clusters(data, centers)
    return centers, cluster_assignments

# Seed NumPy's global random state so the random center initialization, and
# with it the cluster numbering used by the plots below, is reproducible.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

num_clusters = 3
max_iterations = 100
# A copy is passed so the loaded training features are never modified.
final_centers, cluster_assignments = k_means(train_feature_vectors.copy(), num_clusters, max_iterations)

In [None]:
# from sklearn.cluster import KMeans

# num_clusters = 3
# max_iterations = 100

# # Assuming you have a training dataset train_feature_vectors
# # You should replace 'train_feature_vectors' with your actual data
# # Make sure train_feature_vectors is a 2D array-like object where each row represents a data point

# # Create a KMeans model
# kmeans = KMeans(n_clusters=num_clusters, max_iter=max_iterations)

# # Fit the model to your data
# kmeans.fit(train_feature_vectors.copy())

# # Get the final cluster centers
# final_centers = kmeans.cluster_centers_

# # Get the cluster assignments for each data point
# cluster_assignments = kmeans.labels_

In [None]:
# Show the final centers, then the number of clustered training points.
print(final_centers, len(cluster_assignments), sep="\n")
# Presumably the centers printed by an earlier run, kept for reference:
# [[ 220.34620054   15.3150673 ]
#  [ 185.9805921   424.53356587]
#  [ 147.27253844 1423.03213372]]

In [None]:
# Define the cluster colors and labels
colors = ['darkred', 'darkgreen', 'darkblue']
cluster_labels = ['Cluster 1', 'Cluster 2', 'Cluster 3']
# Visualize a single K-means cluster (index 0) together with all final centers.
# The points are drawn once, in that cluster's own color and label, so the
# legend describes exactly what is on the figure.
cluster_index = 0
in_cluster = cluster_assignments == cluster_index
plt.figure(figsize=(8, 8))
plt.scatter(train_feature_vectors[in_cluster, 0], train_feature_vectors[in_cluster, 1], c=colors[cluster_index], label=cluster_labels[cluster_index], s=10)  # Reduce the point size

plt.scatter(final_centers[:, 0], final_centers[:, 1], c='yellow', marker='x', s=100, label='Final Centers')

plt.title('K-means Clustering')
plt.legend()

# Set X and Y axis labels
plt.xlabel('Mean')
plt.ylabel('Variance')

plt.show()

In [None]:
# Define the cluster colors and labels
colors = ['darkred', 'darkgreen', 'darkblue']
cluster_labels = ['Cluster 1', 'Cluster 2', 'Cluster 3']
# Visualize a single K-means cluster (index 1) together with all final centers.
# The points are drawn once, in that cluster's own color and label, so the
# legend describes exactly what is on the figure.
cluster_index = 1
in_cluster = cluster_assignments == cluster_index
plt.figure(figsize=(8, 8))
plt.scatter(train_feature_vectors[in_cluster, 0], train_feature_vectors[in_cluster, 1], c=colors[cluster_index], label=cluster_labels[cluster_index], s=10)  # Reduce the point size

plt.scatter(final_centers[:, 0], final_centers[:, 1], c='yellow', marker='x', s=100, label='Final Centers')

plt.title('K-means Clustering')
plt.legend()

# Set X and Y axis labels
plt.xlabel('Mean')
plt.ylabel('Variance')

plt.show()

In [None]:
# Define the cluster colors and labels
colors = ['darkred', 'darkgreen', 'darkblue']
cluster_labels = ['Cluster 1', 'Cluster 2', 'Cluster 3']
# Visualize a single K-means cluster (index 2) together with all final centers.
# The points are drawn once, in that cluster's own color and label, so the
# legend describes exactly what is on the figure.
cluster_index = 2
in_cluster = cluster_assignments == cluster_index
plt.figure(figsize=(8, 8))
plt.scatter(train_feature_vectors[in_cluster, 0], train_feature_vectors[in_cluster, 1], c=colors[cluster_index], label=cluster_labels[cluster_index], s=10)  # Reduce the point size

plt.scatter(final_centers[:, 0], final_centers[:, 1], c='yellow', marker='x', s=100, label='Final Centers')

plt.title('K-means Clustering')
plt.legend()

# Set X and Y axis labels
plt.xlabel('Mean')
plt.ylabel('Variance')

plt.show()

In [None]:
# Tally the cluster sizes: label 0, label 1, and every remaining label (c3).
c1 = int(np.count_nonzero(cluster_assignments == 0))
c2 = int(np.count_nonzero(cluster_assignments == 1))
c3 = len(cluster_assignments) - c1 - c2

print(f"{c1}  {c2}  {c3}")

In [None]:
# Define the cluster colors and labels
colors = ['darkred', 'darkgreen', 'darkblue']
cluster_labels = ['Cluster 1', 'Cluster 2', 'Cluster 3']

# Scatter every K-means cluster in its own color, then overlay the centers.
fig, ax = plt.subplots(figsize=(8, 8))
for i in range(num_clusters):
    members = train_feature_vectors[cluster_assignments == i]
    ax.scatter(members[:, 0], members[:, 1], c=colors[i], label=cluster_labels[i], s=10)  # small markers

ax.scatter(final_centers[:, 0], final_centers[:, 1], c='yellow', marker='x', s=100, label='Final Centers')

ax.set_title('K-means Clustering')
ax.legend()

# Axis labels name the two feature dimensions.
ax.set_xlabel('Mean')
ax.set_ylabel('Variance')

plt.show()

In [None]:
def mahalanobis_distance(x, center, covariance):
    """Mahalanobis distance between the point ``x`` and ``center``.

    Args:
        x: 1-D feature vector.
        center: 1-D cluster center of the same length.
        covariance: Square covariance matrix of the cluster.

    Returns:
        ``sqrt((x - center)^T * covariance^-1 * (x - center))``. For a singular
        covariance the Moore-Penrose pseudo-inverse is used instead of raising.
    """
    diff = np.asarray(x, dtype=float) - np.asarray(center, dtype=float)
    try:
        # Solving the linear system avoids forming the explicit inverse.
        weighted = np.linalg.solve(covariance, diff)
    except np.linalg.LinAlgError:
        weighted = np.linalg.pinv(covariance) @ diff
    squared = float(np.dot(diff, weighted))
    # Rounding can leave a tiny negative value; clamp it so sqrt stays real.
    return np.sqrt(max(squared, 0.0))

def assign_to_mahalanobis_clusters(data, centers, covariances):
    """Assign every row of ``data`` to the cluster with the smallest Mahalanobis distance.

    Each covariance is inverted once per cluster and applied to all points at
    once, rather than once per point.

    Args:
        data: ``(n_samples, n_features)`` array.
        centers: Sequence of cluster centers, one per cluster.
        covariances: Sequence of covariance matrices, one per cluster.

    Returns:
        1-D integer array with the chosen cluster index for each row.
    """
    num_clusters = len(centers)
    distances = np.zeros((data.shape[0], num_clusters))
    for i in range(num_clusters):
        try:
            inv_covariance = np.linalg.inv(covariances[i])
        except np.linalg.LinAlgError:
            # Singular covariance: fall back to the pseudo-inverse.
            inv_covariance = np.linalg.pinv(covariances[i])
        diffs = data - centers[i]
        # Row-wise quadratic form diff^T * inv_cov * diff for all points.
        squared = np.sum((diffs @ inv_covariance) * diffs, axis=1)
        distances[:, i] = np.sqrt(np.maximum(squared, 0.0))
    return np.argmin(distances, axis=1)

def update_mahalanobis_centers(data, cluster_assignments, num_clusters, previous_centers=None):
    """Recompute the center and covariance of every cluster.

    Args:
        data: ``(n_samples, n_features)`` array.
        cluster_assignments: 1-D array of cluster indices, one per row of ``data``.
        num_clusters: Number of clusters.
        previous_centers: Optional centers from the previous iteration. When
            given, a cluster with no points keeps its previous center; without
            it, that center is all zeros.

    Returns:
        ``(new_centers, covariances)``: an array of shape
        ``(num_clusters, n_features)`` and a list of covariance matrices. The
        identity matrix is used for clusters with fewer than two points, because
        a sample covariance is undefined there.
    """
    num_features = data.shape[1]
    new_centers = np.zeros((num_clusters, num_features))
    covariances = []
    for cluster in range(num_clusters):
        cluster_points = data[cluster_assignments == cluster]
        if len(cluster_points) == 0:
            # No points assigned: keep the previous center when it is known.
            if previous_centers is not None:
                new_centers[cluster] = previous_centers[cluster]
            covariances.append(np.eye(num_features))
            continue
        new_centers[cluster] = np.mean(cluster_points, axis=0)
        if len(cluster_points) < 2:
            # np.cov of a single sample yields NaN; fall back to identity.
            covariances.append(np.eye(num_features))
        else:
            covariances.append(np.atleast_2d(np.cov(cluster_points, rowvar=False)))
    return new_centers, covariances

def modified_k_means(data, num_clusters, max_iterations, initial_centers=None):
    """K-means variant that assigns points by Mahalanobis distance.

    Args:
        data: ``(n_samples, n_features)`` array. It is not modified.
        num_clusters: Number of clusters.
        max_iterations: Upper bound on assignment/update rounds.
        initial_centers: Starting centers. When omitted, random rows of ``data``
            are used.

    Returns:
        ``(centers, cluster_assignments, covariances)``. Iteration stops early
        once the assignments stop changing, since further rounds would
        reproduce the same centers and covariances.
    """
    if initial_centers is None:
        # initialize_centers may shuffle its argument in place, so pass a copy.
        centers = initialize_centers(data.copy(), num_clusters)
    else:
        centers = np.array(initial_centers, dtype=float)
    covariances = [np.eye(data.shape[1]) for _ in range(num_clusters)]  # Initialize with identity covariance matrices
    cluster_assignments = None

    for iteration in range(max_iterations):
        new_assignments = assign_to_mahalanobis_clusters(data, centers, covariances)
        if cluster_assignments is not None and np.array_equal(new_assignments, cluster_assignments):
            break  # same labels as last round: the update would change nothing
        cluster_assignments = new_assignments
        new_centers, covariances = update_mahalanobis_centers(data, cluster_assignments, num_clusters)

        # A cluster that received no points keeps its previous center instead of
        # being moved to whatever placeholder the update step produced.
        counts = np.bincount(cluster_assignments, minlength=num_clusters)[:num_clusters]
        empty = counts == 0
        new_centers[empty] = centers[empty]
        centers = new_centers

    if cluster_assignments is None:
        # max_iterations <= 0: still return labels for the starting centers.
        cluster_assignments = assign_to_mahalanobis_clusters(data, centers, covariances)
    return centers, cluster_assignments, covariances

# Run the Mahalanobis-based variant, starting from the plain K-means centers
# (final_centers). Copies are passed so the inputs are not modified.
num_clusters = 3
# NOTE: this rebinds the notebook-level max_iterations (100 for plain K-means).
max_iterations = 2
final_mahalanobis_centers, mahalanobis_cluster_assignments, final_covariances = modified_k_means(train_feature_vectors.copy(), num_clusters, max_iterations, final_centers.copy())

In [None]:
# Show the refined centers, then the number of labelled training points.
print(final_mahalanobis_centers, len(mahalanobis_cluster_assignments), sep="\n")

In [None]:
# Visualize Modified K-means results:
# one scatter per cluster in its own color, with the refined centers on top.
fig, ax = plt.subplots(figsize=(8, 8))
for i in range(num_clusters):
    members = train_feature_vectors[mahalanobis_cluster_assignments == i]
    ax.scatter(members[:, 0], members[:, 1], c=colors[i], label=cluster_labels[i], s=10)  # small markers

ax.scatter(final_mahalanobis_centers[:, 0], final_mahalanobis_centers[:, 1], c='yellow', marker='x', s=100, label='Final Centers')

ax.set_title('Modified K-means Clustering with Mahalanobis Distance')
ax.legend()

# Axis labels name the two feature dimensions.
ax.set_xlabel('Mean')
ax.set_ylabel('Variance')

plt.show()

In [None]:
# Apply clustering assignments to test data
# Each test set is labelled twice: by Euclidean distance to the K-means centers
# and by Mahalanobis distance to the refined centers and covariances.
# NOTE(review): assign_to_clusters is redefined further down the notebook, so
# this cell depends on being run before that later definition.
test1_assignments_kmeans = assign_to_clusters(test1_feature_vectors, final_centers)
test1_assignments_modified = assign_to_mahalanobis_clusters(test1_feature_vectors, final_mahalanobis_centers, final_covariances)

test2_assignments_kmeans = assign_to_clusters(test2_feature_vectors, final_centers)
test2_assignments_modified = assign_to_mahalanobis_clusters(test2_feature_vectors, final_mahalanobis_centers, final_covariances)

test3_assignments_kmeans = assign_to_clusters(test3_feature_vectors, final_centers)
test3_assignments_modified = assign_to_mahalanobis_clusters(test3_feature_vectors, final_mahalanobis_centers, final_covariances)

In [None]:
# Function to create and display a segmented image
def display_segmented_image(assignments, title, image_path=None, image_shape=(506, 506)):
    """Show a cluster-label map, optionally next to the original image.

    Args:
        assignments: Flat array of cluster labels (0, 1 or 2), one per patch.
        title: Figure title.
        image_path: Optional path of the source image, shown in the left panel.
        image_shape: ``(rows, cols)`` of the label map. The default (506, 506)
            presumably corresponds to 7x7 patches at stride 1 over a 512x512
            image (512 - 7 + 1) - verify against the feature extraction used.

    Raises:
        ValueError: If the number of labels does not match ``image_shape``.
    """
    # Define custom RGB colors for 0, 1, and 2 (dark red, dark green, dark blue)
    custom_colors = [(0.5, 0, 0), (0, 0.5, 0), (0, 0, 0.5)]  # You can adjust the darkness if needed

    # Create a ListedColormap using the custom colors
    custom_cmap = ListedColormap(custom_colors)

    assignments = np.asarray(assignments)
    expected_size = image_shape[0] * image_shape[1]
    if assignments.size != expected_size:
        raise ValueError(
            f"Cannot display {assignments.size} labels as an image of shape {tuple(image_shape)}"
        )
    image_data = assignments.reshape(image_shape)

    fig = plt.figure(figsize=(12, 6))  # Adjust the figure size as needed

    # Create a subplot for the image
    if image_path is not None:
        plt.subplot(1, 2, 1)
        # The context manager closes the file once the image has been drawn.
        with Image.open(image_path) as img:
            plt.imshow(img, cmap="gray")
        plt.axis('off')

    # Create a subplot for the segmented image
    plt.subplot(1, 2, 2)
    plt.imshow(image_data, cmap=custom_cmap, vmin=0, vmax=2)  # Use the custom colormap
    plt.axis('off')  # Turn off axis labels

    fig.subplots_adjust(wspace=0.1)  # Adjust the gap between subplots

    # Add the title in the middle of the figure with a gap
    fig.suptitle(title, fontsize=16, ha='center', va='center')

    plt.show()

In [None]:
# Show every test image beside its K-means and its modified K-means segmentation.
segmentation_views = [
    (test1_assignments_kmeans, "Segmented Test 1 Image using K Means for K = 3", "./cell_input/Test1/5.png"),
    (test1_assignments_modified, "Segmented Test 1 Image using Modified K Means for K = 3", "./cell_input/Test1/5.png"),
    (test2_assignments_kmeans, "Segmented Test 2 Image using K Means for K = 3", "./cell_input/Test2/10.png"),
    (test2_assignments_modified, "Segmented Test 2 Image using Modified K Means for K = 3", "./cell_input/Test2/10.png"),
    (test3_assignments_kmeans, "Segmented Test 3 Image using K Means for K = 3", "./cell_input/Test3/15.png"),
    (test3_assignments_modified, "Segmented Test 3 Image using Modified K Means for K = 3", "./cell_input/Test3/15.png"),
]
for seg_labels, seg_title, seg_source in segmentation_views:
    display_segmented_image(np.array(seg_labels), seg_title, seg_source)

In [None]:
def visualize_kmeans_results(feature_vectors, cluster_assignments, title):
    """Scatter the two feature columns, colored by cluster assignment.

    Uses the notebook-level ``colors`` and ``cluster_labels`` lists; one scatter
    series is drawn per entry of ``cluster_labels``.
    """
    fig, ax = plt.subplots(figsize=(8, 8))
    for idx in range(len(cluster_labels)):
        selected = cluster_assignments == idx
        ax.scatter(feature_vectors[selected, 0], feature_vectors[selected, 1], c=colors[idx], label=cluster_labels[idx])

    ax.set_title(title)
    ax.legend()

    # Axis labels name the two feature dimensions.
    ax.set_xlabel('Mean')
    ax.set_ylabel('Variance')

    plt.show()

In [None]:

# Scatter the features of Test1, Test2 and Test3 in turn, first colored by the
# K-means labels and then by the Mahalanobis-based labels.
test_scatter_inputs = [
    (test1_feature_vectors, test1_assignments_kmeans, test1_assignments_modified),
    (test2_feature_vectors, test2_assignments_kmeans, test2_assignments_modified),
    (test3_feature_vectors, test3_assignments_kmeans, test3_assignments_modified),
]
for test_vectors, kmeans_labels, modified_labels in test_scatter_inputs:
    visualize_kmeans_results(test_vectors, kmeans_labels, "K-means Clustering")
    visualize_kmeans_results(test_vectors, modified_labels, "Modified K-means Clustering with Mahalanobis Distance")


In [None]:
# Function to extract pixel-based features from a single image
def extract_pixel_features(image):
    """Return the image's pixel values as a flat list in row-major order."""
    rows, cols = image.shape[:2]
    return [image[r, c] for r in range(rows) for c in range(cols)]

# Function to process a set of images and create pixel-based feature vectors
def process_images(data_dir):
    """Collect raw pixel values from every .jpg/.png image found under ``data_dir``.

    NOTE: this definition replaces the earlier patch-feature ``process_images``
    from this point of the notebook onwards.

    The directory tree is walked recursively in sorted order so the pixel order
    is the same on every run and platform. Files that OpenCV cannot decode are
    reported and skipped instead of crashing the run.

    Args:
        data_dir: Root directory to scan.

    Returns:
        A flat list of grayscale pixel values of all readable images,
        concatenated in traversal order.
    """
    feature_vectors = []

    for root, dirs, files in os.walk(data_dir):
        dirs.sort()  # sorting in place fixes the order in which os.walk descends
        for file in sorted(files):
            if not file.endswith(('.jpg', '.png')):
                continue
            image_path = os.path.join(root, file)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread returns None (no exception) for unreadable files.
                print(f"Skipping unreadable image: {image_path}")
                continue

            # Extract pixel-based features from the image
            feature_vectors.extend(extract_pixel_features(image))

    return feature_vectors


In [None]:
# Process for segmentation and feature extraction
# Each test directory is flattened into one array of raw grayscale pixel values.
test1_feature_vectors_pixel = np.array(process_images(test_data_dir_1))
test2_feature_vectors_pixel = np.array(process_images(test_data_dir_2))
test3_feature_vectors_pixel = np.array(process_images(test_data_dir_3))

In [None]:
# Save test feature vectors to a text file
# (one file per test set, written into output_dir)
for pixel_file, pixel_values in (
    ('test1_feature_vectors_pixel.txt', test1_feature_vectors_pixel),
    ('test2_feature_vectors_pixel.txt', test2_feature_vectors_pixel),
    ('test3_feature_vectors_pixel.txt', test3_feature_vectors_pixel),
):
    np.savetxt(output_dir + pixel_file, pixel_values)

In [None]:
# Read test feature vectors from text files and convert to NumPy arrays
pixel_cache_files = (
    'test1_feature_vectors_pixel.txt',
    'test2_feature_vectors_pixel.txt',
    'test3_feature_vectors_pixel.txt',
)
(
    test1_feature_vectors_pixel,
    test2_feature_vectors_pixel,
    test3_feature_vectors_pixel,
) = [np.loadtxt(output_dir + cache_file) for cache_file in pixel_cache_files]

In [None]:
# Report shape and type of each pixel feature array, labelled with the
# variable it actually describes.
for pixel_name, pixel_array in (
    ("test1_feature_vectors_pixel", test1_feature_vectors_pixel),
    ("test2_feature_vectors_pixel", test2_feature_vectors_pixel),
    ("test3_feature_vectors_pixel", test3_feature_vectors_pixel),
):
    print(f"Shape of {pixel_name}:", pixel_array.shape)
    print(f"Data type of {pixel_name}:", type(pixel_array))

In [None]:
def assign_to_clusters(data, centers):
    """Assign each sample to its nearest center (Euclidean distance).

    This redefinition stays compatible with the earlier 2-D version of the same
    name, so re-running earlier cells after this one still works.

    Args:
        data: ``(n_samples, n_features)`` array, or a 1-D array of scalar samples.
        centers: ``(n_clusters, n_features)`` array, or a 1-D array of scalar
            centers (e.g. one column of the K-means centers).

    Returns:
        1-D integer array with the index of the nearest center for each sample.
    """
    data = np.asarray(data, dtype=float)
    centers = np.asarray(centers, dtype=float)
    if data.ndim == 1:
        data = data[:, np.newaxis]
    if centers.ndim == 1:
        # Scalar centers: treat each as a one-feature center.
        centers = centers[:, np.newaxis]
    # For one feature the norm reduces to the absolute difference.
    distances = np.linalg.norm(data[:, np.newaxis, :] - centers[np.newaxis, :, :], axis=2)
    return np.argmin(distances, axis=1)

# Function to create and display a segmented image
def display_segmented_image_pixels(assignments, title, image_path=None, image_shape=(512, 512)):
    """Show a per-pixel cluster-label map, optionally next to the original image.

    Args:
        assignments: Flat array of cluster labels (0, 1 or 2), one per pixel.
        title: Figure title.
        image_path: Optional path of the source image, shown in the left panel.
        image_shape: ``(rows, cols)`` of the label map; defaults to (512, 512).

    Raises:
        ValueError: If the number of labels does not match ``image_shape``.
    """
    # Define custom RGB colors for 0, 1, and 2 (dark red, dark green, dark blue)
    custom_colors = [(0.5, 0, 0), (0, 0.5, 0), (0, 0, 0.5)]  # You can adjust the darkness if needed

    # Create a ListedColormap using the custom colors
    custom_cmap = ListedColormap(custom_colors)

    assignments = np.asarray(assignments)
    expected_size = image_shape[0] * image_shape[1]
    if assignments.size != expected_size:
        raise ValueError(
            f"Cannot display {assignments.size} labels as an image of shape {tuple(image_shape)}"
        )
    image_data = assignments.reshape(image_shape)

    fig = plt.figure(figsize=(12, 6))  # Adjust the figure size as needed

    # Create a subplot for the image
    if image_path is not None:
        plt.subplot(1, 2, 1)
        # The context manager closes the file once the image has been drawn.
        with Image.open(image_path) as img:
            plt.imshow(img, cmap="gray")
        plt.axis('off')

    # Create a subplot for the segmented image
    plt.subplot(1, 2, 2)
    plt.imshow(image_data, cmap=custom_cmap, vmin=0, vmax=2)  # Use the custom colormap
    plt.axis('off')  # Turn off axis labels

    fig.subplots_adjust(wspace=0.1)  # Adjust the gap between subplots

    # Add the title in the middle of the figure with a gap
    fig.suptitle(title, fontsize=16, ha='center', va='center')

    plt.show()

# Label every pixel of Test1 by the closest value in column 0 of final_centers.
# The pixels are reshaped to a column so they broadcast against the 1-D centers.
reshaped_data = test1_feature_vectors_pixel.reshape(-1, 1)
test1_assignments_kmeans_pixel = assign_to_clusters(reshaped_data, final_centers[:, 0])

# Same for Test2 (reshaped_data is reused as a scratch variable).
reshaped_data = test2_feature_vectors_pixel.reshape(-1, 1)
test2_assignments_kmeans_pixel = assign_to_clusters(reshaped_data, final_centers[:, 0])

# Same for Test3.
reshaped_data = test3_feature_vectors_pixel.reshape(-1, 1)
test3_assignments_kmeans_pixel = assign_to_clusters(reshaped_data, final_centers[:, 0])


In [None]:
# Show the per-pixel K-means segmentation of each test image next to the original.
# The commented-out calls are the patch-based modified K-means views, left disabled.
display_segmented_image_pixels(np.array(test1_assignments_kmeans_pixel), "Segmented Test 1 Image using K Means for K = 3","./cell_input/Test1/5.png")
# display_segmented_image(np.array(test1_assignments_modified), "Segmented Test 1 Image using Modified K Means for K = 3","./cell_input/Test1/5.png")
display_segmented_image_pixels(np.array(test2_assignments_kmeans_pixel), "Segmented Test 2 Image using K Means for K = 3","./cell_input/Test2/10.png")
# display_segmented_image(np.array(test2_assignments_modified), "Segmented Test 2 Image using Modified K Means for K = 3","./cell_input/Test2/10.png")
display_segmented_image_pixels(np.array(test3_assignments_kmeans_pixel), "Segmented Test 3 Image using K Means for K = 3","./cell_input/Test3/15.png")
# display_segmented_image(np.array(test3_assignments_modified), "Segmented Test 3 Image using Modified K Means for K = 3","./cell_input/Test3/15.png")