**Clustering**

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

# Load Pretrained ResNet50 (without top layer)
base_model = ResNet50(weights="imagenet", include_top=False, pooling="avg")
model = Model(inputs=base_model.input, outputs=base_model.output)

# Function to preprocess images
def preprocess_image(image_path, target_size=(224, 224)):
    img = cv2.imread(image_path)
    img = cv2.resize(img, target_size)
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    img = preprocess_input(img)
    return img

# Extract embeddings from images
def extract_embeddings(image_paths):
    embeddings = []
    for img_path in image_paths:
        img = preprocess_image(img_path)
        feature_vector = model.predict(img)[0]  # Get feature embedding
        embeddings.append(feature_vector)
    return np.array(embeddings)

# Load image paths
image_folder = "/kaggle/input/inme-veri-seti-stroke-dataset/İNME VERİ SETİ/YarısmaVeriSeti_1_Oturum/PNG"
image_paths = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".png")]

# Extract embeddings
image_embeddings = extract_embeddings(image_paths)

# Perform Hierarchical Clustering
linkage_matrix = linkage(image_embeddings, method='ward')  # 'ward' minimizes variance
clustering = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = clustering.fit_predict(image_embeddings)

# Plot Dendrogram
plt.figure(figsize=(10, 5))
dendrogram(linkage_matrix, labels=labels, leaf_rotation=90)
plt.title("Hierarchical Clustering Dendrogram")
plt.xlabel("Image Index")
plt.ylabel("Distance")
plt.show()

# Print cluster labels for each image
for img, label in zip(image_paths, labels):
    print(f"{img} -> Cluster {label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms

In [5]:
import os
import shutil
import zipfile

# Create output directories for each cluster
output_dir = "clustered_images"
os.makedirs(output_dir, exist_ok=True)

for cluster_id in set(labels):  # Create subfolders for each cluster
    os.makedirs(f"{output_dir}/Cluster_{cluster_id}", exist_ok=True)

# Copy images to respective cluster folders
for img_path, cluster in zip(image_paths, labels):
    shutil.copy(img_path, f"{output_dir}/Cluster_{cluster}")

# Zip the clustered images
zip_filename = "clusters.zip"
shutil.make_archive(zip_filename.replace(".zip", ""), 'zip', output_dir)

print(f"Clusters saved and zipped as {zip_filename}")


Clusters saved and zipped as clusters.zip
