In [1]:
import tensorflow as tf
import numpy as np
import cv2
import os
from sklearn.cluster import KMeans

In [2]:
base_model = tf.keras.applications.DenseNet121(include_top=False, input_shape=(224, 224, 3), pooling='avg')

In [3]:
def extract_features(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return None
    image = cv2.resize(image, (224, 224))
    image = np.expand_dims(image, axis=0) / 255.0  # Normalize
    features = base_model.predict(image)  # Extract features
    return features.flatten()

In [4]:
base_dir = "Data/"
categories = ["COVID19", "NORMAL", "PNEUMONIA"]
image_paths = []
labels = []

for category in categories:
    category_path = os.path.join(base_dir, category)
    for img in os.listdir(category_path):
        if img.endswith(('.png', '.jpg', '.jpeg')):
            image_paths.append(os.path.join(category_path, img))
            labels.append(category)

# Ensure images exist
if len(image_paths) == 0:
    raise ValueError("No valid images found in the directory. Check the image path and format.")

In [None]:
features = []
valid_image_paths = []  # Store only successfully processed images
failed_images = []  # Store failed image paths

for img in image_paths:
    try:
        feat = extract_features(img)
        features.append(feat)
        valid_image_paths.append(img)  # Store only successful images
    except Exception as e:
        print(f"Error processing {img}: {e}")
        failed_images.append(img)

print(f"Total successful images: {len(valid_image_paths)}")
print(f"Total failed images: {len(failed_images)}")

# Convert list to NumPy array
features = np.array(features)

# Check if feature extraction succeeded
if features.shape[0] == 0:
    raise ValueError("Feature extraction failed for all images. Check image preprocessing.")

# Ensure features are 2D for KMeans
if len(features.shape) == 1:
    features = features.reshape(-1, 1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 387ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 405ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 362ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 314ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [None]:
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
severity_labels = kmeans.fit_predict(features)

# Map clusters to severity labels
cluster_mapping = {0: "Mild", 1: "Moderate", 2: "Severe"}
severity_labels = [cluster_mapping[label] for label in severity_labels]

# Print sample results
for img_path, severity in zip(valid_image_paths[:10], severity_labels[:10]):
    print(f"Image: {os.path.basename(img_path)} -> Severity: {severity}")

# Print some sample image paths
print("\nSample Image Paths:")
print(image_paths[:5])

In [None]:
print(f"Total image paths: {len(image_paths)}")
print(f"Total severity labels: {len(severity_labels)}")

In [None]:
import pandas as pd

# Ensure both lists are correctly populated
print("Sample image paths:", image_paths[:5])
print("Sample severity labels:", severity_labels[:5])

# Check if lengths match
if len(image_paths) != len(severity_labels):
    raise ValueError("Mismatch: image_paths and severity_labels must have the same length.")

# Create DataFrame
df = pd.DataFrame({"image_path": image_paths, "severity": severity_labels})

# Save to CSV
df.to_csv("severity_labels.csv", index=False)
print("Severity labels saved to severity_labels.csv")