In [1]:
import warnings
warnings.filterwarnings("ignore")

## Copying 100 files from each folder

In [2]:
import os
import shutil
import random

# Source dataset path
source_path = "/kaggle/input/food-freshness-dataset/Dataset"

# Destination path where sampled images will be copied
destination_path = "/kaggle/working/Food_Freshness_Sample"

# Number of images to copy from each folder
num_images_to_copy = 100

# Loop through 'Fresh' and 'Rotten' directories
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(source_path, freshness_type)

    # List all subfolders (fruits or vegetables)
    for category in os.listdir(freshness_dir):
        category_path = os.path.join(freshness_dir, category)

        if os.path.isdir(category_path):
            # Destination folder path
            dest_category_path = os.path.join(destination_path, freshness_type, category)
            os.makedirs(dest_category_path, exist_ok=True)

            # List image files in the category folder
            image_files = [f for f in os.listdir(category_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

            # Randomly sample 100 images
            selected_images = random.sample(image_files, min(num_images_to_copy, len(image_files)))

            # Copy selected images to destination
            for img_file in selected_images:
                src_file = os.path.join(category_path, img_file)
                dest_file = os.path.join(dest_category_path, img_file)
                shutil.copy(src_file, dest_file)

print("✅ 100 images copied from each folder successfully!")

✅ 100 images copied from each folder successfully!


### CSV File Preparation

In [3]:
import os
import pandas as pd

# Dataset path
dataset_path = "/kaggle/working/Food_Freshness_Sample"

# Define valid image extensions
valid_extensions = ('.jpg', '.jpeg', '.png')

# List to store the data
data = []

# Loop through 'Fresh' and 'Rotten'
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(dataset_path, freshness_type)

    # Go through each category folder
    for category in os.listdir(freshness_dir):
        category_path = os.path.join(freshness_dir, category)

        if os.path.isdir(category_path):
            image_files = [f for f in os.listdir(category_path) if f.lower().endswith(valid_extensions)]
            count = len(image_files)

            # Append to data list
            data.append({
                'Category': category,
                'Freshness': freshness_type,
                'Folder Path': f"{freshness_type}/{category}",
                'Image Count': count
            })

# Convert to DataFrame
df = pd.DataFrame(data)

# Save to CSV
csv_path = "/kaggle/working/file_counts.csv"
df.to_csv(csv_path, index=False)

print(f"✅ File counts saved to {csv_path}")


✅ File counts saved to /kaggle/working/file_counts.csv


In [4]:
df = pd.read_csv("/kaggle/working/file_counts.csv")
df

Unnamed: 0,Category,Freshness,Folder Path,Image Count
0,FreshTomato,Fresh,Fresh/FreshTomato,100
1,FreshBittergroud,Fresh,Fresh/FreshBittergroud,100
2,FreshOkara,Fresh,Fresh/FreshOkara,100
3,FreshCucumber,Fresh,Fresh/FreshCucumber,100
4,FreshBellpepper,Fresh,Fresh/FreshBellpepper,100
5,FreshStrawberry,Fresh,Fresh/FreshStrawberry,100
6,FreshApple,Fresh,Fresh/FreshApple,100
7,FreshPotato,Fresh,Fresh/FreshPotato,100
8,FreshBanana,Fresh,Fresh/FreshBanana,100
9,FreshMango,Fresh,Fresh/FreshMango,100


### Renaming of Files

In [5]:
import os

# Dataset path
dataset_path = "/kaggle/working/Food_Freshness_Sample"

# Valid image extensions
valid_extensions = ('.jpg', '.jpeg', '.png')

# Loop through 'Fresh' and 'Rotten' folders
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(dataset_path, freshness_type)

    for folder in os.listdir(freshness_dir):
        folder_path = os.path.join(freshness_dir, folder)

        if os.path.isdir(folder_path):
            # Get list of image files
            image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(valid_extensions)]
            image_files.sort()  # Optional: sort to maintain order

            # Rename files one by one
            for i, old_filename in enumerate(image_files, start=1):
                ext = os.path.splitext(old_filename)[1]
                new_filename = f"{folder}_{i}{ext}"
                old_filepath = os.path.join(folder_path, old_filename)
                new_filepath = os.path.join(folder_path, new_filename)

                os.rename(old_filepath, new_filepath)

            print(f"✅ Renamed files in: {freshness_type}/{folder}")

✅ Renamed files in: Fresh/FreshTomato
✅ Renamed files in: Fresh/FreshBittergroud
✅ Renamed files in: Fresh/FreshOkara
✅ Renamed files in: Fresh/FreshCucumber
✅ Renamed files in: Fresh/FreshBellpepper
✅ Renamed files in: Fresh/FreshStrawberry
✅ Renamed files in: Fresh/FreshApple
✅ Renamed files in: Fresh/FreshPotato
✅ Renamed files in: Fresh/FreshBanana
✅ Renamed files in: Fresh/FreshMango
✅ Renamed files in: Fresh/FreshCarrot
✅ Renamed files in: Fresh/FreshCapciscum
✅ Renamed files in: Fresh/FreshOrange
✅ Renamed files in: Rotten/RottenBellpepper
✅ Renamed files in: Rotten/RottenStrawberry
✅ Renamed files in: Rotten/RottenPotato
✅ Renamed files in: Rotten/RottenBanana
✅ Renamed files in: Rotten/RottenTomato
✅ Renamed files in: Rotten/RottenCucumber
✅ Renamed files in: Rotten/RottenApple
✅ Renamed files in: Rotten/RottenMango
✅ Renamed files in: Rotten/RottenCapsicum
✅ Renamed files in: Rotten/RottenOrange
✅ Renamed files in: Rotten/RottenCarrot
✅ Renamed files in: Rotten/RottenBittergr

## Data Preprocessing

In [6]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import MaxPooling2D
import tensorflow as tf

# Set dataset path
dataset_path = "/kaggle/working/Food_Freshness_Sample"

# Image size
img_size = (128, 128)

# Store data and labels
X = []
y = []

# Loop through Fresh and Rotten
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(dataset_path, freshness_type)

    for folder in os.listdir(freshness_dir):
        folder_path = os.path.join(freshness_dir, folder)

        if os.path.isdir(folder_path):
            for filename in tqdm(os.listdir(folder_path), desc=f"Processing {folder}"):
                file_path = os.path.join(folder_path, filename)
                try:
                    # Read image
                    img = cv2.imread(file_path)

                    # Noise Reduction (Gaussian Blur)
                    blurred = cv2.GaussianBlur(img, (5, 5), 0)

                    # Convert to Grayscale
                    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

                    # Resize
                    resized = cv2.resize(gray, img_size)

                    # Edge Detection (Canny)
                    edges = cv2.Canny(resized, 100, 200)

                    # Normalize
                    normalized = edges / 255.0

                    # Reshape to (128, 128, 1)
                    normalized = np.expand_dims(normalized, axis=-1)

                    # Append data and label
                    X.append(normalized)
                    y.append(folder)
                except Exception as e:
                    print(f"❌ Error processing {file_path}: {e}")

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

print(f"✅ Dataset shape: {X.shape}")
print(f"✅ Labels shape: {y_categorical.shape}")

# Add MaxPooling2D layer for demonstration
# (you'll use this in your CNN model later)
example_input = tf.keras.Input(shape=(128, 128, 1))
pooled_output = MaxPooling2D(pool_size=(2, 2))(example_input)

print("✅ MaxPooling2D layer added (will be used in model).")


2025-04-13 00:36:29.489464: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744504589.737516      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744504589.804816      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Processing FreshTomato: 100%|██████████| 100/100 [00:00<00:00, 199.29it/s]
Processing FreshBittergroud: 100%|██████████| 100/100 [00:00<00:00, 416.45it/s]
Processing FreshOkara: 100%|██████████| 100/100 [00:00<00:00, 269.96it/s]
Processing FreshCucumber: 100%|██████████| 100/100 [00:00<00:00, 194.65it/s]
Processing FreshBellpepper: 100%|██████████| 100/100 [00:00<00:00, 141.12it/s]
Processing FreshStrawberry: 100%|██████████| 100/100

✅ Dataset shape: (2600, 128, 128, 1)
✅ Labels shape: (2600, 26)
✅ MaxPooling2D layer added (will be used in model).


## Data Augmentation

In [7]:
import cv2
import os
import numpy as np
from tqdm import tqdm

# Source and target paths
source_path = "/kaggle/working/Food_Freshness_Sample"
augmented_path = "/kaggle/working/augmented_dataset"

# Create destination folders
os.makedirs(augmented_path, exist_ok=True)

# Image size
img_size = (128, 128)

# Augmentation functions
def rotate_image(img, angle):
    h, w = img.shape[:2]
    center = (w // 2, h // 2)
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(img, rot_mat, (w, h))

def adjust_brightness(img, value=30):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] + value, 0, 255)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

# Loop through each class folder
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(source_path, freshness_type)
    for folder in os.listdir(freshness_dir):
        folder_path = os.path.join(freshness_dir, folder)
        target_folder = os.path.join(augmented_path, freshness_type, folder)
        os.makedirs(target_folder, exist_ok=True)

        for idx, filename in enumerate(tqdm(os.listdir(folder_path), desc=f"Augmenting {folder}")):
            file_path = os.path.join(folder_path, filename)
            img = cv2.imread(file_path)

            if img is None:
                continue

            # Resize
            img = cv2.resize(img, img_size)

            # 1. Original
            cv2.imwrite(os.path.join(target_folder, f"{folder}_{idx}_original.jpg"), img)

            # 2. Horizontal Flip
            flipped = cv2.flip(img, 1)
            cv2.imwrite(os.path.join(target_folder, f"{folder}_{idx}_flipped.jpg"), flipped)

            # 3. Random Rotation (20°)
            rotated = rotate_image(img, angle=20)
            cv2.imwrite(os.path.join(target_folder, f"{folder}_{idx}_rotated.jpg"), rotated)

            # 4. Brightness Increase
            brighter = adjust_brightness(img, value=40)
            cv2.imwrite(os.path.join(target_folder, f"{folder}_{idx}_bright.jpg"), brighter)

            # 5. 180° Rotation
            rotated_180 = rotate_image(img, angle=180)
            cv2.imwrite(os.path.join(target_folder, f"{folder}_{idx}_rotated180.jpg"), rotated_180)


Augmenting FreshTomato: 100%|██████████| 100/100 [00:00<00:00, 230.47it/s]
Augmenting FreshBittergroud: 100%|██████████| 100/100 [00:00<00:00, 398.12it/s]
Augmenting FreshOkara: 100%|██████████| 100/100 [00:00<00:00, 265.09it/s]
Augmenting FreshCucumber: 100%|██████████| 100/100 [00:00<00:00, 238.20it/s]
Augmenting FreshBellpepper: 100%|██████████| 100/100 [00:00<00:00, 152.13it/s]
Augmenting FreshStrawberry: 100%|██████████| 100/100 [00:00<00:00, 597.70it/s]
Augmenting FreshApple: 100%|██████████| 100/100 [00:00<00:00, 254.97it/s]
Augmenting FreshPotato: 100%|██████████| 100/100 [00:00<00:00, 286.02it/s]
Augmenting FreshBanana: 100%|██████████| 100/100 [00:00<00:00, 213.52it/s]
Augmenting FreshMango: 100%|██████████| 100/100 [00:00<00:00, 351.61it/s]
Augmenting FreshCarrot: 100%|██████████| 100/100 [00:00<00:00, 127.97it/s]
Augmenting FreshCapciscum: 100%|██████████| 100/100 [00:00<00:00, 403.96it/s]
Augmenting FreshOrange: 100%|██████████| 100/100 [00:00<00:00, 301.50it/s]
Augmenting

### CSV file preparation after Augmentation

In [8]:
import os
import pandas as pd

# Dataset path
dataset_path = "/kaggle/working/augmented_dataset"

# Define valid image extensions
valid_extensions = ('.jpg', '.jpeg', '.png')

# List to store the data
data = []

# Loop through 'Fresh' and 'Rotten'
for freshness_type in ['Fresh', 'Rotten']:
    freshness_dir = os.path.join(dataset_path, freshness_type)

    # Go through each category folder
    for category in os.listdir(freshness_dir):
        category_path = os.path.join(freshness_dir, category)

        if os.path.isdir(category_path):
            image_files = [f for f in os.listdir(category_path) if f.lower().endswith(valid_extensions)]
            count = len(image_files)

            # Append to data list
            data.append({
                'Category': category,
                'Freshness': freshness_type,
                'Folder Path': f"{freshness_type}/{category}",
                'Image Count': count
            })

# Convert to DataFrame
df = pd.DataFrame(data)

# Save to CSV
csv_path = "/kaggle/working/file_counts_augmented.csv"
df.to_csv(csv_path, index=False)

print(f"✅ File counts saved to {csv_path}")

✅ File counts saved to /kaggle/working/file_counts_augmented.csv


In [9]:
df

Unnamed: 0,Category,Freshness,Folder Path,Image Count
0,FreshTomato,Fresh,Fresh/FreshTomato,500
1,FreshBittergroud,Fresh,Fresh/FreshBittergroud,500
2,FreshOkara,Fresh,Fresh/FreshOkara,500
3,FreshCucumber,Fresh,Fresh/FreshCucumber,500
4,FreshBellpepper,Fresh,Fresh/FreshBellpepper,500
5,FreshStrawberry,Fresh,Fresh/FreshStrawberry,500
6,FreshApple,Fresh,Fresh/FreshApple,500
7,FreshPotato,Fresh,Fresh/FreshPotato,500
8,FreshBanana,Fresh,Fresh/FreshBanana,500
9,FreshMango,Fresh,Fresh/FreshMango,500


## Clustering

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

# Dataset path
dataset_path = "/kaggle/input/food-freshness-dataset/Dataset"
categories = [os.path.join(dp, d) for dp, dn, filenames in os.walk(dataset_path) for d in dn]

# Load VGG16 model (with average pooling)
base_model = VGG16(weights='imagenet', include_top=False, pooling='avg')
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
features = []
labels = []
paths = []

for folder in tqdm(categories, desc="Processing folders"):
    for file in os.listdir(folder):
        img_path = os.path.join(folder, file)
        try:
            img = image.load_img(img_path, target_size=(128, 128))
            img_array = image.img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            img_array = preprocess_input(img_array)
            feat = model.predict(img_array).flatten()
            features.append(feat)
            labels.append(os.path.basename(folder))
            paths.append(img_path)
        except:
            continue

# Convert to numpy arrays
features = np.array(features)

I0000 00:00:1744504736.622303      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1744504736.623007      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


I0000 00:00:1744504738.965664      96 service.cc:148] XLA service 0x796c94007100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1744504738.966858      96 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1744504738.966876      96 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1744504739.109710      96 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


I0000 00:00:1744504741.376107      96 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17

Processing folders:  11%|█         | 3/28 [01:25<11:52, 28.49s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17

Processing folders:  25%|██▌       | 7/28 [04:41<14:50, 42.39s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16

In [None]:
from sklearn.cluster import KMeans

# Set number of clusters (try 13 or 26)
n_clusters = 13
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(features)

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Reduce dimensions
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(features)

# Create DataFrame
df = pd.DataFrame({
    'Image_Path': paths,
    'Actual_Label': labels,
    'Cluster_Label': cluster_labels,
    'PCA1': reduced_features[:, 0],
    'PCA2': reduced_features[:, 1]
})

# Save to CSV
df.to_csv("/kaggle/working/cluster_results.csv", index=False)

# Plot
plt.figure(figsize=(10, 7))
scatter = plt.scatter(df['PCA1'], df['PCA2'], c=df['Cluster_Label'], cmap='tab20', s=20)
plt.title("KMeans Clusters (PCA Reduced)")
plt.xlabel("PCA 1")
plt.ylabel("PCA 2")
plt.colorbar(scatter)
plt.grid(True)
plt.show()

In [None]:
from sklearn.cluster import KMeans

# Set number of clusters (try 13 or 26)
n_clusters = 26
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(features)

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Reduce dimensions
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(features)

# Create DataFrame
df = pd.DataFrame({
    'Image_Path': paths,
    'Actual_Label': labels,
    'Cluster_Label': cluster_labels,
    'PCA1': reduced_features[:, 0],
    'PCA2': reduced_features[:, 1]
})

# Save to CSV
df.to_csv("/kaggle/working/cluster_results_2.csv", index=False)

# Plot
plt.figure(figsize=(10, 7))
scatter = plt.scatter(df['PCA1'], df['PCA2'], c=df['Cluster_Label'], cmap='tab20', s=20)
plt.title("KMeans Clusters (PCA Reduced)")
plt.xlabel("PCA 1")
plt.ylabel("PCA 2")
plt.colorbar(scatter)
plt.grid(True)
plt.show()

In [None]:
df2=pd.read_csv("/kaggle/working/cluster_results_2.csv")
df1=pd.read_csv("/kaggle/working/cluster_results.csv")

In [None]:
df1

In [None]:
df2