**PREPROCESSING DATA**

In [None]:
import os
from PIL import Image
import numpy as np

# Path to the folder containing the input images.
# NOTE: this is an absolute, machine-specific path; change it to the local
# dataset location before running.
folder_path = 'C:/Codes/UNPAD/Sem5/DatMin/UTS/DataMining/'

# Preprocessing Function (Resize, Convert to RGB, Normalize)
def preprocess_image(img, size=(300, 300)):
    """Turn an image into a normalized RGB pixel array.

    The image is resized to ``size`` first, then converted to RGB mode
    (so an RGBA input loses its alpha channel), and finally the 8-bit
    channel values are scaled to floats in [0, 1].

    Args:
        img: Image object providing ``resize`` and ``convert`` (e.g. the
            result of ``PIL.Image.open``).
        size: Target size forwarded unchanged to ``img.resize``.

    Returns:
        NumPy array holding the RGB pixel data divided by 255.0.
    """
    rgb_image = img.resize(size).convert('RGB')

    # 8-bit channels span 0..255, so dividing by 255.0 maps them onto [0, 1]
    pixels = np.asarray(rgb_image)
    return pixels / 255.0

# List to store processed images
processed_images = []

# Accepted image extensions. They are compared against the lower-cased file
# name so that files such as "PHOTO.JPG" or "scan.PNG" are not silently skipped.
image_extensions = ('.png', '.jpg', '.jpeg')  # Add other image formats if needed

# Loop through all files in the folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the image numbering reproducible.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.lower().endswith(image_extensions):
        continue

    image_path = os.path.join(folder_path, file_name)

    # Open and preprocess the image. The context manager releases the file
    # handle once the pixel data has been turned into an array.
    with Image.open(image_path) as img:
        processed_img = preprocess_image(img)
    processed_images.append(processed_img)

# Output the shape of each processed image
for i, img in enumerate(processed_images):
    print(f"Processed Image {i+1} Shape:", img.shape)

# Display the top-left 2x2 pixels of each array to check correctness
for i, img in enumerate(processed_images):
    print(f"Sample from Image {i+1} Array:", img[0:2, 0:2, :])

In [None]:
# Feature extraction functions
def extract_color_features(img_array):
    """Return the mean value of each channel of an image array.

    The average is taken over the first two axes (rows and columns), so
    an (H, W, C) input yields C values, one per channel.
    """
    spatial_axes = (0, 1)
    return np.mean(img_array, axis=spatial_axes)

def extract_texture_features(img_array):
    """Return the variance of each channel of an image array.

    The variance is computed over the first two axes (rows and columns)
    and serves as a simple texture descriptor: an (H, W, C) input yields
    C values, one per channel.
    """
    spatial_axes = (0, 1)
    return np.var(img_array, axis=spatial_axes)

def extract_spatial_features(img_array, grid_size=(5, 5)):
    """Summarize an image by the mean channel values of each grid cell.

    The image is divided into ``grid_size`` = (rows, cols) rectangular
    cells and the mean over rows and columns is taken inside each cell.
    Cell boundaries are spread evenly over the full height and width, so
    no pixels are dropped when the image size is not an exact multiple of
    the grid. When it is an exact multiple, the cells are the same as an
    even ``h // rows`` by ``w // cols`` split.

    Args:
        img_array: Image array of shape (H, W, C).
        grid_size: Number of grid rows and grid columns.

    Returns:
        1-D array of length rows * cols * C holding the per-cell channel
        means, cells ordered row by row.

    Raises:
        ValueError: If a grid dimension is smaller than 1, or the image
            has fewer rows/columns than the grid (which would create
            empty cells whose mean is NaN).
    """
    h, w = img_array.shape[:2]
    grid_h, grid_w = grid_size

    if grid_h < 1 or grid_w < 1:
        raise ValueError(f"grid_size must be positive, got {grid_size}")
    if h < grid_h or w < grid_w:
        raise ValueError(
            f"image of size {h}x{w} is too small for a {grid_h}x{grid_w} grid"
        )

    # Integer edges k * n // cells distribute any remainder across the cells
    # instead of discarding the trailing rows/columns of the image.
    row_edges = [i * h // grid_h for i in range(grid_h + 1)]
    col_edges = [j * w // grid_w for j in range(grid_w + 1)]

    # Mean of the channel values for each cell, visiting cells row by row
    spatial_features = [
        np.mean(img_array[top:bottom, left:right], axis=(0, 1))
        for top, bottom in zip(row_edges, row_edges[1:])
        for left, right in zip(col_edges, col_edges[1:])
    ]

    # Flatten the list of per-cell means into a 1D array
    return np.array(spatial_features).flatten()

# Per-image feature containers, index-aligned with processed_images
color_features_list, texture_features_list, spatial_features_list = [], [], []

# Run the three extractors on every processed image and collect the results
for img in processed_images:
    color_features, texture_features, spatial_features = (
        extract_color_features(img),
        extract_texture_features(img),
        extract_spatial_features(img),
    )

    color_features_list.append(color_features)
    texture_features_list.append(texture_features)
    spatial_features_list.append(spatial_features)

# Report the extracted features image by image
feature_rows = zip(color_features_list, texture_features_list, spatial_features_list)
for i, (color_vec, texture_vec, spatial_vec) in enumerate(feature_rows):
    print(f"Color Features for Image {i+1}:", color_vec)
    print(f"Texture Features for Image {i+1}:", texture_vec)
    print(f"Spatial Features for Image {i+1}:", spatial_vec)

# Print the top-left 2x2 pixels of every image array as a sanity check
for i, img in enumerate(processed_images):
    print(f"Sample from Image {i+1} Array:", img[:2, :2, :])
