In [14]:
import cv2
import numpy as np
from skimage.feature import hog
import os
from joblib import Parallel, delayed

# Function to extract color features from an image using OpenCV
def extract_color_features(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error reading image: {image_path}")
        return None
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    color_features = cv2.mean(image)[:3]  # Mean LAB values
    return np.array(color_features)

# Function to extract texture features using HOG
def extract_texture_features(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Error reading image: {image_path}")
        return None
    features = hog(image, orientations=8, pixels_per_cell=(16, 16),
                   cells_per_block=(1, 1), visualize=False)
    return features

# Function to combine color and texture features
def extract_features(image_path):
    color_features = extract_color_features(image_path)
    texture_features = extract_texture_features(image_path)
    if color_features is not None and texture_features is not None:
        return np.hstack([color_features, texture_features])
    else:
        return None

# Function to process a pair of images
def process_pair(pair, base_path):
    top_image_path = os.path.join(base_path, 'train', 'cloth', pair[0])
    bottom_image_path = os.path.join(base_path, 'train', 'cloth', pair[1])
    print(f"Processing: {top_image_path} and {bottom_image_path}")
    top_features = extract_features(top_image_path)
    bottom_features = extract_features(bottom_image_path)
    if top_features is not None and bottom_features is not None:
        return np.hstack([top_features, bottom_features])
    else:
        print(f"Failed to extract features for: {top_image_path} or {bottom_image_path}")
        return None

# Base path for images
image_base_path = 'C:/Users/ARA/Desktop/Outfit-Aura/OUTFIT-AURA-web-application-/backend/data/zalando/'

# Check if the directory exists
if not os.path.isdir(image_base_path):
    print(f"Directory does not exist: {image_base_path}")
else:
    print(f"Directory exists: {image_base_path}")

# List all files in the train/cloth/ directory
train_image_dir = os.path.join(image_base_path, 'train', 'cloth')
print(f"Listing files in directory: {train_image_dir}")
for root, dirs, files in os.walk(train_image_dir):
    for file in files:
        print(file)

# Load train pairs from text file
train_pairs_path = os.path.join(image_base_path, 'train_pairs.txt')
with open(train_pairs_path, 'r') as file:
    train_pairs = [line.strip().split() for line in file.readlines()]

# Extract features for each pair in parallel
features = Parallel(n_jobs=-1)(delayed(process_pair)(pair, image_base_path) for pair in train_pairs)

# Filter out None values (in case of errors)
features = [feature for feature in features if feature is not None]

# Convert features to numpy array
features = np.array(features)

print(f"Extracted features shape: {features.shape}")


Directory exists: C:/Users/ARA/Desktop/Outfit-Aura/OUTFIT-AURA-web-application-/backend/data/zalando/
Listing files in directory: C:/Users/ARA/Desktop/Outfit-Aura/OUTFIT-AURA-web-application-/backend/data/zalando/train\cloth
00000_00.jpg
00001_00.jpg
00002_00.jpg
00003_00.jpg
00005_00.jpg
00007_00.jpg
00009_00.jpg
00010_00.jpg
00011_00.jpg
00012_00.jpg
00014_00.jpg
00015_00.jpg
00016_00.jpg
00018_00.jpg
00019_00.jpg
00022_00.jpg
00023_00.jpg
00024_00.jpg
00025_00.jpg
00026_00.jpg
00028_00.jpg
00029_00.jpg
00030_00.jpg
00031_00.jpg
00032_00.jpg
00033_00.jpg
00036_00.jpg
00038_00.jpg
00040_00.jpg
00041_00.jpg
00042_00.jpg
00043_00.jpg
00044_00.jpg
00045_00.jpg
00046_00.jpg
00047_00.jpg
00048_00.jpg
00049_00.jpg
00050_00.jpg
00051_00.jpg
00052_00.jpg
00053_00.jpg
00054_00.jpg
00056_00.jpg
00058_00.jpg
00059_00.jpg
00061_00.jpg
00062_00.jpg
00065_00.jpg
00066_00.jpg
00068_00.jpg
00070_00.jpg
00072_00.jpg
00073_00.jpg
00076_00.jpg
00077_00.jpg
00078_00.jpg
00079_00.jpg
00080_00.jpg
00081_00

In [15]:
import numpy as np

# Save features to a file
np.save('extracted_features.npy', features)


clustering using the features extracted

In [16]:
from sklearn.cluster import KMeans

# Load the features from the file
features = np.load('extracted_features.npy')

# Perform K-Means clustering
kmeans = KMeans(n_clusters=10, random_state=42)  # Adjust the number of clusters as needed
clusters = kmeans.fit_predict(features)

# Save the cluster labels
np.save('clusters.npy', clusters)


In [17]:
# Load the clusters
clusters = np.load('clusters.npy')

# Function to recommend similar items based on the cluster
def recommend_outfits(item_index, clusters, features, top_n=5):
    cluster_label = clusters[item_index]
    cluster_indices = np.where(clusters == cluster_label)[0]
    distances = np.linalg.norm(features[cluster_indices] - features[item_index], axis=1)
    recommended_indices = cluster_indices[np.argsort(distances)[:top_n]]
    return recommended_indices

# Example usage
item_index = 0  # Index of the item to base recommendations on
recommended_indices = recommend_outfits(item_index, clusters, features)

print(f"Recommended item indices: {recommended_indices}")


Recommended item indices: [    0 11390 10191  7769  8778]
