In [2]:
# Switch off Jedi-based completion in IPython's tab completer for this kernel.
%config IPCompleter.use_jedi=False

In [3]:
import os
import numpy as np
# Point the NumPy attributes `np.object`, `np.bool` and `np.int` at the
# matching Python builtins.
# NOTE(review): presumably a compatibility shim for a library imported below
# that still references these aliases on a NumPy release that no longer
# ships them -- confirm against the installed NumPy/TensorFlow versions.
for _alias, _builtin in (('object', object), ('bool', bool), ('int', int)):
    setattr(np, _alias, _builtin)
from PIL import Image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from sklearn.neighbors import NearestNeighbors
from joblib import parallel_backend
import pickle

In [4]:
# Show every compute device TensorFlow can see on this machine.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18285612151447911451
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9901703168
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7348448893519015696
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4070, pci bus id: 0000:26:00.0, compute capability: 8.9"
]


In [5]:
# Import TensorFlow under its conventional alias and display the installed
# version as the cell's output.
import tensorflow as tf
tf.__version__

'2.6.0'

In [6]:
# Keep TensorFlow's device-placement logging switched off (explicitly passes False).
tf.debugging.set_log_device_placement(False)

In [7]:
# Folder containing the source images. Defaults to the original local path,
# but can be overridden with the FASHION_DATA_DIR environment variable so the
# notebook can run on another machine without editing this cell.
DATA_DIR = os.environ.get('FASHION_DATA_DIR', 'C:\\fash-recom-dataset\\images')

# Where the fitted nearest-neighbour model (and its companion data) is pickled.
MODEL_OUTPUT_DIR = os.path.join('data', 'models')
MODEL_OUTPUT_PATH = os.path.join(MODEL_OUTPUT_DIR, 'fashion_knn_model.pkl')

In [8]:
# Create the model output folder (including parents) if it is missing.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)

In [9]:
def create_feature_extractor():
    """Build the image-embedding network used by this notebook.

    Returns:
        A Keras ``Model`` mapping the ResNet50 input to the backbone's output.
        The backbone is created with ImageNet weights, without its
        classification head (``include_top=False``), with ``pooling='max'``
        and an input shape of 80 x 60 x 3 (height, width, channels).
    """
    backbone = ResNet50(
        include_top=False,
        weights='imagenet',
        pooling='max',
        input_shape=(80, 60, 3),
    )
    return Model(inputs=backbone.input, outputs=backbone.output)

In [10]:
# Instantiate the feature extractor defined in the previous cell.
feature_extractor = create_feature_extractor()

In [11]:
def preprocess_image(img_path):
    """Load one image file and turn it into a ResNet50-ready batch of one.

    Args:
        img_path: Path to an image file that PIL can open.

    Returns:
        The result of ``preprocess_input`` applied to an array of shape
        (1, 80, 60, 3): one RGB image with a leading batch axis.

    Raises:
        Whatever ``Image.open`` / ``convert`` raise for missing or unreadable
        files; callers decide whether to skip such images.
    """
    # The context manager closes the underlying file handle as soon as the
    # pixel data has been copied, instead of leaving that to garbage collection.
    with Image.open(img_path) as img:
        # PIL sizes are (width, height): 60 x 80 here becomes an (80, 60, 3)
        # array below, matching the extractor's input_shape.
        resized = img.convert('RGB').resize((60, 80))
    img_array = np.array(resized)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    return preprocess_input(img_array)  # Normalize the input for ResNet50

In [13]:
def extract_features(image_path, feature_extractor):
    """Compute a flat feature vector for a single image file.

    Args:
        image_path: Path of the image to embed.
        feature_extractor: Model exposing ``predict``; it receives the batch
            produced by ``preprocess_image``.

    Returns:
        The model output for the image, flattened to a 1-D array.
    """
    preprocessed_image = preprocess_image(image_path)
    # verbose=0: this function is called once per image, so a per-call
    # progress bar would flood the notebook output with thousands of lines.
    features = feature_extractor.predict(preprocessed_image, verbose=0)
    return features.flatten()

In [None]:
# Embed every image in DATA_DIR, reusing the `feature_extractor` built in the
# earlier cell (rebuilding ResNet50 here would only duplicate the model).
features = []
image_paths = []  # image_paths[i] is the file that produced features[i]

# sorted() gives a reproducible row order; os.listdir() order is arbitrary.
for img_name in sorted(os.listdir(DATA_DIR)):
    if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    img_path = os.path.join(DATA_DIR, img_name)
    try:
        extracted_image = extract_features(img_path, feature_extractor)
    except Exception as e:
        # Best effort: report unreadable/corrupt images and keep going.
        print(f"Error processing {img_path}: {str(e)}")
        continue
    # Append to both lists together so rows and paths never get out of step,
    # even when some images are skipped.
    features.append(extracted_image)
    image_paths.append(img_path)

feature_array = np.array(features)



In [15]:
# NOTE(review): prints only an empty line; this looks like a leftover scratch
# cell -- consider removing it or replacing it with a real sanity check.
print()




In [22]:
# Dump the feature matrix as plain text so it can be inspected or reloaded
# without re-running feature extraction.
FEATURE_DUMP_PATH = '../temp/feature_array.txt'
# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(FEATURE_DUMP_PATH), exist_ok=True)
np.savetxt(FEATURE_DUMP_PATH, feature_array)

In [23]:
# Train KNN model
# k = 99 follows the rule of thumb (sqrt(n) / 2) - 1, where the -1 keeps k odd.
knn = NearestNeighbors(metric='cosine', n_neighbors=99)
with parallel_backend('threading', n_jobs=-1):
    # Fit on the extracted image features under joblib's threading backend.
    knn.fit(feature_array)

In [24]:
# Persist everything needed at query time in one pickle: the fitted KNN model,
# the feature matrix it was fitted on, and the image paths.
# `image_paths` must already have been populated by the feature-extraction
# cell (presumably one path per row of `feature_array`) before this cell runs.
model_bundle = (knn, feature_array, image_paths)
with open(MODEL_OUTPUT_PATH, 'wb') as model_file:
    pickle.dump(model_bundle, model_file)