In [91]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import time

from pymilvus import MilvusClient
#from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from IPython.display import display, Image, HTML

### Configurar Milvus - Base de datos de Vectores

In [92]:
DIMENSION = 128
COLLECTION_NAME = "fashion_recommender"

db_client = MilvusClient(uri="fashion.db")

def crear_coleccion():

    if db_client.has_collection(COLLECTION_NAME):
       db_client.drop_collection(COLLECTION_NAME)

    schema = MilvusClient.create_schema(
        auto_id=True,
        enable_dynamic_field=False
    )

    db_client.create_collection(
        collection_name=COLLECTION_NAME,
        vector_field_name="image_embeddings",
        dimension=DIMENSION,
        auto_id=True,
        enable_dynamic_field=True,
        metric_type="COSINE",
    )

def extraer_features(folder_imagen: str, nombre_imagen: str):
    # Extraer características de imágenes con SIFT
    sift = cv2.SIFT_create()
    ruta_imagen = os.path.join(folder_imagen, nombre_imagen)
    imagen = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)
    if imagen is None:
        print(f"No se pudo leer la imagen '{ruta_imagen}'.")
        return None
    keypoints, descriptors = sift.detectAndCompute(imagen, None)
    promedio_descriptores = np.mean(descriptors, axis=0)
    return promedio_descriptores.tolist()

def insertar_en_coleccion(folder_imagen: str):
    start = time.time()
    # Recorrer las imágenes en el directorio
    count = 0
    list_dirs = os.listdir(folder_imagen)
    for nombre_imagen in list_dirs:
        features_imagen = extraer_features(folder_imagen, nombre_imagen)
        if not features_imagen:
            print(f"No se pudo leer la imagen '{nombre_imagen}'.")
            continue

        db_client.insert(COLLECTION_NAME, {"image_id": nombre_imagen ,"image_embeddings": features_imagen })
        count += 1

        if count % 1000 == 0 or count == len(list_dirs):
            print(f"images -> {count}")
    
    end = time.time
    print(f"tiempo de ejecucion (seg) {round(end - start, 4)}")
    print(f"Total imagenes insertadas: {count}")


In [93]:
folder_imagenes = "fashion-dataset/images"

crear_coleccion()
insertar_en_coleccion(folder_imagenes)

images -> 1000
images -> 2000
images -> 3000
images -> 4000
images -> 5000
images -> 6000
No se pudo leer la imagen 'fashion-dataset/images/.DS_Store'.
No se pudo leer la imagen '.DS_Store'.
images -> 7000
images -> 8000
images -> 9000
images -> 10000
images -> 11000
images -> 12000
images -> 13000
images -> 14000
images -> 15000
images -> 16000
images -> 17000
images -> 18000
images -> 19000
images -> 20000
images -> 21000
images -> 22000
images -> 23000
images -> 24000
images -> 25000
images -> 26000
images -> 27000
images -> 28000
images -> 29000
images -> 30000
images -> 31000
images -> 32000
images -> 33000
images -> 34000
images -> 35000
images -> 36000
images -> 37000
images -> 38000
images -> 39000
images -> 40000
images -> 41000


AxisError: axis 0 is out of bounds for array of dimension 0

In [97]:
query_folder = "fashion-dataset/test_images"
query_image_name = "1590.jpg"
features = extraer_features(query_folder, query_image_name)

results = db_client.search(
    COLLECTION_NAME,
    data=[features],
    output_fields=["image_id"],
    search_params={"metric_type": "COSINE"}
)
results

data: ["[{'id': 453823012167236962, 'distance': 0.9999997615814209, 'entity': {'image_id': '1590.jpg'}}, {'id': 453824084139886024, 'distance': 0.9973840713500977, 'entity': {'image_id': '4650.jpg'}}, {'id': 453824723585668848, 'distance': 0.9972014427185059, 'entity': {'image_id': '5712.jpg'}}, {'id': 453822740655774600, 'distance': 0.9969114661216736, 'entity': {'image_id': '13198.jpg'}}, {'id': 453823887311678738, 'distance': 0.9967807531356812, 'entity': {'image_id': '15410.jpg'}}, {'id': 453822756717337974, 'distance': 0.9967342019081116, 'entity': {'image_id': '4570.jpg'}}, {'id': 453824856525711246, 'distance': 0.9967103600502014, 'entity': {'image_id': '3943.jpg'}}, {'id': 453823317679563746, 'distance': 0.9966440200805664, 'entity': {'image_id': '38155.jpg'}}, {'id': 453823613688646186, 'distance': 0.9966123104095459, 'entity': {'image_id': '7753.jpg'}}, {'id': 453823618076150460, 'distance': 0.9965291023254395, 'entity': {'image_id': '18724.jpg'}}]"] 

In [98]:
for result in results:
    for hit in result[:10]:
        print(hit["entity"])

{'image_id': '1590.jpg'}
{'image_id': '4650.jpg'}
{'image_id': '5712.jpg'}
{'image_id': '13198.jpg'}
{'image_id': '15410.jpg'}
{'image_id': '4570.jpg'}
{'image_id': '3943.jpg'}
{'image_id': '38155.jpg'}
{'image_id': '7753.jpg'}
{'image_id': '18724.jpg'}


In [99]:
def display_products(results: list):
    html = ""
    for result in results:
        for hit in result[:10]:
            html += f"""
                <div style="display: inline-block; text-align: center; margin: 10px;">
                    <img src="fashion-dataset/images/{hit["entity"]["image_id"]}" width="150" /><br>
                    <b style="width=180">{hit["entity"]["image_id"]}</b><br>
                </div>
            """

    display(HTML(html))

display_products(results)