In [1]:
from dotenv import dotenv_values
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load API key from enviroment variables or a configuration file
pinecone_api_key= dotenv_values(".env")["pinecone_api_key"]

In [2]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input

# Load the pre-trained ResNet50 model without the top classification layers
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')






In [3]:
def get_image_embedding(image_path):
    # Load and preprocess the image
    img = image.load_img(image_path, target_size=(150, 120))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)

    # Obtain the image embedding
    embedding = model.predict(img_array)
    return embedding.flatten()

In [20]:
# Load the Images as vectors

import numpy as np
import os

# Define the folder path where your images are stored
folder_path = "MassimoDutti/top"

# Initialize an empty list to store image vectors
embedding_vectors = []

# Loop through all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        # Read the image using OpenCV
        image_path = os.path.join(folder_path, filename)

        embedding = get_image_embedding(image_path)
        embedding_vectors.append(embedding)




In [125]:
embedding_vectors[0:5]

[array([0.01718138, 0.        , 0.22389951, ..., 0.        , 1.1059793 ,
        0.        ], dtype=float32),
 array([0.0078711 , 0.31094927, 0.03584773, ..., 0.        , 0.9748006 ,
        0.70302427], dtype=float32),
 array([0.05241712, 0.56843036, 0.07284076, ..., 0.        , 0.02680944,
        2.7541823 ], dtype=float32),
 array([0.17328301, 2.7277431 , 0.20778885, ..., 0.08576186, 1.8294342 ,
        4.440919  ], dtype=float32),
 array([0.9033886 , 0.10631194, 0.        , ..., 0.207195  , 1.8678468 ,
        1.632143  ], dtype=float32)]

In [7]:
print("Number of vector:", len(embedding_vectors))
print("Embedding shape:",embedding_vectors[0].shape)

Number of vector: 25
Embedding shape: (2048,)


# Pinecone

In [8]:
from pinecone import Pinecone

In [9]:
pc = Pinecone(api_key=pinecone_api_key)

In [10]:
pc.describe_index('imagevector')

{'dimension': 2048,
 'host': 'imagevector-edim1f5.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'imagevector',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [11]:
index = pc.Index('imagevector')

In [131]:
# Delete all vectors in the collection
# index.delete(delete_all=True, namespace='uniqulo_bottom')

{}

In [21]:
# Initialize an empty list to store the dictionaries
embeddings_list = []

# Iterate over the embedding vectors and create dictionaries
for idx, embedding in enumerate(embedding_vectors):
    # Create a dictionary with 'id' and 'value' keys
    embedding_dict = {'id': str(idx+1), 'values': embedding.tolist()}
    embeddings_list.append(embedding_dict)


In [133]:
embeddings_list[0]

{'id': '1',
 'values': [0.017181383445858955,
  0.0,
  0.22389951348304749,
  0.07437849789857864,
  0.0,
  0.0187225304543972,
  0.0,
  0.040633924305438995,
  0.2226370871067047,
  0.0,
  0.0,
  2.6255264282226562,
  1.898435354232788,
  0.0,
  0.02565932832658291,
  0.0,
  0.0,
  0.0,
  0.3080846965312958,
  0.007767158094793558,
  0.20605787634849548,
  0.0,
  0.0,
  0.0,
  0.22108617424964905,
  0.12406294047832489,
  0.17862196266651154,
  0.0,
  0.0,
  0.061651743948459625,
  0.0,
  0.008538496680557728,
  0.6590477824211121,
  0.25781187415122986,
  0.3000761866569519,
  0.0,
  0.0,
  1.9098014831542969,
  0.0,
  0.09793484210968018,
  0.16307522356510162,
  0.5295872688293457,
  1.4029700756072998,
  0.2429686039686203,
  2.665034055709839,
  0.136880561709404,
  0.0,
  0.17076806724071503,
  0.0,
  0.36865106225013733,
  0.8099579811096191,
  0.02527765929698944,
  0.11274492740631104,
  0.0011325093219056726,
  0.012736896052956581,
  0.7753812074661255,
  0.0,
  0.950225830

In [134]:
len(embeddings_list)

20

In [22]:
index.upsert(
    vectors = embeddings_list,
    namespace="MassimoDutti_top"
)

{'upserted_count': 25}

## prep some vectors for query purpose

In [51]:
# Make a single call to the API 
q_responses = get_image_embedding("06-01.png").tolist()



In [None]:
q_responses

In [57]:
# time to query
index.query(
    namespace="uniqulo_bottom",
    vector = q_responses,
    top_k = 3
)

{'matches': [{'id': '11', 'score': 0.584782779, 'values': []},
             {'id': '9', 'score': 0.579614818, 'values': []},
             {'id': '2', 'score': 0.531748354, 'values': []}],
 'namespace': 'uniqulo_bottom',
 'usage': {'read_units': 5}}

In [52]:
# time to query
index.query(
    namespace="uniqulo_top",
    vector = q_responses,
    top_k = 3
)

{'matches': [{'id': '22', 'score': 0.99999994, 'values': []},
             {'id': '2', 'score': 0.65958488, 'values': []},
             {'id': '16', 'score': 0.624230087, 'values': []}],
 'namespace': 'uniqulo_top',
 'usage': {'read_units': 5}}