In [None]:
import os
import numpy as np
import pandas as pd 
import torchvision.transforms as T
from pymongo import MongoClient
from pymongo.server_api import ServerApi
from src.embedding.hfdataset_processing import create_dict_from_image, create_hf_ds_from_dict, add_image_to_hf_dataset
from dotenv import load_dotenv
from datasets import Dataset
from google.cloud import vision

In [None]:
# Point the Google Cloud client libraries at the service-account key file.
# setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already set in the
# environment instead of overwriting it with this notebook-local path.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', r'third-apex-402521-e63d3fe1b50f.json')
def get_tags_from_img(img_uri: str) -> list[str]:
    """Return the Cloud Vision label descriptions detected for a remote image.

    Args:
        img_uri: URI of the image. It is assigned to ``image.source.image_uri``,
            so the Vision service is given the URI rather than the image bytes.

    Returns:
        The ``description`` of every entry in ``response.label_annotations``,
        in response order (an empty list when there are no annotations).

    Raises:
        RuntimeError: If the response carries a non-empty ``error.message``.
    """
    # A new annotator client is instantiated on every call.
    client = vision.ImageAnnotatorClient()

    image = vision.Image()
    image.source.image_uri = img_uri

    # Performs label detection on the referenced image.
    response = client.label_detection(image=image)

    # Failures for the image are reported on the response object (see the Cloud
    # Vision label-detection sample), so surface them instead of returning an
    # empty tag list that looks like "no labels found".
    if response.error.message:
        raise RuntimeError(
            f"Vision API label detection failed for {img_uri}: {response.error.message}"
        )

    return [label.description for label in response.label_annotations]

# Try the tagger on one hosted clothing image; the returned tags are this cell's output.
get_tags_from_img(img_uri="http://clothing-images.s3.us.cloud-object-storage.appdomain.cloud/BC7646B8-459E-4FF2-B465-DF041718ACDE.jpg")


In [None]:
# Load variables from a local .env file (if one exists) so DB_URI is available on a
# fresh kernel. load_dotenv is imported at the top of the notebook but was never
# called; by default it does not override variables that are already set.
load_dotenv()
uri = os.environ["DB_URI"]
# Connect with Stable API version '1' and select the `items` collection of the
# `clothing` database.
client = MongoClient(uri, server_api=ServerApi('1'))
db = client.clothing
collection = db.items
import requests
from PIL import Image
from io import BytesIO

# Build a single Hugging Face dataset from every document in the Mongo collection.
# Start from an empty dataset and grow it one image at a time.
test_brian_hf_dataset = Dataset.from_dict({})

for item in collection.find():
    image_url = item["image"]
    # Bound the wait and fail on HTTP errors, so a hung server cannot stall the
    # notebook and an error page is never handed to PIL as if it were image bytes.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    img_data = BytesIO(response.content)
    # Decode the downloaded bytes into a PIL image.
    img = Image.open(img_data)
    brian_setup_dict = create_dict_from_image(img)
    # Record the source URL alongside the image data.
    brian_setup_dict['image_url'] = [image_url]
    brian_setup_hf_dataset = create_hf_ds_from_dict(brian_setup_dict)
    # Project helper; presumably appends the single-image dataset to the
    # accumulated one (TODO confirm argument order against its definition).
    test_brian_hf_dataset = add_image_to_hf_dataset(brian_setup_hf_dataset, test_brian_hf_dataset)

test_brian_hf_dataset

In [None]:
# Print the repr of the image stored in row 1, selecting the row before the column.
print(test_brian_hf_dataset[1]['image'])

In [None]:
from src.embedding.embedding import add_embeddings, extract_embeddings
from transformers import AutoFeatureExtractor, AutoModel

from constants import VISION_TRANSFORMER_CKPT, DEVICE
# Load the preprocessor and the model from the same checkpoint.
# `extractor` is not referenced again in the visible cells.
extractor = AutoFeatureExtractor.from_pretrained(VISION_TRANSFORMER_CKPT)
model = AutoModel.from_pretrained(VISION_TRANSFORMER_CKPT)
# Hidden size as reported by the model config; not referenced again in the visible cells.
hidden_dim = model.config.hidden_size
# Move the model to DEVICE and hand it to the project helper.
# NOTE(review): extract_embeddings presumably returns a callable that embeds a batch
# of images — confirm against src/embedding/embedding.py.
extract_fn = extract_embeddings(model.to(DEVICE))
# Rebinds the dataset name to the helper's result, so re-running this cell feeds the
# already-processed dataset back in.
test_brian_hf_dataset = add_embeddings(extract_fn, test_brian_hf_dataset)

# Bare last expression: display the resulting dataset as the cell output.
test_brian_hf_dataset

In [None]:
# Display the image stored in row 1, selecting the row before the column.
test_brian_hf_dataset[1]["image"]

In [None]:
from src.embedding.search import add_faiss_index_to_hfdataset, find_k_most_similar
# Attach a FAISS index to the dataset via the project helper (rebinds the same name).
test_brian_hf_dataset = add_faiss_index_to_hfdataset(test_brian_hf_dataset)
# NOTE(review): the second argument is presumably k, the number of neighbours, and the
# query item is chosen inside find_k_most_similar — confirm against src/embedding/search.py.
var = find_k_most_similar(test_brian_hf_dataset, 3)

In [None]:
# Show entries 1-3 of the 'image_url' field of the search result.
# Entry 0 is skipped — presumably it is the query item itself (TODO confirm).
var['image_url'][1:4]

In [None]:
image_url = 'https://media.gettyimages.com/id/102057102/photo/a-woman-spreads-out-a-towel-on-the-beach.jpg?s=612x612&w=gi&k=20&c=yRliyFhp-yR0Zfn9wKqb_Tcu_v8-Y_INXQ0t-5F-B6Q='

# Download the query image. Bound the wait and fail on HTTP errors so an error page
# is never handed to PIL as if it were image bytes.
response = requests.get(image_url, timeout=30)
response.raise_for_status()
img_data = BytesIO(response.content)

# Decode the bytes with PIL and wrap the image (plus its URL) in its own dataset.
img = Image.open(img_data)
towel_dict = create_dict_from_image(img)
towel_dict['image_url'] = [image_url]
towel_hf_dataset = create_hf_ds_from_dict(towel_dict)

# Embed the new image, combine it with the existing dataset and index the result.
towel_hf_dataset = add_embeddings(extract_fn, towel_hf_dataset)
new_dummy_hf_ds = add_image_to_hf_dataset(towel_hf_dataset,test_brian_hf_dataset)
new_dummy_hf_ds = add_faiss_index_to_hfdataset(new_dummy_hf_ds)
# Search the combined, freshly indexed dataset. Querying test_brian_hf_dataset here
# would ignore the new image entirely and just repeat the earlier search.
# NOTE(review): assumes find_k_most_similar picks its query from the dataset it is
# given and that the new image lands where it expects — confirm against the helpers.
var = find_k_most_similar(new_dummy_hf_ds, 3)
var['image_url'][1:4]

In [None]:
# Show entries 1-2 of the 'image' field of the latest search result.
# NOTE(review): this slice is 1:3 while the URL cells use 1:4, so one fewer result is
# shown here — confirm whether that is intentional.
var["image"][1:3]