In [None]:
from openai import OpenAI
import os
from dotenv import load_dotenv
from supabase.client import create_client, Client


load_dotenv()  # take environment variables from .env.

def readEnv(key: str) -> str:
    value = os.getenv(key)
    if value is None:
        raise ValueError(f"{key} is not set.")
    return value    

OPENAI_API_KEY = readEnv("OPENAI_API_KEY")

client = OpenAI(api_key=OPENAI_API_KEY)

url: str = readEnv("SUPABASE_URL")
key: str = readEnv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(url, key)


In [42]:
import requests
import glob
import base64

# Encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Create the image object
def create_image_object(base64_image):
    return  {
        "type": "image_url",
        "image_url": {
        "url": f"data:image/jpeg;base64,{base64_image}"
        }
    }

### Get all the images in the images folder
extensions = ['*.jpg', '*.jpeg', '*.png', '*.heic']

image_paths = []
for extension in extensions:
    image_paths.extend(glob.glob(f"images/{extension}"))


base64_images = []

for image_path in image_paths:
    # Getting the base64 string
    base64_image = encode_image(image_path)
    base64_images.append(create_image_object(base64_image))

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {OPENAI_API_KEY}"
}

payload = {
  "model": "gpt-4-vision-preview",
  "messages": [
    {
        "role": "system",
        "content": "You will be given an arbitrary number of images. For each image, give a description of the image. The description should be independent and treat each image individually. Start the description without numbering the images, for example: 'A cat is sitting on a table.' Delineate each description with a double line break."
    },
    {
      "role": "user",
      "content": [
        *base64_images # unpack all base64 objects
      ]
    }
  ],
  "max_tokens": 300,
}

response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
json = response.json()

descriptions = json['choices'][0]['message']['content'].split('\n\n')

print(descriptions)

['The first image shows a digital screen that appears to have a glitch or low resolution, making it difficult to discern the content clearly. The screen is housed in a device with three buttons underneath it - one that resembles a play or start symbol, another that appears to be a directional or selection button, and a third with a menu or list icon.', 'The second image captures a natural scene set in a wintry landscape, where a pack of grey wolves are huddled together on the snow. The wolves seem to be resting or possibly protecting each other from the cold. The background is a vast expanse of untouched snow, emphasizing the harshness and isolation of their environment.']


In [43]:
# Create connection to db

# Create embeddings for each description from OpenAI
embeddings = client.embeddings.create(input=descriptions, model='text-embedding-3-small').data

embedding_vectors = [embedding.embedding for embedding in embeddings]

data, count = supabase.table('images').upsert([
    {
        "description": description,
        "image_path": image_path,
        "embedding": embedding
    }
    for description, image_path, embedding in zip(descriptions, image_paths, embedding_vectors)
]).execute()

print(data, count)

2024-02-02 12:33:14,243:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-02 12:33:14,963:INFO - HTTP Request: POST https://qwhgjqvosygrtkjgddnq.supabase.co/rest/v1/images "HTTP/1.1 201 Created"


('data', [{'id': 4, 'description': 'The first image shows a digital screen that appears to have a glitch or low resolution, making it difficult to discern the content clearly. The screen is housed in a device with three buttons underneath it - one that resembles a play or start symbol, another that appears to be a directional or selection button, and a third with a menu or list icon.', 'embedding': '[-0.03404727,0.0125877075,-0.020827798,0.024006993,0.0029855894,-0.031574562,0.058203712,0.009646273,-0.0006050149,-6.177521e-05,0.06217091,-0.0062055425,-0.012071428,-0.018626818,-0.040215448,-0.020882143,-0.05608425,-0.021928288,-0.021846771,0.008117815,0.03874813,-0.011670632,0.0060119377,0.019781653,0.04010676,0.012995296,-0.015665004,-0.010284829,0.012037463,0.053475678,-0.002314766,-0.011779322,0.014917757,-0.104288444,0.003532438,-0.04480762,-0.00035664038,0.029889861,-0.012207291,-0.007812123,-0.041764285,0.026357424,0.014347133,0.007336602,0.021452768,-0.010644866,-0.026316663,0.01

In [44]:
# search supabase

input_str = "Fox in the snow"

embeddings = client.embeddings.create(input=input_str, model='text-embedding-3-small').data[0].embedding

res = supabase.rpc('match_images',  {
  'query_embedding': embeddings,
  'similarity_threshold': 0.4,
  'match_count': 2,
}).execute()

print(res.data[0]) # contains id, description, similarity, image_path

2024-02-02 12:33:15,357:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-02 12:33:15,520:INFO - HTTP Request: POST https://qwhgjqvosygrtkjgddnq.supabase.co/rest/v1/rpc/match_images "HTTP/1.1 200 OK"


{'id': 2, 'description': 'The second image captures a snowy landscape with a group of grey wolves huddled together on the snow, likely for warmth or social bonding.', 'similarity': 0.461063953574661, 'image_path': 'images/Untitled.jpg'}
