In [1]:
from openai import OpenAI
import os
from dotenv import load_dotenv
from supabase.client import create_client, Client


load_dotenv()  # take environment variables from .env.

def readEnv(key: str) -> str:
    value = os.getenv(key)
    if value is None:
        raise ValueError(f"{key} is not set.")
    return value    

OPENAI_API_KEY = readEnv("OPENAI_API_KEY")

client = OpenAI(api_key=OPENAI_API_KEY)

url: str = readEnv("SUPABASE_URL")
key: str = readEnv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(url, key)


In [2]:
import requests
import glob
import base64

# Encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Create the image object
def create_image_object(base64_image):
    return  {
        "type": "image_url",
        "image_url": {
        "url": f"data:image/jpeg;base64,{base64_image}"
        }
    }

### Get all the images in the images folder
extensions = ['*.jpg', '*.jpeg', '*.png', '*.heic']

image_paths = []
for extension in extensions:
    image_paths.extend(glob.glob(f"images/{extension}"))


base64_images = []

for image_path in image_paths:
    # Getting the base64 string
    base64_image = encode_image(image_path)
    base64_images.append(create_image_object(base64_image))

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {OPENAI_API_KEY}"
}

payload = {
  "model": "gpt-4-vision-preview",
  "messages": [
    {
        "role": "system",
        "content": "You will be given an arbitrary number of images. For each image, give a description of the image. The description should be independent and treat each image individually. Start the description without numbering the images, for example: 'A cat is sitting on a table.' Delineate each description with a double line break."
    },
    {
      "role": "user",
      "content": [
        *base64_images # unpack all base64 objects
      ]
    }
  ],
  "max_tokens": 300,
}

response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
json = response.json()

descriptions = json['choices'][0]['message']['content'].split('\n\n')

print(descriptions)

['The first image shows a small screen displaying what appears to be a pixelated image of two silhouettes against a backdrop with shades of purple and blue, which might suggest a sunset or sunrise. The image seems to come from a low-resolution or glitching digital display. Below the screen, there are three buttons that seem to be interface controls, probably allowing the user to play or pause, navigate, or access a menu.', 'The second image depicts a group of gray wolves huddled together on a snowy landscape. The environment looks cold and barren, and the wolves seem to be resting or possibly seeking warmth from each other. The image gives a sense of a remote, wintry wilderness habitat possibly indicative of a wildlife documentary or nature photography.']


In [3]:
# Create connection to db

# Create embeddings for each description from OpenAI
embeddings = client.embeddings.create(input=descriptions, model='text-embedding-3-small').data

embedding_vectors = [embedding.embedding for embedding in embeddings]

data, count = supabase.table('images').upsert([
    {
        "description": description,
        "image_path": image_path,
        "embedding": embedding
    }
    for description, image_path, embedding in zip(descriptions, image_paths, embedding_vectors)
]).execute()

print(data, count)

2024-02-06 12:42:50,683:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-06 12:42:51,477:INFO - HTTP Request: POST https://qwhgjqvosygrtkjgddnq.supabase.co/rest/v1/images "HTTP/1.1 201 Created"


('data', [{'id': 4, 'description': 'The first image shows a small screen displaying what appears to be a pixelated image of two silhouettes against a backdrop with shades of purple and blue, which might suggest a sunset or sunrise. The image seems to come from a low-resolution or glitching digital display. Below the screen, there are three buttons that seem to be interface controls, probably allowing the user to play or pause, navigate, or access a menu.', 'embedding': '[-0.010357944,-0.023256516,-0.041990157,-0.0038179215,-0.015229807,-0.031017995,0.09157428,0.010141572,0.0010687737,0.031017995,0.052264344,-0.011753893,-0.020325024,-0.009464537,-0.016877027,-0.011090818,-0.042716052,-0.0070111556,-0.021539498,0.02509917,0.02892407,-0.04218559,-0.0020014425,0.057736468,0.01902679,-0.0028756557,-0.013861777,-0.006184056,0.03274897,0.043581538,-0.026201969,-0.020297104,-0.009297023,-0.08247269,0.012633341,-0.058574036,-0.0065784114,0.028351732,-0.037048496,-0.020995079,-0.014245664,-0.01

In [4]:
# search supabase

input_str = "Fox in the snow"

embeddings = client.embeddings.create(input=input_str, model='text-embedding-3-small').data[0].embedding

res = supabase.rpc('match_images',  {
  'query_embedding': embeddings,
  'similarity_threshold': 0.4,
  'match_count': 2,
}).execute()

print(res.data[0]) # contains id, description, similarity, image_path

2024-02-06 12:43:00,172:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


2024-02-06 12:43:00,442:INFO - HTTP Request: POST https://qwhgjqvosygrtkjgddnq.supabase.co/rest/v1/rpc/match_images "HTTP/1.1 200 OK"


{'id': 3, 'description': 'The second image captures a snowy landscape where a group of wolves is huddled together on the snow. The wolves seem to be engaging in social behavior, possibly playing or displaying affection towards each other. The environment is predominantly white due to snow coverage, with some patches of grass or ground visible. The scene conveys a sense of wilderness and animal behavior in a wintry setting.', 'similarity': 0.479985475540161, 'image_path': 'images/Untitled.jpg'}
