In [9]:
# Quietly install/upgrade the packages this notebook imports (supabase, openai, python-dotenv).
# NOTE(review): versions are not pinned here — consider pinning for reproducible re-runs.
%pip install -qqqU supabase openai python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [48]:
from openai import OpenAI
from supabase import create_client
import numpy as np
import json
import os
from dotenv import load_dotenv

In [49]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Supabase connection: credentials come from the environment, never from the notebook.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

In [12]:
# OpenAI API key
# Do not assign to `openai.api_key` here: this notebook imports only the `OpenAI`
# class, so the bare `openai` module name is unbound and the assignment raises
# NameError on a fresh kernel. `OpenAI()` reads OPENAI_API_KEY from the
# environment by itself, so this cell only checks that the variable is present.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    print("Warning: OPENAI_API_KEY is not set; embedding requests will fail.")

# Define the table and query size
TABLE_NAME = "usda_foods"  # table whose rows receive embeddings
BATCH_SIZE = 300           # maximum number of rows fetched (and embedded) per run


In [60]:
# Fetch rows from the Supabase table
def fetch_food_names():
    """Fetch up to BATCH_SIZE rows of TABLE_NAME whose "embedding" column is NULL.

    Returns:
        list: the row dicts returned by the query (selected columns: "fdc_id"
        and "name"). Always a list: empty when nothing matches or when the
        request fails, so callers can safely test it with ``if not rows``.
    """
    try:
        response = (
            supabase.table(TABLE_NAME)
            .select("fdc_id, name")
            .is_("embedding", None)
            .limit(BATCH_SIZE)
            .execute()
        )
    except Exception as e:
        # Request failures are reported the same way the other helpers in this
        # notebook report them: print and fall back to an empty result.
        print(f"Error fetching rows: {e}")
        return []

    if response.data:
        return response.data

    # Not every client version exposes an `error` attribute on the response,
    # so read it defensively instead of assuming it exists.
    error = getattr(response, "error", None)
    if error:
        print(f"Error fetching rows: {error}")
    return []

In [61]:
# Generate embeddings using OpenAI API
def generate_embeddings(food_names):
    """Embed a batch of food names with the "text-embedding-3-small" model.

    Args:
        food_names: list of strings to embed.

    Returns:
        list: one embedding per entry of the API response, in response order.
        Empty list when `food_names` is empty or when the request fails (the
        failure is printed rather than raised).
    """
    # Nothing to embed: skip the API call instead of sending an empty request
    # and reporting its rejection as an error.
    if not food_names:
        return []

    try:
        client = OpenAI()
        response = client.embeddings.create(
            model="text-embedding-3-small",
            input=food_names
        )
        return [item.embedding for item in response.data]
    except Exception as e:
        print(f"Error generating embeddings: {e}")
        return []

In [68]:
# Update embeddings in the Supabase table
def update_embeddings(rows, embeddings):
    """Write each embedding to the row of TABLE_NAME with the matching "fdc_id".

    Rows and embeddings are paired positionally with zip(), so any surplus
    entries on either side are ignored. A failure on one row is printed and the
    loop continues with the next row.

    Args:
        rows: list of dicts, each containing an "fdc_id" key.
        embeddings: list of embeddings, in the same order as `rows`.
    """
    for row, embedding in zip(rows, embeddings):
        fdc_id = row["fdc_id"]
        try:
            # The embedding is sent as a JSON-encoded string.
            embedding_json = json.dumps(embedding)
            response = (
                supabase.table(TABLE_NAME)
                .update({"embedding": embedding_json})
                .eq("fdc_id", fdc_id)
                .execute()
            )
            # Read `error` defensively: not every response object has one.
            error = getattr(response, "error", None)
            if error:
                print(f"Error updating row {fdc_id}: {error}")
        except Exception as e:
            print(f"Error updating row {fdc_id}: {e}")

In [None]:
# Run one batch: fetch rows lacking embeddings, embed their names, write them back.
# Each step runs only if the previous one produced something. Branching is used
# instead of sys.exit()/raise so the cell stops cleanly without an exception in
# the notebook, and "successfully updated" is printed only when updates ran.

# Step 1: Fetch rows ("or []" guards against a fetch that returns None)
rows = fetch_food_names() or []
food_names = []
embeddings = []

if not rows:
    print("No rows fetched, exiting...")
else:
    # Step 2: Extract names and generate embeddings
    food_names = [row["name"] for row in rows]
    embeddings = generate_embeddings(food_names)

    if not embeddings:
        print("No embeddings generated, exiting...")
    else:
        # Step 3: Update rows with embeddings
        update_embeddings(rows, embeddings)
        print("Embeddings successfully updated.")