# In [1]:
import psycopg2
import json
import os 
import re
# One-off migration: read the text stored under cmetadata->'openai_labels' in
# langchain_pg_embedding, normalise it into a clean list of labels, and write
# that list back as a real JSON array under the 'openai_labels_json_6' key.

# Separator between labels: a comma followed by any amount of whitespace.
LABEL_SEPARATOR = re.compile(r',\s*')

# Rows whose metadata has an 'openai_labels' key (value returned as text).
SELECT_LABELS_SQL = (
    "SELECT id, cmetadata->>'openai_labels' "
    "FROM langchain_pg_embedding WHERE cmetadata ? 'openai_labels';"
)

# Stores the cleaned array under the 'openai_labels_json_6' metadata key.
UPDATE_LABELS_SQL = """
        UPDATE langchain_pg_embedding
        SET cmetadata = jsonb_set(cmetadata, '{openai_labels_json_6}', %s::jsonb)
        WHERE id = %s;
    """


def parse_labels(openai_labels_str):
    """Convert the raw text of an ``openai_labels`` value into clean labels.

    Surrounding square brackets are removed, the remainder is split on
    commas, and every piece has its double quotes and spaces removed.
    Pieces that end up empty are dropped, so ``"[]"`` or a trailing comma
    does not produce a bogus ``""`` label.

    Args:
        openai_labels_str: Text as returned by ``cmetadata->>'openai_labels'``,
            or ``None`` when the stored JSON value is ``null``.

    Returns:
        A list of non-empty label strings (possibly empty).
    """
    # `->>` yields SQL NULL for a JSON null even though the key exists.
    if not openai_labels_str:
        return []

    pieces = LABEL_SEPARATOR.split(openai_labels_str.strip('[]'))
    cleaned = (
        piece.replace('"', '').replace(' ', '').strip() for piece in pieces
    )
    return [label for label in cleaned if label]


def main():
    """Rewrite every row's labels as a JSON array in a single transaction.

    Raises:
        KeyError: If the ``DATABASE_URL`` environment variable is not set.
    """
    conn = psycopg2.connect(os.environ["DATABASE_URL"], sslmode="require")
    try:
        # The cursor context manager closes the cursor even on failure.
        with conn.cursor() as cur:
            cur.execute(SELECT_LABELS_SQL)
            rows = cur.fetchall()

            for row_id, openai_labels_str in rows:
                openai_labels_json = json.dumps(parse_labels(openai_labels_str))
                cur.execute(UPDATE_LABELS_SQL, (openai_labels_json, row_id))

        # Commit only after every row was updated; if anything above raised,
        # closing the connection below discards the partial work.
        conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    main()