In [7]:
import openai
import os


def geo_classify(title, text, model="gpt-4o"):
    """Ask an OpenAI chat model for the geographic places mentioned in a chunk.

    The title and text are interpolated into a fixed instruction prompt that
    asks for a comma separated list of locations, places or buildings.

    :param title: Heading / section path of the chunk, included in the prompt.
    :param text: Body text of the chunk, included in the prompt.
    :param model: Name of the chat model to query (defaults to "gpt-4o").
    :return: The raw reply text of the first choice (expected to be a comma
        separated list), or an empty string when the reply carries no text.
    :raises KeyError: If the OPENAI_API_KEY environment variable is not set.
    """
    prompt = f'''
    Please extract all specifc geographic locations or pysical places or buildings, return them as a comma separated list.

    Do not include casual decriptions eg. "behind the church". 

    Title: {title}

    Text: {text}

    '''

    # The key is taken from the environment so it never lives in the notebook.
    openai.api_key = os.environ["OPENAI_API_KEY"]

    # Single-turn conversation: the whole prompt is sent as one user message.
    completion = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )

    # `content` is optional in the API response; normalise a missing reply to
    # an empty string so callers can always treat the result as text.
    content = completion.choices[0].message.content
    return content if content is not None else ""

In [8]:
from postgres import Postgres 
import json

pg = Postgres()

# Select chunks of the Leeds local plan (sherpa chunker) that do not yet carry
# an 'openai_geo_labels' key in their metadata. LIMIT 1 keeps each run to a
# single record.
records = pg.query("""
    SELECT id, cmetadata->>'neighbourhood' as neighbourhood, cmetadata->>'block_idx' as block_idx, cmetadata->>'sections' as sections, cmetadata->>'text' as text
    FROM langchain_pg_embedding
    WHERE NOT (cmetadata ? 'openai_geo_labels')
    AND cmetadata->>'experiment' = 'leeds'
    AND cmetadata->>'chunker' = 'sherpa'
    AND cmetadata->>'category' = 'local_plan'
    LIMIT 1;
""")
print('records: ', records)

for record in records:
    # Positions follow the SELECT list above:
    # 0 = id, 1 = neighbourhood, 2 = block_idx, 3 = sections, 4 = text.
    record_id = record[0]  # not named `id`, which would shadow the builtin
    title = record[3]
    text = record[4]

    comma_separated_labels = geo_classify(title, text)

    # Split the reply into clean labels. Empty fragments (an empty reply, a
    # trailing comma, ", ,") are dropped so no blank label is ever stored.
    labels = [
        item.strip()
        for item in (comma_separated_labels or "").split(",")
        if item.strip()
    ]

    print('title: ', title)
    print('text: ', text)
    print('labels: ', labels)

    # Written even when `labels` is empty: the query above selects records
    # lacking the key, so writing it is what takes this record out of the queue.
    pg.add_labels_to_cmetadata(record_id, labels, 'openai_geo_labels')
    
def add_labels_to_cmetadata(self, record_id, labels, label_type):
    """
    Appends a list of labels to the array stored under `label_type` in the
    cmetadata JSONB column of one langchain_pg_embedding row.

    If the key does not exist yet it is created (starting from an empty array).
    Labels are concatenated onto the existing array, so duplicates are kept.
    Database errors are printed and the transaction is rolled back; nothing is
    raised to the caller for them.

    :param record_id: The id of the row to update (matched against the id column)
    :param labels: A list of labels (Python list of JSON-serialisable values) to add
    :param label_type: The top-level cmetadata key to append to, e.g. 'openai_geo_labels'
    """
    # Local import keeps this definition self-contained.
    import json

    try:
        with self.conn.cursor() as cursor:
            # jsonb_set takes its path as a text[] literal: '{key}'.
            json_path = '{' + label_type + '}'

            # The labels must reach PostgreSQL as JSON text. Passing the Python
            # list directly makes psycopg2 render it as a SQL ARRAY[...], which
            # cannot be cast to jsonb.
            labels_json = json.dumps(labels)

            query = """
                UPDATE langchain_pg_embedding
                SET cmetadata = jsonb_set(
                    cmetadata,
                    %s,
                    (COALESCE(cmetadata->%s, '[]'::jsonb) || %s::jsonb),
                    true
                )
                WHERE id = %s;
            """

            # All values are bound as parameters; nothing is formatted into the SQL.
            cursor.execute(query, (json_path, label_type, labels_json, record_id))

            # Commit the transaction
            self.conn.commit()

            print(f"Labels {labels} with {label_type} added to record with ID {record_id}.")

    except psycopg2.Error as e:
        print(f"Error updating labels: {e}")
        self.conn.rollback()


records:  [('bbc86a28-33cd-478f-82c9-c0038b302259', None, '146', 'Key > FRONTAGES > Abbey Gardens', ' 44.8m Medical ')]
labels:  ['Abbey Gardens']
title:  Key > FRONTAGES > Abbey Gardens
text:   44.8m Medical 
labels:  ['Abbey Gardens']
["Abbey Gardens"]
**************************************************************************************************************
Labels ['Abbey Gardens'] with openai_geo_labels added to record with ID bbc86a28-33cd-478f-82c9-c0038b302259.
