Querying

Imports

In [2]:
import json
import os
import re
import time

import fasttext
import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import get_embedding
from pymilvus import connections, DataType, CollectionSchema, FieldSchema, Collection, Partition, utility
from tqdm import tqdm

Constants

In [3]:
# The API key is read from the environment so that no secret is stored in (or
# committed with) the notebook. Export OPENAI_API_KEY before starting Jupyter.
# NOTE(review): a key was previously hardcoded in this cell; treat it as leaked
# and make sure it has been revoked.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that launches "
        "this notebook instead of hardcoding the key in a cell."
    )
# Module-level value; no other cell visible in this notebook reads it.
max_tokens = 8000
# Embedding vector size for each supported embedder; looked up by embedder name.
dimensions = {'openai' : 1536,
            'fasttext' : 300}
openai.api_key = OPENAI_API_KEY


Definitions

In [4]:
# Partition to query; must be one of the keys of `bundled_schema` below.
partition_name = 'facebook_posts'
# Embedder to use: either 'fasttext' or 'openai' (the keys of `dimensions`).
embedder = 'openai'
dimension = dimensions[embedder]
# Field names available for each partition. The cells below search each field in
# its own "<field>_collection" collection.
bundled_schema = {'rmrj_articles': ['author', 'title', 'published_date', 'text'],
                  'facebook_posts': ['text', 'time', 'link'],
                  'usjr_about': ['text', 'content_id'],
                  'all': ['author', 'title', 'published_date', 'text', 'time', 'post', 'link', 'content_id']}
collection_names = bundled_schema[partition_name]
if embedder == 'fasttext':
    # The model location is machine-specific: set FASTTEXT_MODEL_PATH to point at
    # the .bin file. The original author's path is kept as the fallback so that
    # existing setups keep working unchanged.
    fasttext_model_path = os.environ.get(
        'FASTTEXT_MODEL_PATH',
        '/Users/garfieldgreglim/Library/Mobile Documents/com~apple~CloudDocs/Josenian-Query/Final Outputs/Jupyter Notebooks/Embedder/crawl-300d-2M-subword.bin',
    )
    fasttext_model = fasttext.load_model(fasttext_model_path)

Embedder

In [5]:
def get_embedding(text, embedding_type, model="text-embedding-ada-002"):
    """Return the embedding vector of ``text`` from the selected backend.

    NOTE: this definition shadows the ``get_embedding`` imported from
    ``openai.embeddings_utils`` at the top of the notebook.

    Args:
        text: String to embed. Newlines are replaced by spaces first.
        embedding_type: ``'openai'`` to call the OpenAI embeddings endpoint, or
            ``'fasttext'`` to use the module-level ``fasttext_model`` (only
            loaded when the notebook's ``embedder`` is ``'fasttext'``).
        model: OpenAI embedding model name. Ignored for ``'fasttext'``.

    Returns:
        The embedding as returned by the backend: the ``'embedding'`` entry of
        the first OpenAI result, or fastText's sentence vector.

    Raises:
        ValueError: If ``embedding_type`` is neither ``'openai'`` nor ``'fasttext'``.
    """
    # Embed the text as a single line; fastText's sentence API rejects input
    # that contains newline characters.
    text = text.replace("\n", " ")
    if embedding_type == 'openai':
        return openai.Embedding.create(input=[text], model=model)['data'][0]['embedding']
    if embedding_type == 'fasttext':
        return fasttext_model.get_sentence_vector(text)
    raise ValueError("Invalid embedding_type. Expected 'openai' or 'fasttext'.")


Connection

In [6]:
# Start from a clean slate: drop any 'default' connection left over from an
# earlier run of this cell.
if connections.has_connection('default'):
    connections.remove_connection('default')

# Open the 'default' connection to the Milvus server on localhost.
connections.connect(
    host='localhost',
    port='19530',
    alias='default',
)

Input and vectorization

In [7]:
from datetime import datetime

# Timestamp of this run, both as a datetime object and as formatted text.
now = datetime.now()
current_datetime = now.strftime('%Y-%m-%d %H:%M:%S')

# Embed the question with the configured embedder.
question = "50th anniversary of his priestly ordination"
query_vectors = np.array(get_embedding(question, embedder))
# A single embedding comes back 1-D; turn it into a one-row 2-D array.
if query_vectors.ndim == 1:
    query_vectors = query_vectors[np.newaxis, :]

# Search settings: distance metric (L2 here; IP and others are possible) and result offset.
search_params = dict(metric_type="L2", offset=0)

AuthenticationError: Incorrect API key provided: sk-RgPgD***************************************wbD6. You can find your API key at https://platform.openai.com/account/api-keys.

Searching function

In [23]:
# Search arguments that are the same for every field collection.
shared_search_args = dict(
    data=query_vectors,
    anns_field="embeds",
    param=search_params,
    limit=5,
    partition_names=[partition_name],
    consistency_level="Strong",
)

# Search each field's "<field>_collection" and collect the raw results in the
# same order as `collection_names`.
results = []
for name in collection_names:
    collection = Collection(f"{name}_collection")
    collection.load()
    result = collection.search(output_fields=[name, 'uuid'], **shared_search_args)
    results.append(result)


Results sorting by distance and removal of duplicates (smaller distance is kept)

In [24]:
# Initialize a dictionary to hold unique results
unique_results = {}

for i, name in enumerate(collection_names):
    for result in results[i]:
        for item in result:
            uuid = item.entity.get('uuid')
            data = {
                'uuid': uuid,
                name: item.entity.get(name),
                'distance': item.distance
            }
            
            # If this UUID is not in the dictionary, or it is but the new distance is smaller, update the entry
            if uuid not in unique_results or item.distance < unique_results[uuid]['distance']:
                unique_results[uuid] = data

# Convert the dictionary back into a list of dictionaries
results_object = list(unique_results.values())

# Sort the results by distance
sorted_results = sorted(results_object, key=lambda x: x['distance'])


Top 5 results

In [25]:
# Keep at most the first five entries of sorted_results, which is already ordered
# by ascending distance. The slice is a new list holding the same dict objects.
final_results = sorted_results[:5]

Field completion:

In [26]:
# Each hit only carries the field of the collection it was found in; fetch the
# remaining fields for the same UUID from their own collections.
for result in final_results:
    for name in collection_names:
        if name in result:
            continue  # this field is already present
        collection = Collection(f"{name}_collection")
        query = f'uuid == "{result["uuid"]}"'
        query_result = collection.query(
            expr=query,
            offset=0,
            limit=1,
            partition_names=[partition_name],
            output_fields=[name],
            consistency_level="Strong",
        )
        if not query_result:
            continue  # no row for this UUID in this collection; leave the field out
        result[name] = query_result[0][name]

Printing

In [27]:
# Print every completed result with its zero-based rank, followed by a blank line.
for rank, entry in enumerate(final_results):
    print(f"Result {rank}: ", entry, "\n")

Result 0:  {'uuid': '0c8b0e0a-f6f5-4ede-a94a-fb31ec5bfc84', 'text': 'READ | Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  The said priest was ordained on July 15, 1973 in Marcilla, Navarra, Spain.    USJR.EDU.PH OAR priest celebrates 50th presbyteral anniversary in Cebu and Negros Occidental - University of San Jose - Recoletos', 'distance': 0.2943897545337677, 'time': '2023-07-14 July 14, 2023 12:25:50', 'link': 'https://facebook.com/usjr.official/posts/661949235976818'} 

Result 1:  {'uuid': 'c3a1bc3d-1bfb-4c1e-96a4-badc3cc33717', 'text': 'Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  A eucharistic celebration was done in the Archdiocesan Shrine of Our Lady of Mt. Carmel, The Clean One on July 14. This is the third leg as the first two were done in Talisay City and Bacolod City respectively.  A tribute progra

In [28]:
# Display the completed result dictionaries.
final_results

[{'uuid': '0c8b0e0a-f6f5-4ede-a94a-fb31ec5bfc84',
  'text': 'READ | Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  The said priest was ordained on July 15, 1973 in Marcilla, Navarra, Spain.    USJR.EDU.PH OAR priest celebrates 50th presbyteral anniversary in Cebu and Negros Occidental - University of San Jose - Recoletos',
  'distance': 0.2943897545337677,
  'time': '2023-07-14 July 14, 2023 12:25:50',
  'link': 'https://facebook.com/usjr.official/posts/661949235976818'},
 {'uuid': 'c3a1bc3d-1bfb-4c1e-96a4-badc3cc33717',
  'text': 'Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  A eucharistic celebration was done in the Archdiocesan Shrine of Our Lady of Mt. Carmel, The Clean One on July 14. This is the third leg as the first two were done in Talisay City and Bacolod City respectively.  A tribute program was done 

In [29]:
# Serialize the completed results to a JSON string.
string_json = json.dumps(final_results)

In [30]:
# Display the serialized JSON string.
string_json

'[{"uuid": "0c8b0e0a-f6f5-4ede-a94a-fb31ec5bfc84", "text": "READ | Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  The said priest was ordained on July 15, 1973 in Marcilla, Navarra, Spain.    USJR.EDU.PH OAR priest celebrates 50th presbyteral anniversary in Cebu and Negros Occidental - University of San Jose - Recoletos", "distance": 0.2943897545337677, "time": "2023-07-14 July 14, 2023 12:25:50", "link": "https://facebook.com/usjr.official/posts/661949235976818"}, {"uuid": "c3a1bc3d-1bfb-4c1e-96a4-badc3cc33717", "text": "Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental.  A eucharistic celebration was done in the Archdiocesan Shrine of Our Lady of Mt. Carmel, The Clean One on July 14. This is the third leg as the first two were done in Talisay City and Bacolod City respectively.  A tribute program was done at the USJ

In [31]:
import openai
import json

# Set up your OpenAI API credentials
# openai.api_key = 'your-api-key'

def generate_response(prompt, database_json, model="gpt-4", temperature=1, max_tokens=500):
    """Answer ``prompt`` with a chat model, using ``database_json`` as context.

    Args:
        prompt: The user's question.
        database_json: JSON-serializable retrieval results (here, the list of
            result dictionaries); serialized with ``json.dumps`` and passed to
            the model in a system message.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The text content of the first choice in the API response.
    """
    string_json = json.dumps(database_json)
    # Message order: persona instructions, the question, the retrieved context,
    # then an empty user turn.
    # NOTE(review): the trailing empty user message is kept exactly as in the
    # original request; confirm whether it is actually needed.
    conversation = [
        {'role': 'system', 'content': """You are Josenian Quiri. University of San Jose- Recoletos' general knowledge base assistant. Refer to yourself as JQ. If there are links, give the link as well."""},
        {'role': 'user', 'content': prompt},
        {'role': 'system', 'content': f'Here is the database JSON from your knowledge base: \n{string_json}'},
        {'role': 'user', 'content': ''}
    ]

    response = openai.ChatCompletion.create(
        model=model,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # Only the text of the first choice is returned.
    return response['choices'][0]['message']['content']

# Example usage
# Ask the assistant the same question that was embedded for the vector search,
# handing it the completed search results as context.
prompt = question

response = generate_response(database_json=final_results, prompt=prompt)
print(response)


Rev. Fr. Hernando Coja, OAR celebrated the 50th anniversary of his priestly ordination in three cities in Cebu and Negros Occidental. He was ordained on July 15, 1973, and the celebrations took place in 2023 in Talisay City, Bacolod City, and at the Archdiocesan Shrine of Our Lady of Mt. Carmel, The Clean One in Cebu. 

Fr. Coja, throughout his journey, served for 3 years as the parish priest at San Nicolas de Tolentino Parish in Talisay City. A celebration was also held at the San Vicente Ferrer Complex, attended by Fr. Coja's family, OAR priests, friends, and parishioners.

At the University of Negros Occidental – Recoletos (UNO-R) grounds in Bacolod City, a holy mass was held in the Oratory of St. Nicholas of Tolentine. The homilist for this event was Rev. Fr. Leopoldo Estioko, OAR.

In Cebu, the celebration took place at the Archdiocesan Shrine of Our Lady of Mt. Carmel, The Clean One, followed by a tribute program at the USJ-R lobby. Guests included priests and nuns from multiple 