In [1]:
# Import necessary libraries and load the OpenAI API key
from langchain_openai import OpenAIEmbeddings
import getpass
import os
from dotenv import load_dotenv

# Read variables from a .env file; override=True lets them replace values already set.
load_dotenv(override=True)

# Ask for the key interactively only when it is missing or empty.
_KEY_NAME = "OPENAI_API_KEY"
if os.environ.get(_KEY_NAME) in (None, ""):
    os.environ[_KEY_NAME] = getpass.getpass("OpenAI API Key:")

In [2]:
# Import necessary classes (document loading, text splitting, embeddings, IRIS vector store)
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.fastembed import FastEmbedEmbeddings

from langchain_iris import IRISVector




In [3]:
# Helper functions

# Loads a text file and splits it into chunks ready for embedding
def load_document(file_path, chunk_size=200, chunk_overlap=30):
    """
    Load a UTF-8 text file and split it into chunks.

    Args:
        file_path: path to the text file to load
        chunk_size: target maximum chunk length passed to CharacterTextSplitter
            (default 200, the value previously hard-coded here)
        chunk_overlap: overlap between consecutive chunks passed to
            CharacterTextSplitter (default 30, the value previously hard-coded here)

    Returns:
        The list of chunked documents produced by ``split_documents``.
    """
    loader = TextLoader(file_path, encoding='utf-8')
    documents = loader.load()

    # NOTE: CharacterTextSplitter only cuts at its separator, so a stretch of text
    # without a separator can end up longer than chunk_size; the splitter then logs
    # "Created a chunk of size N, which is longer than the specified ...".
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    return text_splitter.split_documents(documents)

def get_answer(query, vectorstore, search_kwargs=None):
    """
    Perform similarity search in the vector store and return the most relevant
    result.

    Args:
        query: text to search for
        vectorstore: object exposing ``similarity_search_with_score(query, **kwargs)``
            that returns a sequence of ``(document, score)`` pairs
        search_kwargs: optional dict of extra keyword arguments forwarded to
            ``similarity_search_with_score`` (which keys are accepted depends on the
            vector store); ``None`` or an empty dict forwards nothing

    Returns:
        dict with two keys:
            "content": ``page_content`` of the first result, or
                "No relevant information found." when the search returns nothing
            "score": the score the vector store reported for that result, or 0
                when the search returns nothing
    """
    # A None default (instead of {}) avoids sharing one mutable dict across calls;
    # copying keeps the caller's dict untouched.
    extra_kwargs = dict(search_kwargs) if search_kwargs else {}
    results = vectorstore.similarity_search_with_score(query, **extra_kwargs)

    if not results:
        return {
            "content": "No relevant information found.",
            "score": 0
        }

    # The first entry is assumed to be the most relevant one.
    document, score = results[0]
    return {
        "content": document.page_content,
        "score": score
    }

In [7]:
# If reconnecting to the database, use this:

# db = IRISVector(
#     embedding_function=embeddings,
#     dimension=1536,
#     collection_name=COLLECTION_NAME,
#     connection_string=CONNECTION_STRING,
# )

In [8]:
# To add documents to existing vector store:

# db.add_documents(docs)

In [5]:
# Query in --> dict out (the best-matching text is under the 'content' key)

def vector_search_response(query, file_path, collection_name):
    """
    Utilizes vector search to determine appropriate response based on user query.

    Loads and chunks the file at ``file_path``, embeds the chunks with
    ``OpenAIEmbeddings``, stores them in the IRIS collection ``collection_name``
    and runs a similarity search for ``query``.

    Args:
        query: string input prompt from the user
        file_path: path to content.txt
        collection_name: name for the collection

    Returns:
        The dict returned by ``get_answer``; the matched text is under "content".

    Environment variables (defaults in parentheses):
        IRIS_USERNAME (demo), IRIS_PASSWORD (demo), IRIS_HOSTNAME (localhost),
        IRIS_PORT (1972), IRIS_NAMESPACE (USER)
    """
    from urllib.parse import quote

    # Connection settings: read from the environment so credentials need not live
    # in the notebook; the fallbacks are the demo values used previously.
    username = os.getenv('IRIS_USERNAME', 'demo')
    password = os.getenv('IRIS_PASSWORD', 'demo')
    hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
    port = os.getenv('IRIS_PORT', '1972')
    namespace = os.getenv('IRIS_NAMESPACE', 'USER')

    # Percent-encode the credentials so characters such as '@', ':' or '/' cannot
    # break the URL structure.
    CONNECTION_STRING = (
        f"iris://{quote(username, safe='')}:{quote(password, safe='')}"
        f"@{hostname}:{port}/{namespace}"
    )

    # Load content from internal
    docs = load_document(file_path)

    # Defining the database
    # NOTE(review): from_documents runs on every call, so the file is re-embedded
    # each time; repeated calls with the same collection_name presumably add the
    # same chunks again -- confirm, and consider reconnecting to the existing
    # collection instead of re-indexing.
    db = IRISVector.from_documents(
        # Embedding function
        embedding=OpenAIEmbeddings(),
        # Stores all meaningful chunks
        documents=docs,
        collection_name=collection_name,
        connection_string=CONNECTION_STRING,
    )

    return get_answer(query, db)

# Function call
# The document path and collection name can be overridden through the environment
# (CONTENT_PATH, IRIS_COLLECTION_NAME) so the notebook is not tied to one machine;
# the fallbacks are the values used for the recorded run.
content_path = os.getenv("CONTENT_PATH", '/Users/kevinxie/Desktop/documents/content.txt')
collection = os.getenv("IRIS_COLLECTION_NAME", 'function_test')
answer = vector_search_response("Tell me about the Plymouth Colony", content_path, collection)
print(answer['content'])

Created a chunk of size 644, which is longer than the specified 200
Created a chunk of size 304, which is longer than the specified 200
Created a chunk of size 270, which is longer than the specified 200
Created a chunk of size 336, which is longer than the specified 200
Created a chunk of size 442, which is longer than the specified 200
Created a chunk of size 301, which is longer than the specified 200
Created a chunk of size 287, which is longer than the specified 200
Created a chunk of size 286, which is longer than the specified 200
Created a chunk of size 231, which is longer than the specified 200
Created a chunk of size 320, which is longer than the specified 200
Created a chunk of size 223, which is longer than the specified 200
Created a chunk of size 206, which is longer than the specified 200
Created a chunk of size 304, which is longer than the specified 200
Created a chunk of size 365, which is longer than the specified 200
Created a chunk of size 371, which is longer tha

Plymouth Colony: An English colony founded by the Pilgrims in 1620 in present-day Massachusetts. Known for the Mayflower Compact, an early form of self-government.


In [None]:
# for doc, score in docs_with_score:
#     print("-" * 80)
#     print("Score: ", score)
#     print(doc.page_content)
#     print("-" * 80)
# retriever = db.as_retriever()
# print(retriever)