Started once text preprocessing and the vector database setup were finished :)

In [10]:
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma
from langchain.llms import openai
from pydantic import model_validator


# Load the saved vector database
def load_vector_database(filename='vector_database.pkl'):
    """Read a pickled vector database and return its two components.

    The pickle is expected to contain a mapping with the keys
    ``'embeddings'`` and ``'metadata'``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        A ``(embeddings, metadata)`` tuple taken from the unpickled mapping.

    Raises:
        KeyError: If either expected key is missing from the mapping.

    NOTE: ``pickle.load`` can execute arbitrary code while unpickling, so
    only point this at files that were produced by a trusted process.
    """
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)
    print(f"Vector database loaded from {filename}")
    embeddings = payload['embeddings']
    metadata = payload['metadata']
    return embeddings, metadata

# Initialize the SentenceTransformer model.
# This single module-level instance is the one handed to
# SentenceTransformerEmbeddingFunction further down in this script.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Wrapper that exposes the SentenceTransformer model through the embedding
# interface LangChain's Chroma vector store calls (embed_documents / embed_query)
class SentenceTransformerEmbeddingFunction:
    """Adapt a SentenceTransformer-style model to LangChain's embedding interface.

    LangChain vector stores call ``embed_documents`` when texts are added and
    ``embed_query`` when a search is run; an object that is only callable
    fails with ``AttributeError`` at those points. ``__call__`` is kept so any
    existing code that invokes the wrapper directly keeps working.

    Args:
        model: Any object with an ``encode(list_of_texts)`` method that
            returns one vector (an iterable of numbers) per input text.
    """

    def __init__(self, model):
        self.model = model

    def __call__(self, input):
        """Encode ``input`` with the wrapped model and return its raw output."""
        return self.model.encode(input)

    def embed_documents(self, texts):
        """Return one embedding per text as a list of lists of floats.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list with one ``list[float]`` per input text, in input order.
        """
        vectors = self.model.encode(list(texts))
        # Convert to plain Python floats so the result does not depend on the
        # array type the model happens to return.
        return [[float(value) for value in vector] for vector in vectors]

    def embed_query(self, text):
        """Return the embedding of a single query string as a list of floats."""
        return self.embed_documents([text])[0]

# Vector database setup using Chroma
# Wrap the shared SentenceTransformer instance so it can be passed to Chroma
# as its embedding function.
embedding_function = SentenceTransformerEmbeddingFunction(model)

# Module-level Chroma store; the populate and query code below both use this
# same `vector_store` object.
vector_store = Chroma(
    collection_name="document_collection",
    embedding_function=embedding_function,
    persist_directory='vector_db'  # Directory for persistent Chroma storage
)

# Ensure the vector store is populated with the saved embeddings and metadata
def populate_vector_store_from_file(vector_store, filename='vector_database.pkl'):
    """Load the saved vector database and add its contents to ``vector_store``.

    Each metadata entry must carry an ``'original_text'`` key; those texts are
    added to the store together with the full metadata entries and the saved
    embeddings, after which the store is persisted.

    Args:
        vector_store: Store object providing ``add_texts`` and ``persist``.
        filename: Path of the pickle file handed to ``load_vector_database``.

    Raises:
        KeyError: If a metadata entry has no ``'original_text'`` key.

    NOTE(review): LangChain's ``Chroma.add_texts`` may compute embeddings
    itself and ignore the ``embeddings`` keyword — confirm against the
    installed version.
    """
    embeddings, metadata = load_vector_database(filename)

    # Collect the raw text of every entry, in the same order as the metadata.
    original_texts = []
    for entry in metadata:
        original_texts.append(entry['original_text'])

    vector_store.add_texts(
        texts=original_texts,
        metadatas=metadata,
        embeddings=embeddings,
    )
    vector_store.persist()
    print("Vector store populated with data from file.")

# Populate the vector store
# NOTE(review): this call runs every time the script/cell is executed; since
# the store is configured with a persist directory, repeated runs presumably
# add the same texts again — confirm and guard if duplicates matter.
populate_vector_store_from_file(vector_store)

# Step 2: Define a function to retrieve and generate responses
def handle_query(user_query):
    """
    Handles a user query by retrieving relevant documents and generating a response.

    Args:
        user_query: The question to answer, as plain text.

    Returns:
        The text produced by the LLM for the assembled prompt.
    """
    # Perform similarity search in the vector database. The store embeds the
    # query text itself, so no separate model.encode call is needed here.
    results = vector_store.similarity_search(
        query=user_query,
        k=2,  # Number of relevant documents to retrieve
    )

    # similarity_search returns Document objects rather than dicts, so the
    # text is read from the page_content attribute (subscripting a Document
    # raises TypeError).
    retrieved_docs = [result.page_content for result in results]

    # Construct a simple prompt without using PromptTemplate
    prompt = f"""
    You are an AI assistant. Below are some relevant documents retrieved based on a user's query.
    Use this information to generate a concise and helpful response.

    Relevant Documents:
    {retrieved_docs}

    User Query:
    {user_query}

    Your Response:
    """

    # `openai` is the langchain.llms.openai module imported at the top of the
    # file; a module is not callable, so the LLM class has to be taken from it.
    # NOTE(review): gpt-4 is a chat model — confirm the installed LangChain
    # version accepts it through this class, or switch to a chat model class.
    llm = openai.OpenAI(model_name="gpt-4")  # Replace with your preferred LLM model or API key setup

    # Generate the response
    response = llm(prompt)
    return response

# Step 3: Process a user query
# Hard-coded example query used to exercise retrieval + generation end to end.
user_query = "how to make chicken"
response = handle_query(user_query)

# Step 4: Display the response
# Echo the query next to the generated answer so the output is self-explanatory.
print(f"User Query: {user_query}")
print(f"AI Response:\n{response}")


ImportError: cannot import name 'model_validator' from 'pydantic' (/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydantic/__init__.cpython-310-darwin.so)