In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import os

In [2]:
# Open the persisted Chroma vector store that the ingest/pipeline script built.
# The embedding model is passed to Chroma as its embedding function for this store.
embedding_model = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

db_name = 'vector_db'


# Load the existing vector database; stop right here with a clear error when it
# is missing, because every later cell depends on `vectorstore` being defined.
if os.path.exists(db_name):
  vectorstore = Chroma(persist_directory=db_name, embedding_function=embeddings)
  # `_collection` is a private attribute of the Chroma wrapper; it is used here
  # only to report how many entries the store holds.
  print(f"Vectorstore loaded with {vectorstore._collection.count()} documents")
else:
  # Previously this branch only printed the message, leaving `vectorstore`
  # undefined so a later cell failed with an unrelated NameError.
  raise FileNotFoundError("Vectorstore does not exist. Please run the ingestor script first.")

  embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
  from .autonotebook import tqdm as notebook_tqdm
  vectorstore = Chroma(persist_directory=db_name, embedding_function=embeddings)


Vectorstore loaded with 10001 documents


In [5]:
# Build the RAG conversation chain: chat model + retriever + conversation memory.
from langchain_ollama import ChatOllama
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Chat model served through Ollama
MODEL = "llama3.2:latest"
llm = ChatOllama(model=MODEL, temperature=0.7)

# Retriever interface over the vector store; the chain queries it during RAG
retriever = vectorstore.as_retriever()

# Conversation memory, exposed to the chain under the 'chat_history' key
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# Wire the three pieces together into a single conversational retrieval chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [6]:
import gradio as gr

def chat(message, history):
    """Answer one user message through the conversational retrieval chain.

    Args:
        message: The text the user just submitted in the chat UI.
        history: The conversation as tracked by Gradio. The chain keeps its own
            memory, so this is only inspected to detect a fresh/cleared chat.

    Returns:
        The chain's answer string (the "answer" field of the chain result).
    """
    # An empty Gradio history means the user started over (first message, page
    # reload, or the chat was cleared). Drop the chain's stored turns as well,
    # otherwise answers stay conditioned on a conversation the user no longer sees.
    # NOTE(review): the chain memory is a single shared object, so this assumes
    # one user at a time (local notebook use) — confirm before sharing publicly.
    if not history and conversation_chain.memory is not None:
        conversation_chain.memory.clear()

    result = conversation_chain.invoke({"question": message})
    return result["answer"]

# NOTE(review): `view` receives the return value of launch(), not the
# ChatInterface object itself — confirm if later code needs the interface.
view = gr.ChatInterface(chat).launch()



* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.
