In [None]:
import os
import glob
import gradio as gr

In [None]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
# from langchain_core.documents import Document

# ** HuggingFaceEmbeddings requires sentence-transformers package
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain
from langchain_ollama import ChatOllama

In [None]:
# Read in documents using LangChain's loaders.
# Each sub-folder of the knowledge base is loaded separately, and the folder
# name is stored on every document it yields as metadata["doc_type"].

# Keep directories only: glob("knowledge-base/*") also matches plain files, and
# DirectoryLoader rejects a path that is not a directory, so a single stray
# file at the top level would otherwise abort the whole load.
folders = [path for path in glob.glob("knowledge-base/*") if os.path.isdir(path)]

# Passed through to TextLoader so every file is decoded as UTF-8.
text_loader_kwargs = {'encoding': 'utf-8'}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
        use_multithreading=True,
    )
    folder_docs = loader.load()
    for doc in folder_docs:
        # Tag each document with the folder it came from.
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [None]:
# Split the loaded documents into chunks ready for embedding.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = CharacterTextSplitter(**splitter_settings)
chunks = text_splitter.split_documents(documents)

In [None]:
# Embedding model that will associate a vector embedding with each chunk
# once the chunks are put into the vector store

embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [None]:
# Start from scratch: if a Chroma datastore already exists on disk, open it and delete its collection

db_name = 'vector_db'

if os.path.exists(db_name):
    stale_store = Chroma(persist_directory=db_name, embedding_function=embeddings)
    stale_store.delete_collection()

In [None]:
# Build the Chroma vectorstore from the chunks (persist_directory=db_name) and report its size

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
print(f'Vectorstore created with {vectorstore._collection.count()} documents')

In [None]:
# # create a new chat model served by Ollama
# llm = ChatOllama(temperature=0.7, model='gpt-oss:120b-cloud')

# # set up the conversation memory for the chat
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# # the retriever is an abstraction over the VectorStore that will be used during RAG
# retriever = vectorstore.as_retriever()

# # putting it together: set up the conversation chain with the LLM, the vector store and memory
# conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# Wrapping that in a function

# def chat(message, history):
#     result = conversation_chain.invoke({"question": message})
#     return result["answer"]

In [None]:
# gr.ChatInterface(fn=chat).launch()

# The LLM can't give the name of the winner of the 2023 IIOTY award. Let's investigate what gets sent behind the scenes to see how we might solve this problem

In [None]:
from langchain_core.callbacks import StdOutCallbackHandler

# Chat model served through Ollama
llm = ChatOllama(model='gpt-oss:120b-cloud', temperature=0.7)

# Conversation memory, stored under the 'chat_history' key
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# k is how many chunks the retriever uses per query; we take more chunks
# to provide better context for the LLM's answer
retrieval_settings = {"k": 25}
retriever = vectorstore.as_retriever(search_kwargs=retrieval_settings)

# Put it together: LLM + retriever + memory, with a stdout callback handler
# attached so we can inspect what the chain does
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()],
)

# query = "Who received the IIOTY award in 2023?"
# result = conversation_chain.invoke({"question":query})
# answer = result["answer"]
# print("\nAnswer:", answer)

In [None]:
def chat(message, history):
    """Answer one user message with the conversation chain.

    Args:
        message: The user's latest message; sent to the chain as "question".
        history: Chat history supplied by the caller. It is not read here.

    Returns:
        The "answer" entry of the chain's result.
    """
    payload = {"question": message}
    return conversation_chain.invoke(payload)["answer"]

In [None]:
# Serve the chat function through Gradio's chat interface
chat_ui = gr.ChatInterface(fn=chat)
chat_ui.launch()