In [1]:
import os
import openai
import langchain
import panel as pn
import param

from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI


# Shared model handles reused by every cell below.
# NOTE(review): both clients presumably read the OpenAI API key from the
# environment — confirm it is configured before running this cell.
embeddings = OpenAIEmbeddings()

# Chat model used for answering; temperature is pinned to 0.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0)

In [None]:
# Text loading, splitting and vectorstore definition

from langchain.document_loaders import Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Collect the documents of every source folder into one flat list.
# Each file is handed to Docx2txtLoader; files that fail to load are reported
# (with the reason) and skipped so one bad file does not abort the whole run.
all_texts = []
for folder_name in ("patents", "lectures", "articles"):
    folder_path = os.path.join('.', 'docs', folder_name)
    # Sorted so the document order is reproducible across runs and platforms.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        try:
            all_texts += Docx2txtLoader(file_path).load()
        except Exception as error:
            # Catch Exception (not a bare except) so Ctrl-C / kernel interrupt
            # still stops the cell, and surface why the file was rejected.
            print(f"cannot read {file_path}: {error}")

# Split the documents into 500-character chunks without overlap, embed them
# and build a Chroma vectorstore configured with a persist directory.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
vectorstore = Chroma.from_documents(
    documents=text_splitter.split_documents(all_texts),
    embedding=embeddings,
    persist_directory=os.path.join(".", "docs", "merged_vectorstore"),
)

In [None]:
# Vectorstore loading: point Chroma at the persist directory used when the
# store was built, with the same embedding function.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=os.path.join(".", "docs", "merged_vectorstore"),
)

In [None]:
# Basic Retriever example

from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Single-turn example: build a RetrievalQA chain on top of the vectorstore
# retriever and print the chain's full response for one question.
question = "How many poles should my electromotor have, and what should I do if I have the wrong number?"
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
print(qa_chain({"query": question}))

In [None]:
# Retriever with memory example

from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Conversational chain: the buffer memory is attached under "chat_history"
# so the second question can build on the first exchange.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

# Two interactive turns: read a question, echo it, then print the answer.
for turn in range(2):
    question = input()
    print(f"Question: {question}")
    answer = qa({'question': question})['answer']
    print(f"Answer: {answer}")