In [1]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is missing or empty.
# The previous self-assignment (os.environ[k] = os.getenv(k)) did nothing when
# the key was present and raised "TypeError: str expected, not NoneType" when
# it was absent.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [2]:
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader, SeleniumURLLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.prompts import PromptTemplate
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

In [3]:
# Page handed to load_document() in the (currently commented-out) loader experiments below.
website = "https://hackernoon.com/vector-databases-getting-started-with-chromadb-and-more"

def load_document(loader_class, website_url):
    """Load a single web page using the supplied loader class.

    Args:
        loader_class: Callable (typically a document-loader class) that is
            invoked with a one-element list containing ``website_url`` and
            returns an object exposing ``load()``.
        website_url: URL of the page to load.

    Returns:
        Whatever the loader's ``load()`` method returns.
    """
    urls = [website_url]
    return loader_class(urls).load()

#wb_loader_doc = load_document(WebBaseLoader, website)
#wb_loader_doc[0].page_content


In [4]:
#selenium_loader_doc = load_document(SeleniumURLLoader, website)
#selenium_loader_doc[0].page_content

In [5]:
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=200)
#splits = text_splitter.split_documents(selenium_loader_doc)
#splits[2]

In [6]:
#client = chromadb.PersistentClient(path="/src/database/chroma_data")

#client = chromadb.HttpClient(host="localhost", port=8000)
#collection = client.create_collection("open2_collection", embedding_function=embedding_functions.OpenAIEmbeddingFunction(api_key=os.environ["OPENAI_API_KEY"]))

In [7]:
#collection = client.get_collection("open2_collection")
#for n in range(0, 10):
#   collection.add(documents=[splits[n].page_content], ids=['id' + str(n)])

In [8]:
# Prompt with two placeholders, {context} and {question}, that are filled in
# when the chain runs. The wording tells the model to answer from the supplied
# context only.
prompt = PromptTemplate.from_template(
    """
    Answer the question based only on the following context:
    Context: {context} 
    Question: {question} 
    """
)

In [9]:
# Chat model used as the answering step of the chain.
# NOTE(review): temperature=0 is presumably chosen to keep answers as
# repeatable as possible between runs - confirm that is the intent.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [10]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Small set of demo facts that gets embedded and indexed in Chroma.
pa_facts = [
    "PA is working at SAP",
    "PA also participates in a non-profit project",
    "PA's fav pokemon is Snorlax",
    "PA likes tacos",
]

# Stable ids make this cell safe to re-run: when no ids are given, random ones
# are generated per call, so running the cell again against the same
# in-process collection would store the same texts a second time and the
# retriever could return duplicates. With fixed ids the records are
# overwritten instead.
vectorstore = Chroma.from_texts(
    pa_facts,
    embedding=OpenAIEmbeddings(),
    ids=[f"pa-fact-{index}" for index in range(len(pa_facts))],
)
#vectorstore.persist()

# Retriever with default search settings; used as the context source of the chain.
retriever = vectorstore.as_retriever()

In [11]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields Document objects. Passing that list straight into the
# prompt would render {context} as the list's repr (including metadata and
# wrapper syntax), so the documents are flattened to plain text first.
# The incoming question is used twice: as the retrieval query and, unchanged
# via RunnablePassthrough, as the {question} value of the prompt.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [12]:
# Run the chain on a sample question. The bare `response` on the last line
# makes the notebook display the returned answer.
response = rag_chain.invoke("What can you tell me about PA?")
response

'Based on the context provided, we can tell that PA is involved in a non-profit project, works at SAP, has Snorlax as their favorite Pokemon, and likes tacos.'