In [None]:
!pip install langchain_community langchainhub chromadb langchain langchain-openai

In [None]:
import os

from google.colab import userdata

# Copy the value stored under the Colab secret 'openAIYtKey' into the
# OPENAI_API_KEY environment variable of this process.
os.environ['OPENAI_API_KEY'] = userdata.get('openAIYtKey')

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Page that serves as the knowledge source for the rest of the notebook.
COURSE_URL = "https://www.educosys.com/course/genai"

loader = WebBaseLoader(web_paths=[COURSE_URL])
docs = loader.load()

# Show the raw loaded documents for inspection.
print(docs)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents with chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Preview the first three chunks. Slicing (rather than indexing 0, 1, 2)
# avoids an IndexError when the page produces fewer than three chunks.
for chunk in splits[:3]:
  print(chunk)

In [None]:
# How many chunks the splitter produced.
print(len(splits))

In [None]:
# Import from the maintained integration packages (langchain-openai and
# langchain_community, both installed in the first cell) instead of the
# deprecated `langchain.embeddings.openai` / `langchain.vectorstores` paths.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embed every chunk with OpenAIEmbeddings and index the results in Chroma.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [None]:
# Print count() of the underlying collection (presumably the number of stored
# chunks). Note this reaches into the private `_collection` attribute.
print(vectorstore._collection.count())

In [None]:
# Print whatever the underlying collection's get() returns when called with no
# filters. NOTE(review): the output may be large for big collections.
print(vectorstore._collection.get())

In [None]:
# Show the stored document and embedding for three entries of the collection.
# The ids are read from the collection itself: the previously hard-coded UUIDs
# came from one particular run and will not match ids created on a fresh run.
sample_ids = vectorstore._collection.get(limit=3)["ids"]
for position, entry_id in enumerate(sample_ids, start=1):
  print(f"\nCollection {position} - ", vectorstore._collection.get(ids=[entry_id], include=["embeddings", "documents"]))

In [None]:
# Expose the vector store as a retriever, using the defaults (no arguments passed).
retriever = vectorstore.as_retriever()

In [None]:
from langchain import hub
# Pull the prompt published as "rlm/rag-prompt" from the hub
# (presumably needs network access — confirm when running offline).
prompt = hub.pull("rlm/rag-prompt")

In [None]:
from langchain_openai import ChatOpenAI
# Chat model built with the library defaults (no model name or other options given).
llm = ChatOpenAI()

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(docs):
  """Join the text of several documents into one newline-separated string.

  Args:
    docs: Iterable of objects exposing a string `page_content` attribute.

  Returns:
    The `page_content` values in iteration order, separated by "\\n";
    an empty string when `docs` is empty.
  """
  contents = [doc.page_content for doc in docs]
  return "\n".join(contents)

In [None]:
# Inputs handed to the prompt: "context" is the retriever output flattened by
# format_docs, "question" is the value the chain was invoked with.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> LLM -> plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Ask the chain a question; as the cell's last expression, the answer is displayed.
rag_chain.invoke("Are the recordings of the course available? For how long?")

In [None]:
# Ask about testimonials (typo "studenst" fixed — the question text is what
# gets sent to the retriever and the LLM).
rag_chain.invoke("Are the testimonials for the course available? Name the students who have shared testimonials")

In [None]:
# Ask the chain whether certificates are provided; the answer is displayed as the cell output.
rag_chain.invoke("Are the certificates for the course provided?")

In [None]:
# Ask the chain which projects the course covers; the answer is displayed as the cell output.
rag_chain.invoke("What all projects are covered in the course?")

In [None]:
from langchain_core.runnables import RunnableLambda

In [None]:
def print_prompt(prompt_text):
  """Print the value passing through the chain and hand it on unchanged.

  Args:
    prompt_text: Whatever the previous chain step produced.

  Returns:
    The same `prompt_text` object, so the next step receives it untouched.
  """
  label = "Prompt - "
  print(label, prompt_text)
  return prompt_text

In [None]:
# Same pipeline as rag_chain, with print_prompt inserted between the prompt
# and the LLM so the prompt's output is printed before the model is called.
debug_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain_with_print = (
    debug_inputs
    | prompt
    | RunnableLambda(print_prompt)
    | llm
    | StrOutputParser()
)

In [None]:
# Run the question through the variant chain that prints the prompt's output
# before the LLM step; the answer is displayed as the cell output.
rag_chain_with_print.invoke("What all projects are covered in the course?")