In [None]:
! pip install langchain_google_genai langchain_community chromadb 

In [None]:
import warnings 
import os
from google.colab import userdata
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate


In [None]:
# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Read the API key from Colab's user secrets and export it as the
# GOOGLE_API_KEY environment variable for the rest of the notebook.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
os.environ.update(GOOGLE_API_KEY=GOOGLE_API_KEY)

In [None]:
# Gemini chat model; it generates both the hypothetical answer and the final answers.
# NOTE(review): `convert_system_message_to_human=True` presumably folds system
# messages into the human turn (it is not about readability) -- confirm against
# the langchain_google_genai documentation.
llm = ChatGoogleGenerativeAI(
    convert_system_message_to_human=True,
    model="gemini-pro",
)

# Embedding model used to vectorise the document chunks and the retrieval queries.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [None]:
# Source article for the demo: Lilian Weng's prompt-engineering blog post.
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
)

# Fetch the page and keep the loaded document(s) in `docs`.
docs = loader.load()

In [None]:
# Sanity-check the download without flooding the cell output: printing the
# whole document would dump the entire article, so show only its metadata
# and the first 500 characters of text.
first_doc = docs[0]
print(first_doc.metadata)
print(first_doc.page_content[:500])

In [None]:
# Recursive splitter configured with chunk_size=300 and chunk_overlap=50, so
# neighbouring chunks share some text at their boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=300,
)

# `splits` holds the chunked documents that get embedded and indexed later.
splits = text_splitter.split_documents(docs)

In [None]:
# Embed every chunk with `embeddings` and index it in a Chroma collection.
# These are ordinary document embeddings; the HyDE step happens at query time,
# when a generated hypothetical answer is used as the search text.
vector_store = Chroma.from_documents(
    splits,
    embedding=embeddings,
    collection_name="my_collection",
)

# Similarity-search retriever that returns the 4 closest chunks per query.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 4},
    search_type="similarity",
)

In [None]:
# HyDE prompt: ask the model to write a hypothetical answer to the question.
# The two instruction sentences are kept on separate lines; a trailing
# backslash inside the string would act as a line continuation and glue them
# together as "...hypothetical answerOnly generate...".
template = """

For the given question try to generate a hypothetical answer.
Only generate the answer and nothing else:
Question: {question}

"""

In [None]:
# Wrap the HyDE template in a chat prompt and render it for the demo question.
prompt = ChatPromptTemplate.from_template(template)
query = prompt.format(
    question="What are different chain of thought(CoT) Prompting?",
)

In [None]:
# Ask the chat model for the hypothetical answer and keep only its text.
hyde_response = llm.invoke(query)
hypothetical_answer = hyde_response.content
print(hypothetical_answer)

In [None]:
# Retrieval with the hypothetical answer/document as the search text (HyDE).
# `invoke` is the Runnable entry point that replaces the deprecated
# `get_relevant_documents`; it returns the same list of documents.
similar_docs_1 = retriever.invoke(hypothetical_answer)

In [None]:
# Show each chunk retrieved via the hypothetical answer, followed by a blank line.
for retrieved in similar_docs_1:
    print(retrieved.page_content, end="\n\n")

In [None]:
# The user's original question, used for baseline retrieval and for the final answer prompts.
question = "What are different chain of thought(CoT) Prompting?"

In [None]:
# Baseline for comparison: retrieve with the raw question instead of the
# hypothetical answer. `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of documents.
similar_docs_2 = retriever.invoke(question)

# Print each retrieved chunk followed by a blank line.
for doc in similar_docs_2:
    print(doc.page_content)
    print()

In [None]:
# Answer-generation prompt: the retrieved chunks fill {context} and the user's
# question fills {question}.
template_1 = """

Answer the following question in detailed based on this context:
{context}
Question: {question}

"""

# Build the prompt with `from_template`, which parses the template string and
# its placeholders; the bare constructor does not accept a raw template string.
prompt_1 = ChatPromptTemplate.from_template(template_1)

In [None]:
def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string with the page contents joined by two newlines
        (an empty string when `docs` is empty).
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

In [None]:
# Context string built from the chunks retrieved with the hypothetical answer.
formatted_docs_1 = format_docs(docs=similar_docs_1)

In [None]:
# Context string built from the chunks retrieved with the raw question.
formatted_docs_2 = format_docs(docs=similar_docs_2)

In [None]:
# Render the answer template (`template_1`), not the HyDE `prompt`: the HyDE
# prompt has no {context} slot, so formatting it would drop the retrieved
# chunks. Context here comes from the hypothetical-answer retrieval.
query_prompt_1 = ChatPromptTemplate.from_template(template_1).format(
    context=formatted_docs_1,
    question=question,
)

print(query_prompt_1)

In [None]:
# Render the answer template (`template_1`), not the HyDE `prompt`: the HyDE
# prompt has no {context} slot, so formatting it would drop the retrieved
# chunks. Context here comes from the raw-question (baseline) retrieval.
query_prompt_2 = ChatPromptTemplate.from_template(template_1).format(
    context=formatted_docs_2,
    question=question,
)

print(query_prompt_2)

In [None]:
# Generate the answer from the first rendered prompt (`query_prompt_1`).
response_1 = llm.invoke(input=query_prompt_1)

In [None]:
# Display the text of the first answer.
answer_text_1 = response_1.content
print(answer_text_1)

In [None]:
# Generate the answer from the second rendered prompt (`query_prompt_2`).
response_2 = llm.invoke(input=query_prompt_2)

In [None]:
# Display the text of the second answer.
answer_text_2 = response_2.content
print(answer_text_2)