<a href="https://colab.research.google.com/github/ak2742/mlplay/blob/Fine-Tuning/09)_Web_data_RAG_with_gemini_pro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install libraries

!pip install -q langchain==0.1.2
!pip install -q google-generativeai langchain-google-genai

# Python framework for state-of-the-art sentence, text and image embeddings.
!pip install -q sentence-transformers

# FAISS Vector Databses specific Libraries
!pip install -q faiss-gpu

# to load webpages
!pip install -q beautifulsoup4

## **Get Gemini Key from Secrets**
Set a secret named `GEMINI_KEY` in Google Colab (the key icon in the left sidebar) and read it here to run the Google Gemini LLM. You can get a Google Gemini API key from the following link: https://makersuite.google.com/app/apikey

In [None]:
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI

# Read the API key stored under the Colab secret named 'GEMINI_KEY'.
GOOGLE_API_KEY = userdata.get('GEMINI_KEY')

# Settings for the Gemini chat model, kept together so they are easy to tweak.
gemini_settings = {
    "model": "gemini-pro",
    "google_api_key": GOOGLE_API_KEY,
    "convert_system_message_to_human": True,
}

# Chat model shared by every chain built later in the notebook.
llm = ChatGoogleGenerativeAI(**gemini_settings)

In [None]:
#@title load web page

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Page that serves as the knowledge source for the RAG pipeline.
SOURCE_URL = "https://docs.smith.langchain.com/overview"

# Download the page and wrap its content in LangChain documents.
loader = WebBaseLoader(SOURCE_URL)
docs = loader.load()

# Cut the loaded documents into smaller chunks (splitter defaults are used).
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)


In [None]:
# HuggingFaceEmbeddings is imported from langchain_community, like the other
# community integrations in this notebook; the `langchain.embeddings` path is
# deprecated.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, ConversationalRetrievalChain

# Load the sentence-transformers model once and reuse the same instance for
# indexing, instead of constructing a second copy of the model in memory.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# Create a FAISS index over the document chunks using those embeddings
vector = FAISS.from_documents(documents, embeddings)


In [None]:
#@title Create Chains

# Prompt text: {context} receives the retrieved documents and {input} the
# user's question.
ANSWER_TEMPLATE = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""

prompt = ChatPromptTemplate.from_template(ANSWER_TEMPLATE)

# Chain that passes a list of documents to the LLM through the prompt above.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retriever view over the FAISS index, used to look up documents for a query.
retriever = vector.as_retriever()

# Retrieval + answering: fetch documents for the input, then run document_chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

# Conversational variant that takes the chat history along with the question
# and also returns the source documents.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
#@title Run Chains

from langchain_core.documents import Document

# Exercise the document chain on its own with a hand-written context document,
# bypassing the retriever.
sample_context = [Document(page_content="langsmith can let you visualize test results")]
sample_request = {
    "input": "how can langsmith help with testing?",
    "context": sample_context,
}

# Last expression of the cell, so the chain's answer is displayed as output.
document_chain.invoke(sample_request)

In [None]:
# Ask the full retrieval chain, which looks up context from the FAISS index.
retrieval_request = {"input": "how can langsmith help with testing?"}
response = retrieval_chain.invoke(retrieval_request)

# Show only the generated answer from the response.
print(response["answer"])

In [None]:
import sys
# Interactive chat against the conversational retrieval chain.
# The loop ends when the user types one of EXIT_COMMANDS or interrupts the
# cell, so the notebook can always run to completion.
EXIT_COMMANDS = {'exit', 'quit'}

# (question, answer) pairs from earlier turns, passed back to the chain so
# follow-up questions can refer to the conversation so far.
chat_history = []

print("Type 'exit' or 'quit' to end the chat.")
while True:
    try:
        query = input('Prompt: ').strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or closing stdin ends the chat cleanly
        # instead of surfacing a traceback.
        break

    if query.lower() in EXIT_COMMANDS:
        break
    if not query:
        # Do not spend an LLM call on blank input.
        continue

    # `.invoke` replaces the deprecated direct call `qa_chain({...})`.
    result = qa_chain.invoke({'question': query, 'chat_history': chat_history})
    print('Answer: ' + result['answer'] + '\n')
    chat_history.append((query, result['answer']))