# Leveraging Gemini-Pro for Retrieval Augmented Generation

<table align="left"  width="100%">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/guruvittal/codesamples/blob/main/Embeddings_Demo.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/guruvittal/codesamples/blob/main/Embeddings_Demo.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/guruvittal/codesamples/main/Embeddings_Demo.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Install dependencies

In [None]:
!pip install --upgrade google-cloud-aiplatform


In [None]:

# Install langchain and related libraries
!pip install langchain unstructured[pdf]


In [None]:

# Using Google Cloud Storage Directory loader from langchain
from langchain.document_loaders import GCSDirectoryLoader

In [None]:
# Store docs in local vectorstore as index
!pip install -q chromadb


## Authentication

In [None]:
import sys

# Interactive user login is only triggered when the Colab runtime module is
# loaded; any other runtime skips this branch entirely.
if "google.colab" in sys.modules:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

## Constants & Helper Functions

In [None]:
# GCP project and region passed to vertexai.init below.
# NOTE(review): "Project_id" looks like a placeholder — set it to your own
# project ID before running.
PROJECT_ID = "Project_id"   # @param {type: "string"}
LOCATION = 'us-central1' # @param {type: "string"}


import vertexai
# Initialise the Vertex AI SDK with the project and region configured above.
vertexai.init(project=PROJECT_ID, location=LOCATION)
from vertexai.preview.generative_models import GenerativeModel, Part

def generate(prompt=None, model_name="gemini-pro"):
    """Stream a Gemini completion for a text prompt and print it as it arrives.

    Args:
        prompt: Text sent to the model. When omitted, a built-in sample
            question with a short grounding context is used.
        model_name: Vertex AI model identifier. Defaults to the text model
            "gemini-pro", which matches the text-only prompt and the model
            used by the retrieval chain later in this notebook.

    Returns:
        None. The streamed chunks are written to stdout without a trailing
        newline.
    """
    if prompt is None:
        prompt = """Answer the question: Who is the killer of John?
Based on the context: John died due to heart attack"""

    model = GenerativeModel(model_name)
    responses = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.9,
            "top_p": 1,
        },
        safety_settings=[],
        stream=True,
    )

    # stream=True yields partial responses; print each chunk as it arrives.
    for response in responses:
        print(response.text, end="")

# Run the sample prompt once; the streamed reply is printed right after
# the banner line.
print("Calling generate")
generate()

Calling generate
The provided context does not mention anything about a killer, therefore I cannot answer this question.

## Load documents

In [None]:
# Load the contents of the "empdocs" Cloud Storage bucket as LangChain documents.
# NOTE(review): the bucket name is hard-coded — point it at your own bucket.
loader = GCSDirectoryLoader(project_name=PROJECT_ID, bucket="empdocs")
documents = loader.load()
# Display how many documents were loaded.
len(documents)

3

In [None]:
# split the documents into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks for embedding.
# chunk_size / chunk_overlap are measured with the splitter's length
# function (characters by default — TODO confirm for the installed version).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
docs = text_splitter.split_documents(documents)
# NOTE: despite the label, this prints the number of chunks, not source documents.
print(f"# of documents = {len(docs)}")

# of documents = 15


## Build embeddings for the document corpus

In [None]:
from langchain.embeddings import VertexAIEmbeddings
# Request rate handed to the embedding wrapper.
# NOTE(review): presumably chosen to stay under a per-minute API quota — confirm.
REQUESTS_PER_MINUTE = 590

# Embedding model passed to the Chroma vector store when the index is built.
embedding = VertexAIEmbeddings(model_name="textembedding-gecko@001",requests_per_minute=REQUESTS_PER_MINUTE)


In [None]:

# Chroma DB as Vector Store Database
from langchain.vectorstores import Chroma

# Embed the chunks with `embedding` and index them in a Chroma collection.
# No persist directory is passed, so presumably the index only lives for
# this session — confirm.
emphandbook_db = Chroma.from_documents(docs, embedding)

## Define the retrieval strategy and the number of neighbors to return

In [None]:
# Expose the index as a retriever: similarity search returning the 6
# closest chunks for each query.
retriever = emphandbook_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k":6})

## Build the retrieval Q&A chain

In [None]:
from langchain.llms import VertexAI
# Create chain to answer questions
from langchain.chains import RetrievalQA

# Gemini Pro accessed through LangChain's VertexAI wrapper, configured with
# a low temperature and a 256-token output cap.
llm = VertexAI(
    model_name='gemini-pro',
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

# Uses LLM to synthesize results from the search index.
# The LLM is the Gemini Pro model configured above.
# chain_type="stuff" puts the retrieved chunks into a single prompt, and
# return_source_documents=True returns those chunks alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True)


## Test out the retrieval Q&A chain

In [None]:
# Ask a sample question; the chain output is indexed by "query", "result"
# and "source_documents" below.
query = "Think through the steps before you answer this question: How many days of vacation does an employee get"
result = qa({"query": query})
print(result["query"])
print(result["result"])
# Print each retrieved chunk followed by its "source" metadata entry.
for i in result["source_documents"]:
  print (i.page_content)
  print (i.metadata["source"])



In [None]:
# List the keys of the chain output to see which fields are available.
# (dir(result.values) only listed attributes of the bound method object.)
list(result.keys())