##### Copyright 2024 Google LLC.

In [1]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gemini API: RAG Evaluation with LlamaIndex
This notebook demonstrates how to evaluate Retrieval-Augmented Generation (RAG) with LlamaIndex. You will learn how to:

1. Read the data
2. Create a vector store with Gemini embeddings
3. Run a query engine with Gemini models
4. Evaluate results using LlamaIndex components

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/examples/Evaluate_RAG_with_LlamaIndex.ipynb"><img src = "https://www.tensorflow.org/images/colab_logo_32px.png"/>Run in Google Colab</a>
  </td>
</table>

In [2]:
!pip install -q llama-index
!pip install -q llama-index-multi-modal-llms-gemini
!pip install -q llama-index-embeddings-gemini

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.5/15.5 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m361.3/361.3 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Import standard libraries
import os

# Import third-party utilities
import pandas as pd
from IPython.display import Markdown

# Import core components
from llama_index.core import Response, Settings, StorageContext, VectorStoreIndex
from llama_index.core import SimpleDirectoryReader, SimpleKeywordTableIndex

# Import components for parsing and evaluating nodes
from llama_index.core.evaluation import EvaluationResult, RelevancyEvaluator
from llama_index.core.node_parser import SentenceSplitter

# Import embeddings and gemini models from llama_index
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini

# Apply nest_asyncio to enable nesting of async functions in an already running async loop
import nest_asyncio
nest_asyncio.apply()

## Configure your API key

To run the following cell, your API key must be stored in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) for an example.


In [5]:
# Colab-only import: `userdata` gives access to the notebook's Colab Secrets.
from google.colab import userdata

# Fetch the key from the `GOOGLE_API_KEY` secret and mirror it into the
# process environment under the same name.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

## Define the model and embeddings

In [6]:
# Gemini model shared by the rest of the notebook: it is registered as the default
# LLM in `Settings` and also handed to the relevancy evaluator.
# NOTE(review): no `api_key` is passed here — presumably the client picks up the
# GOOGLE_API_KEY environment variable set in the previous cell; confirm.
llm = Gemini(model="models/gemini-1.5-flash-latest")

In [7]:
# Register the notebook-wide defaults on LlamaIndex's global `Settings` object:
# the Gemini LLM created above and a Gemini embedding model.
Settings.llm = llm
Settings.embed_model = GeminiEmbedding(
    model_name="models/text-embedding-004",
    api_key=GOOGLE_API_KEY,
)

## Data preparation
This notebook evaluates responses to queries about the U.S. Constitution. Start by downloading and reading the data.

In [8]:
!mkdir -p 'data/constitution/'
!curl 'https://www.gutenberg.org/cache/epub/5/pg5.txt' -o 'data/constitution/usa_constitution.txt'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 48557  100 48557    0     0   131k      0 --:--:-- --:--:-- --:--:--  132k


In [9]:
# Read the file(s) downloaded into ./data/constitution/ into `documents`,
# the input for the node parser in the next step.
documents = SimpleDirectoryReader('./data/constitution/').load_data()

Break documents into nodes and store them.

In [10]:
# Split the documents into small, overlapping chunks ("nodes").
# NOTE(review): chunk_size=200 / chunk_overlap=75 are presumably token counts
# (see the SentenceSplitter documentation) — confirm, and tune for your corpus.
node_parser = SentenceSplitter.from_defaults(chunk_size=200, chunk_overlap=75)
nodes = node_parser.get_nodes_from_documents(documents)

In [11]:
# Create a default storage context and register the parsed nodes in its docstore,
# so the index built next can be given this same storage context.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

Create the vector store index and define the query engine.

In [12]:
# Build a vector index over the nodes, backed by the storage context created above.
# NOTE(review): embeddings are presumably computed with `Settings.embed_model`
# since no embedding model is passed explicitly — confirm.
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

In [13]:
# Query engine over the vector index, created with library defaults (no arguments).
# NOTE(review): answer synthesis presumably uses `Settings.llm` — confirm.
query_engine = vector_index.as_query_engine()

## RAG Evaluation

Set up the evaluation pipeline.

In [14]:
# Questions sent to the query engine; each answer is then scored by the evaluator.
eval_questions = [
    "In what situations can impeachment occur?",
    "From what age can someone become a Representative?",
]

In [15]:
# Relevancy evaluator that uses the same Gemini model (`llm`) as its judge.
evaluator = RelevancyEvaluator(llm=llm)

In [16]:
# define jupyter display function
def display_eval_df(query: str, response: str, eval_result: str) -> None:
    eval_df = pd.DataFrame(
        {
            "Query": [query],
            "Response": [response.response],
            "Source": "\n".join(source_node.node.get_content() for source_node in response.source_nodes),
            "Evaluation Result": [eval_result.feedback],
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
            "vertical-align": "top"
        }
    )
    display(eval_df)

Test your evaluation pipeline with `eval_questions`. For each question you will see the query, the response generated by the LLM, the source text retrieved to ground that response, and the evaluator's feedback (YES/NO) on whether the response and its source context are relevant to the query.

In [17]:
for question in eval_questions:
    # Retrieve context and generate an answer for the current question
    llm_response = query_engine.query(question)

    # Have the relevancy evaluator judge the answer (and its sources) against the question
    eval_result = evaluator.evaluate_response(query=question, response=llm_response)

    # Render the query, answer, retrieved source text and evaluator feedback as a table
    display_eval_df(question, llm_response, eval_result)


Unnamed: 0,Query,Response,Source,Evaluation Result
0,In what situations can impeachment occur?,"Impeachment can occur for treason, bribery, or other high crimes and misdemeanors.","The Senate shall choose their other Officers, and also a President pro tempore, in the Absence of the Vice-President, or when he shall exercise the Office of President of the United States. The Senate shall have the sole Power to try all Impeachments. When sitting for that Purpose, they shall be on Oath or Affirmation. When the President of the United States is tried, the Chief Justice shall preside: And no Person shall be convicted without the Concurrence of two thirds of the Members present. Judgment in cases of Impeachment shall not extend further than to removal from Office, and disqualification to hold and enjoy any Office of honor, Trust or Profit under the United States: but the Party convicted shall nevertheless be liable and subject to Indictment, Trial, Judgment and Punishment, according to Law. Section 4. Section 4. The President, Vice President and all civil Officers of the United States, shall be removed from Office on Impeachment for, and Conviction of, Treason, Bribery, or other high Crimes and Misdemeanors. ARTICLE THREE Section 1. The judicial Power of the United States, shall be vested in one supreme Court, and in such inferior Courts as the Congress may from time to time ordain and establish. The Judges, both of the supreme and inferior Courts, shall hold their Offices during good behavior, and shall, at stated Times, receive for their Services, a Compensation, which shall not be diminished during their Continuance in Office. Section 2.",YES


Unnamed: 0,Query,Response,Source,Evaluation Result
0,From what age can someone become a Representative?,Someone must be at least 25 years old to become a Representative.,"No Person shall be a Representative who shall not have attained to the Age of twenty five Years, and been seven Years a citizen of the United States, and who shall not, when elected, be an Inhabitant of that State in which he shall be chosen. Representatives and direct Taxes shall be apportioned among the several States which may be included within this Union, according to their respective Numbers, which shall be determined by adding to the whole number of free Persons, including those bound to Service for a Term of Years, and excluding Indians not taxed, three fifths of all other Persons. The actual Enumeration shall be made within three Years after the first Meeting of the Congress of the United States, and within every subsequent Term of ten Years, in such Manner as they shall by law Direct. Article 1 Section 1. All legislative Powers herein granted shall be vested in a Congress of the United States, which shall consist of a Senate and House of Representatives. Section 2. The House of Representatives shall be composed of Members chosen every second Year by the People of the several States, and the electors in each State shall have the qualifications requisite for electors of the most numerous branch of the State legislature. No Person shall be a Representative who shall not have attained to the Age of twenty five Years, and been seven Years a citizen of the United States, and who shall not, when elected, be an Inhabitant of that State in which he shall be chosen.",YES


## Summary
This notebook shows how to integrate the Gemini API with LlamaIndex to create a Retrieval-Augmented Generation (RAG) pipeline with built-in evaluation.

To explore further, see the LlamaIndex [documentation](https://docs.llamaindex.ai/en/latest/) for more features you can use with the Gemini API.