<div class="alert alert-block alert-info">

# RAG with LlamaIndex

In this notebook we will explore different retrievers within the LlamaIndex framework.


In [50]:
# Install the dependencies listed in requirements.txt into the kernel's environment.
# --force-reinstall makes pip reinstall every listed package even if it is already present.
%pip install --force-reinstall -r requirements.txt

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0mProcessing ./ragas-aws-1.0.tar.gz
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting awscli==1.32.19 (from -r requirements.txt (line 1))
  Downloading awscli-1.32.19-py3-none-any.whl.metadata (11 kB)
Collecting boto3==1.34.19 (from -r requirements.txt (line 2))
  Downloading boto3-1.34.19-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore==1.34.19 (from -r requirements.txt (line 3))
  Downloading botocore-1.34.19-py3-none-any.whl.metadata (5.6 kB)
Collecting langchain==0.1.0 (from -r requirements.txt (line 4))
  Downl

In [52]:
# Restart the kernel so that the library versions installed above are the ones that get imported
from IPython.display import display_html


def restartkernel():
    """Emit a raw HTML <script> snippet that asks the notebook front end to restart the kernel."""
    # NOTE(review): relies on a `Jupyter` JavaScript global being available in the
    # front end — confirm this works in the environment the notebook is run in.
    restart_script = "<script>Jupyter.notebook.kernel.restart()</script>"
    display_html(restart_script, raw=True)


restartkernel()

In [54]:
# load environment variables 
import boto3
import os
import botocore
from botocore.config import Config
import langchain
import sagemaker
import pandas as pd

from langchain.llms.bedrock import Bedrock
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from typing import Dict

import json
import requests
import csv
import time
import pandas as pd
import nltk
import sys

from langchain.llms import Bedrock
from dotenv import load_dotenv, find_dotenv
from mlflow import MlflowClient

# Load environment variables stored in the local file dev-mlflow.env.
# override=True lets values from the file replace variables already set in the process.
load_dotenv(find_dotenv('dev-mlflow.env'), override=True)

# Settings the rest of the notebook reads from the environment.
# Re-assigning os.environ[X] = os.getenv(X) is a no-op when X is set and raises an
# opaque "TypeError: str expected, not NoneType" when it is not, so check explicitly
# and report every missing name at once.
# NOTE(review): AWS_ACCESS_KEY / AWS_SECRET_TOKEN are not passed to any client in this
# cell — confirm how the boto3 session is expected to obtain its credentials.
REQUIRED_ENV_VARS = (
    'OPENSEARCH_COLLECTION',
    'AWS_ACCESS_KEY',
    'AWS_SECRET_TOKEN',
    'REGION',
    'MLFLOW_TRACKING_URI',
)
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables (expected in dev-mlflow.env): "
        + ", ".join(missing_env_vars)
    )

# Initialize mlflow client
mlflow_client = MlflowClient(tracking_uri=os.environ['MLFLOW_TRACKING_URI'])

# Initialize Bedrock runtime; botocore retries each call up to 8 attempts
config = Config(
    retries={
        'max_attempts': 8
    }
)
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    config=config
)

# Initialize sagemaker session and look up its default S3 bucket
session = sagemaker.Session()
bucket = session.default_bucket()

In [57]:
## 2. Download ground truth dataset

import xmltodict
url = 'https://d3q8adh3y5sxpk.cloudfront.net/rageval/qsdata_20.xml'

# Send an HTTP GET request to download the file; the timeout keeps the cell from
# hanging indefinitely if the server does not answer.
response = requests.get(url, timeout=30)

# Stop here with a clear HTTP error instead of continuing with an undefined
# xml_data (the previous status check fell through to a NameError on failure).
response.raise_for_status()

# Parse the raw bytes so the XML parser applies the document's own encoding.
# The previously printed sample contained mojibake such as 'â\x80\x9c', which is
# consistent with UTF-8 bytes having been decoded with a single-byte codec.
xml_data = xmltodict.parse(response.content)

# Convert the dictionary to a Pandas DataFrame
qa_dataset = pd.DataFrame(xml_data['data']['records'])

# Flatten each record into the prompt/context/answer/page fields used for evaluation.
# itertuples() yields (index, first_column, ...); the record dict is the first column.
prompts = []
for row in qa_dataset.itertuples():
    record = row[1]
    prompts.append({
        'prompt': str(record['Question']),
        'context': str(record['Context']),
        'output': str(record['Answer']['question_answer']),
        'page': str(record['Page'])
    })

# example prompt
print(prompts[0])

{'prompt': "Who is Amazon's Senior Vice President and General Counsel?", 'context': 'Available Information\nOur investor relations website is amazon.com/ir and we encourage investors to use it as a way of easily finding information about us. We promptly make available on this website, free of charge, the reports that we file or furnish with the Securities and Exchange Commission (â\x80\x9cSECâ\x80\x9d), corporate governance information (including our Code of Business Conduct and Ethics), and select press releases.\nExecutive Officers and Directors\nThe following tables set forth certain information regarding our Executive Officers and Directors as of January 25, 2023:\nInformation About Our Executive Officers\nName Age Position\nJeffrey P. Bezos. Mr. Bezos founded Amazon.com in 1994 and has served as Executive Chair since July 2021. He has served as Chair of the Board since 1994 and served as Chief Executive Officer from May 1996 until July 2021, and as President from 1994 until June 1

In [None]:
# Just testing LLAMA_INDEX

## load data
!mkdir -p ./data

from urllib.request import urlretrieve
urls = [
    'https://d3q8adh3y5sxpk.cloudfront.net/rageval/AMZN-2023-10k.pdf',
]

filenames = [
    'AMZN-2023-10k.pdf',
]

data_root = "./data/"

for idx, url in enumerate(urls):
    file_path = data_root + filenames[idx]
    urlretrieve(url, file_path)

In [None]:
from llama_index import (
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    get_response_synthesizer,
    set_global_service_context
)
from llama_index.indices.document_summary import DocumentSummaryIndex
import nest_asyncio

# Patch asyncio via nest_asyncio before any LlamaIndex calls are made.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and later calls start their own — confirm it is still required.
nest_asyncio.apply()


In [None]:
from llama_index.llms import Bedrock
from llama_index.embeddings import BedrockEmbedding

# Inference parameters for the Claude model; this dict is the single source of
# truth for the values passed to the LLM below.
model_kwargs_claude = {
    "temperature": 0,
    "top_k": 10,
    "max_tokens_to_sample": 512
}

# temperature is a first-class argument of the Bedrock LLM wrapper; the remaining
# parameters are forwarded through additional_kwargs.
llm = Bedrock(
    model="anthropic.claude-v2",
    temperature=model_kwargs_claude["temperature"],
    additional_kwargs={
        key: value
        for key, value in model_kwargs_claude.items()
        if key != "temperature"
    },
)

# Call the factory on the class itself; going through BedrockEmbedding() first
# built a throwaway instance only to reach the same factory.
embed_model = BedrockEmbedding.from_credentials(
    model_name='amazon.titan-embed-g1-text-02'
)

# Chunking settings used when documents are split into nodes
chunk_overlap = 20
chunk_size = 512

# Build the service context once (an earlier duplicate without chunk_overlap was
# immediately overwritten) and register it as the global default.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
set_global_service_context(service_context)



In [None]:
def filename_fn(filename):
    """Build the metadata dict attached to a document loaded from ``filename``.

    Args:
        filename: Path of the source file as a string.

    Returns:
        A dict with ``file_path`` (the path exactly as given) and ``file_name``
        (the final path component with a trailing ``.pdf`` removed).
    """
    # Take the last path component instead of stripping the literal 'data/' prefix,
    # so './data/x.pdf' and absolute paths yield the same file_name as 'data/x.pdf'.
    base_name = os.path.basename(filename)
    if base_name.endswith('.pdf'):
        base_name = base_name[:-len('.pdf')]
    return {"file_path": filename, "file_name": base_name}

# Read everything under ./data; filename_fn supplies the metadata for each document
reader = SimpleDirectoryReader("./data", file_metadata=filename_fn)
documents = reader.load_data()

In [None]:
# Review the metadata attached to one of the loaded documents (position 50)
sample_document = documents[50]
print(sample_document.metadata)

In [None]:
from llama_index import SimpleDirectoryReader
from llama_index.vector_stores import (
    OpensearchVectorStore,
    OpensearchVectorClient,
)
from llama_index import VectorStoreIndex, StorageContext

In [None]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

host = os.environ['OPENSEARCH_COLLECTION'] # OpenSearch endpoint, for example: my-test-domain.us-east-1.aoss.amazonaws.com
service = 'aoss'
# Sign requests for the region configured in the environment; fall back to the
# previously hard-coded value when REGION is not set.
# NOTE(review): assumes REGION is the region the collection lives in — confirm.
region = os.environ.get('REGION', 'us-east-1')
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region, service)

# Reuse host rather than reading the environment variable a second time
endpoint = 'https://' + host
print(f'endpoint: {endpoint}')
index_name = "rag-eval-v1"
# OpensearchVectorClient stores text in this field by default
text_field = "content"
# OpensearchVectorClient stores embeddings in this field by default
embedding_field = "embedding"

# NOTE(review): dim must equal the output size of the embedding model in use —
# confirm 1536 against the model configured for this notebook.
client = OpensearchVectorClient(
    endpoint=endpoint,
    index=index_name,
    dim=1536,
    embedding_field=embedding_field,
    text_field=text_field,
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=10,
)
print(client)

In [None]:
# Wrap the OpenSearch client in a vector store and use it as the storage backend
vector_store = OpensearchVectorStore(client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create the index from the loaded sample documents on top of that storage context
index = VectorStoreIndex.from_documents(
    storage_context=storage_context,
    documents=documents,
)

In [None]:
# Ask a sample question through the index's default query engine and show the answer text
question = "Who is Amazon's Senior Vice President and General Counsel?"
query_engine = index.as_query_engine()
res = query_engine.query(question)
res.response

In [None]:
# query with metadata filtering
from llama_index import Document
from llama_index.vector_stores.types import MetadataFilters, ExactMatchFilter, MetadataFilter,FilterOperator
import regex as re

# Create a query engine that only searches certain documents.
# The filter is written as an OpenSearch query clause: key is the clause type
# ("term") and value is the JSON body of that clause.
# NOTE(review): the previous filter targeted the bare field "file_path" and was
# marked as not working. This version assumes document metadata is indexed under a
# "metadata." prefix and that exact matching needs the ".keyword" sub-field —
# verify both against the actual mapping of the index before relying on it.
metadata_query_engine = index.as_query_engine(
    filters=MetadataFilters(
        filters=[
            ExactMatchFilter(
                key="term",
                value=json.dumps(
                    {"metadata.file_path.keyword": "data/AMZN-2023-10k.pdf"}
                ),
            )
        ]
    )
)

res = metadata_query_engine.query(
    "who is Amazon's Senior Vice President and General Counsel?"
)
res.response