In [None]:
# Notebook Kernel required: Python 3, ipykernel
# !pip install --upgrade pip
# !pip install ipykernel
# !pip install trulens_eval llama_index html2text
# !pip install -r requirements.txt
!pip install llama-index llama-hub

In [None]:
# Use Azure OpenAI API
import os

# Load environment variables from a local .env file.
# This must run before the os.environ.get(...) lookups below so that they can
# see the values defined in that file.
from dotenv import load_dotenv
load_dotenv()

### Azure OpenAI connection settings, read from the environment.
### Each value is None when the corresponding variable is not set.
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
AZURE_OPENAI_DEPLOYMENT_EMBEDDING = os.getenv("AZURE_OPENAI_DEPLOYMENT_EMBEDDING")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")

def check_env_var(var_name):
    """Fail fast when a required environment variable is missing.

    Args:
        var_name: The *name* of the environment variable to check
            (for example "AZURE_OPENAI_KEY"), not its value.

    Raises:
        RuntimeError: If the variable is unset or set to an empty string.
    """
    # Look the variable up by name so the error message can say which one is missing;
    # checking the already-fetched value would only ever report "None".
    if not os.environ.get(var_name):
        # Raise rather than call exit(): the cell then stops with a readable error
        # instead of trying to terminate the notebook kernel.
        raise RuntimeError(f"Please set {var_name} environment variable.")


# Validate every setting the notebook depends on before any API call is made.
for required_var in (
    "AZURE_OPENAI_KEY",
    "AZURE_OPENAI_DEPLOYMENT",
    "AZURE_OPENAI_DEPLOYMENT_EMBEDDING",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_API_VERSION",
):
    check_env_var(required_var)

# Assemble the chat-completions URL for the configured deployment, piece by piece:
# host, deployment path, then the API version query parameter.
azure_oai_endpoint = (
    f"https://{AZURE_OPENAI_ENDPOINT}"
    f"/openai/deployments/{AZURE_OPENAI_DEPLOYMENT}"
    f"/chat/completions?api-version={AZURE_OPENAI_API_VERSION}"
)

# Show the resulting URL so the configuration can be checked by eye.
print(azure_oai_endpoint)

In [7]:
## Setup: Prepare the data
!mkdir -p data

!wget --no-clobber --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"

File ‘data/llama2.pdf’ already there; not retrieving.


In [8]:
## Create Simple RAG Application, setup loader and document objects

from pathlib import Path
from llama_hub.file.pymu_pdf.base import PyMuPDFReader
from llama_index import VectorStoreIndex
from llama_index.readers.web import SimpleWebPageReader
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.node_parser import SentenceSplitter
from llama_index.llms import OpenAI

# Read the downloaded PDF from disk with the PyMuPDF-based reader; the result
# is kept in `documents` for the node-parsing step later on.
loader = PyMuPDFReader()
documents = loader.load(
    file_path="./data/llama2.pdf",
)

In [20]:
# Use Azure OpenAI for both the LLM and the embedding model
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.llms import AzureOpenAI

# Chat/completions deployment used to generate answers.
llm = AzureOpenAI(
    deployment=AZURE_OPENAI_DEPLOYMENT,
    api_key=AZURE_OPENAI_KEY,
    azure_endpoint=f"https://{AZURE_OPENAI_ENDPOINT}",
    api_version=AZURE_OPENAI_API_VERSION
)

# Embedding deployment used to vectorize nodes and queries. Without an explicit
# embed_model the ServiceContext falls back to the library's default embedding
# model, which ignores AZURE_OPENAI_DEPLOYMENT_EMBEDDING and the Azure credentials.
embed_model = AzureOpenAIEmbedding(
    deployment_name=AZURE_OPENAI_DEPLOYMENT_EMBEDDING,
    api_key=AZURE_OPENAI_KEY,
    azure_endpoint=f"https://{AZURE_OPENAI_ENDPOINT}",
    api_version=AZURE_OPENAI_API_VERSION
)

node_parser = SentenceSplitter(chunk_size=1024)
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Split the loaded documents into nodes (chunks) that will be embedded and indexed.
nodes = node_parser.get_nodes_from_documents(documents)

## Setup Index from Nodes VectorStoreIndex
## index time varies depending on the number of nodes / documents
## For this simple PDF, takes around 10~15 seconds
index = VectorStoreIndex(nodes, service_context=service_context)

## Setup Query Engine from Index
query_engine = index.as_query_engine()



In [21]:
## Smoke test: ask one question to confirm the RAG pipeline answers end to end
query = "What is the purpose of this paper?"
first_response = query_engine.query(query)
print(first_response)

The purpose of this paper is to explore the AI ethics of ChatGPT through a diagnostic analysis.


In [None]:
## You can now use the query_engine defined above to ask any question about the document

In [55]:
### Advanced: Customized RAG Application

from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.postprocessor import SimilarityPostprocessor

# reuse index or
# index = VectorStoreIndex(nodes, service_context=service_context)

# configure retriever: fetch the 10 most similar nodes for each query
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)

# configure response synthesizer
# Pass the Azure-backed service_context explicitly; called with no arguments,
# get_response_synthesizer() builds a default context and answers with the
# library's default LLM instead of the configured Azure deployment.
response_synthesizer = get_response_synthesizer(service_context=service_context)

# assemble query engine
# The postprocessor is configured with similarity_cutoff=0.7, so weakly matching
# nodes are filtered out before the answer is synthesized.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)


In [56]:
# Ask the customized query engine the same question and show its answer
question = "What is the purpose of this paper?"
response = query_engine.query(question)
print(response)



The purpose of this paper is to introduce Llama 2, a new family of pretrained and fine-tuned models with scales of 7 billion to 70 billion parameters. The paper discusses the methods and techniques used to develop these models and emphasizes their alignment with the principles of helpfulness and safety. The authors also highlight the competitiveness of Llama 2 models with existing open-source chat models and their commitment to transparency and ongoing improvements.
