# LlamaIndex Integration
##### LlamaIndex is a data framework that helps developers build LLM applications by providing tools for data ingestion, structuring, retrieval, and integration with various application frameworks. This example shows indexing and querying with LlamaIndex and requires the following packages: llama_index>=0.9.30, langchain>=0.0.257, and transformers (used to load the Llama tokenizer).

In [None]:
!pip3 install -q llama_index langchain

### 1. Load documents from 'data_folder'

In [None]:
from llama_index import SimpleDirectoryReader

# Point a reader at the local 'data_folder' directory, then load its contents
# into `documents` for the indexing step further down.
directory_reader = SimpleDirectoryReader('data_folder')
documents = directory_reader.load_data()

### 2. Create a 'ServiceContext' using Anyscale support on LlamaIndex

In [None]:
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.llms import Anyscale
from llama_index.embeddings import AnyscaleEmbedding

# Set global tokenizer
from llama_index import set_global_tokenizer
from transformers import LlamaTokenizerFast
# Load the Llama tokenizer and register its `encode` method as the global
# tokenizer callable (presumably so token counts match the Llama-2 model used
# below -- confirm against the LlamaIndex docs).
llama_tokenizer = LlamaTokenizerFast.from_pretrained(
    "hf-internal-testing/llama-tokenizer"
)
set_global_tokenizer(llama_tokenizer.encode)

import os

# Prefer the ANYSCALE_API_KEY environment variable so the real secret never has
# to be pasted into (and saved with) the notebook. The original placeholder is
# kept as the fallback when the variable is unset or empty.
ANYSCALE_ENDPOINT_TOKEN = os.environ.get("ANYSCALE_API_KEY") or "YOUR_ANYSCALE_TOKEN"

# Service context whose LLM and embedding model are both Anyscale-backed and
# authenticated with the same key; documents are split into 512-sized chunks.
service_context = ServiceContext.from_defaults(
    llm=Anyscale(
        model="meta-llama/Llama-2-70b-chat-hf",
        api_key=ANYSCALE_ENDPOINT_TOKEN,
    ),
    embed_model=AnyscaleEmbedding(
        model="thenlper/gte-large",
        api_key=ANYSCALE_ENDPOINT_TOKEN,
    ),
    chunk_size=512,
)

### 3. Alternatively, you can build a similar LLM for the ServiceContext using 'ChatAnyscale' from LangChain (running this cell replaces the 'service_context' created in step 2)

In [None]:
from langchain.chat_models import ChatAnyscale

# LangChain chat model for the Anyscale-hosted Llama-2 70B chat model.
langchain_llm = ChatAnyscale(
    anyscale_api_key=ANYSCALE_ENDPOINT_TOKEN,
    model_name="meta-llama/Llama-2-70b-chat-hf",
)

# Embeddings still come from LlamaIndex's AnyscaleEmbedding, using the same key.
anyscale_embedding = AnyscaleEmbedding(
    model="thenlper/gte-large",
    api_key=ANYSCALE_ENDPOINT_TOKEN,
)

# Rebinds `service_context`, replacing any value assigned to it earlier.
service_context = ServiceContext.from_defaults(
    llm=langchain_llm,
    embed_model=anyscale_embedding,
    chunk_size=512,
)

### 4. Create the index for documents with 'VectorStoreIndex' and query them

In [None]:
# The question to ask; replace the sample text with a real query.
que = "Sample Query Texts"

# Build a vector index over the loaded documents with the configured
# service context.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

# Query the index through its default query engine and show the answer.
query_engine = index.as_query_engine()
response = query_engine.query(que)
print(response)

### 5. Run the relevancy evaluator

In [None]:
# Run the following code in a standalone Python file rather than a notebook cell
# to avoid a Jupyter runtime error (presumably the evaluator starts its own
# asyncio event loop, which clashes with the one Jupyter already runs --
# TODO confirm).
from llama_index.evaluation import RelevancyEvaluator

# NOTE: despite the `gpt4` in its name, this evaluator is configured from
# `service_context`, not from a GPT-4 model. The name is kept so that any code
# referring to it keeps working.
evaluator_gpt4 = RelevancyEvaluator(service_context=service_context)

# Check the response generated for `que` for relevancy.
eval_result = evaluator_gpt4.evaluate_response(query=que, response=response)

# Report the verdict; when run as a script the result would otherwise be
# computed and silently discarded.
print(f"Passing: {eval_result.passing}")
print(f"Feedback: {eval_result.feedback}")