## Advanced RAG Pipeline

In [None]:
# Read the source PDF with SimpleDirectoryReader
from llama_index.core import SimpleDirectoryReader

pdf_reader = SimpleDirectoryReader(input_files=["How-to-Build-a-Career-in-AI.pdf"])
documents = pdf_reader.load_data()

In [None]:
# Merge the text of every loaded document into one Document,
# separating the pieces with a blank line.
from llama_index.core import Document

merged_text = "\n\n".join(part.text for part in documents)
document = Document(text=merged_text)

In [4]:
# Configure the Azure OpenAI chat and embedding clients

from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from dotenv import load_dotenv, find_dotenv
import os

# Load variables from the .env file located by find_dotenv()
load_dotenv(find_dotenv())

# Connection settings: endpoint and key are read from the environment
azure_endpoint = os.getenv('OPENAI_ENDPOINT')
api_key = os.getenv('OPENAI_API_KEY')
api_version = "2023-09-15-preview"

# Azure deployment names for the chat model and the embedding model
llm_model = 'gpt-35-turbo-jdrios'
emb_model = 'text-embedding-ada-002-jdrios'

# Keyword arguments shared by both clients
azure_connection = dict(
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

llm = AzureOpenAI(
    model="gpt-35-turbo",
    deployment_name=llm_model,
    **azure_connection,
)

embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name=emb_model,
    **azure_connection,
)

In [None]:
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage

# ServiceContext.from_defaults is also the place where options such as
# chunk size or chunk overlap could be supplied.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Directory used to cache the index between runs.
# NOTE(review): a later cell passes save_dir="sentence_index" to
# build_sentence_window_index; if that helper reuses an existing directory it
# would pick up the index persisted here, which was embedded with a different
# model. Confirm against utils.py.
index_dir = "./sentence_index"

if os.path.exists(index_dir):
    # Reuse the index persisted by a previous run
    persisted_storage = StorageContext.from_defaults(persist_dir=index_dir)
    sentence_index = load_index_from_storage(
        persisted_storage,
        service_context=service_context,
    )
else:
    # First run: build the index from the merged document, then persist it
    sentence_index = VectorStoreIndex.from_documents(
        [document],
        service_context=service_context,
    )
    sentence_index.storage_context.persist(persist_dir=index_dir)

## Evaluation setup using TruLens

In [None]:
# Load the evaluation questions from eval_questions.txt, one question per line.
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Strip the trailing newline and any surrounding whitespace
        item = line.strip()
        if not item:
            # A blank line is not a question; skipping it avoids sending an
            # empty query to the engine during evaluation.
            continue
        print(item)
        eval_questions.append(item)

In [None]:
# You can try your own question:
new_question = "What is the right AI job for me?"
# Guard the append so that re-running this cell does not add the same
# question to the evaluation set a second time.
if new_question not in eval_questions:
    eval_questions.append(new_question)

In [None]:
# Create the TruLens session object and reset its database before evaluating.
from trulens_eval import Tru
tru = Tru()
tru.reset_database()

In [None]:
# Apply the nest_asyncio patch (presumably so nested event loops work inside
# the notebook's already-running loop; confirm).
import nest_asyncio
nest_asyncio.apply()

In [None]:
# Feedback provider backed by the Azure OpenAI deployment.
#
# The class is imported directly, under an alias, instead of binding the module
# name `feedback`: a later cell assigns `feedback` to a value returned by
# tru.get_records_and_feedback(), after which `feedback.AzureOpenAI` would fail
# if this cell were re-run. The alias also avoids clashing with the llama_index
# AzureOpenAI class imported earlier in the notebook.
from trulens_eval.feedback import AzureOpenAI as TruAzureOpenAI

# NOTE(review): the endpoint is passed as `base_url`; confirm this is the
# keyword the provider expects for an Azure endpoint.
provider = TruAzureOpenAI(
    deployment_name=llm_model,
    api_key=api_key,
    api_version=api_version,
    base_url=azure_endpoint,
)

In [None]:
# Answer Relevance: feedback applied to the app's input and output
from trulens_eval import Feedback

f_qa_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()

In [None]:
# Context Relevance: feedback applied to the input and each selected context,
# with the per-context scores averaged by np.mean.
from trulens_eval import TruLlama
import numpy as np

# Selector for the text of the source nodes
context_selection = TruLlama.select_source_nodes().node.text

f_qs_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)


In [None]:
# Groundedness: feedback applied to the selected context and the app output,
# aggregated with the Groundedness helper's own statement aggregator.
from trulens_eval.feedback import Groundedness

grounded = Groundedness(groundedness_provider=provider)

f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)


In [None]:
# Create sentence window engine
# NOTE(review): at this point sentence_index is the VectorStoreIndex built or
# loaded above; confirm the utils helper works with an index built that way.
from utils import get_sentence_window_query_engine
sentence_window_engine = get_sentence_window_query_engine(sentence_index)


In [None]:
# Recorder: wrap the sentence-window engine in a TruLlama app
from trulens_eval import TruLlama, FeedbackMode  # LlamaIndex / TruLens integration

# Only answer relevance is active; f_qs_relevance and f_groundedness are
# defined above and can be added to this list to enable them.
active_feedbacks = [f_qa_relevance]

tru_recorder = TruLlama(
    sentence_window_engine,
    app_id="App_1",
    feedbacks=active_feedbacks,
)

In [None]:
# Run the first evaluation question inside the recorder context
# (assumes eval_questions is non-empty).
with tru_recorder as recording:
    sentence_window_engine.query(eval_questions[0])

In [None]:
# Fetch the recorded runs; the call returns a pair that is bound to
# `records` and `feedback` (an empty app_ids list presumably means "all apps"; confirm).
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [None]:
# Display the `feedback` value returned by get_records_and_feedback
feedback

In [None]:
import pandas as pd

# Show full cell contents instead of truncating long strings
pd.set_option("display.max_colwidth", None)

# Question, answer, and one column per feedback entry
display_columns = ["input", "output"] + feedback
records[display_columns]

For the classroom, we've written some of the code in helper functions inside a utils.py file.  
- You can view the utils.py file in the file directory by clicking on the "Jupyter" logo at the top of the notebook.
- In later lessons, you'll get to work directly with the code that's currently wrapped inside these helper functions, to give you more options to customize your RAG pipeline.

In [None]:
from utils import get_prebuilt_trulens_recorder

# Baseline ("direct") query engine built straight from the index with default
# settings. `query_engine` was never assigned before its first use in this
# cell, so a fresh Restart & Run All stopped here with a NameError.
query_engine = sentence_index.as_query_engine()

tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                             app_id="Direct Query Engine")

In [None]:
# Send every evaluation question through `query_engine` inside a single
# recorder context. Relies on `query_engine` being defined by an earlier cell.
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

In [None]:
# Re-fetch records and feedback after the full evaluation run
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [None]:
# Preview the first few recorded rows
records.head()

In [None]:
# Start the TruLens dashboard to browse the recorded runs
# launches on http://localhost:8501/
tru.run_dashboard()

## Advanced RAG pipeline

### 1. Sentence Window retrieval

In [None]:
# With llama_index >= 0.10 (the package layout the rest of this notebook
# imports from, e.g. llama_index.core and llama_index.llms.azure_openai) the
# OpenAI LLM lives in the llama_index.llms.openai integration package; the old
# `from llama_index.llms import OpenAI` path does not resolve there.
from llama_index.llms.openai import OpenAI

# NOTE(review): this rebinds `llm`, replacing the Azure OpenAI client created
# earlier, for every cell below.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [None]:
# Build the sentence-window index with the utils helper, using a local
# embedding model. This rebinds `sentence_index`.
# NOTE(review): save_dir="sentence_index" names the same directory that the
# earlier index cell persists to ("./sentence_index"); if the helper loads an
# existing directory instead of rebuilding, it would reuse that earlier index.
# Confirm against utils.py.
from utils import build_sentence_window_index

sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

In [None]:
# Recreate the sentence-window query engine on top of the index built above
from utils import get_sentence_window_query_engine

sentence_window_engine = get_sentence_window_query_engine(sentence_index)

In [None]:
# Try one question against the sentence-window engine and print the answer
sample_question = "how do I get started on a personal project in AI?"
window_response = sentence_window_engine.query(sample_question)
print(window_response)

In [None]:
# Reset the TruLens database, then create a recorder for the sentence-window
# engine. get_prebuilt_trulens_recorder is imported from utils in an earlier cell.
tru.reset_database()

tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    sentence_window_engine,
    app_id = "Sentence Window Query Engine"
)

In [None]:
# Query the sentence-window engine once per evaluation question, each inside
# its own recorder context, echoing the question and its answer.
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = sentence_window_engine.query(question)
        print(question, response, sep="\n")

In [None]:
# Show the leaderboard for the recorded apps
tru.get_leaderboard(app_ids=[])

In [None]:
# Start the TruLens dashboard to inspect the sentence-window results
# launches on http://localhost:8501/
tru.run_dashboard()

### 2. Auto-merging retrieval

In [None]:
# Build the auto-merging index with the utils helper.
# Note that this passes `documents` (the value returned by the reader), not
# the merged `document` used for the sentence-window index.
from utils import build_automerging_index

automerging_index = build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index"
)

In [None]:
# Create the auto-merging query engine from the index, using the helper's defaults
from utils import get_automerging_query_engine

automerging_query_engine = get_automerging_query_engine(
    automerging_index,
)

In [None]:
# Try one question against the auto-merging engine and print the answer
portfolio_question = "How do I build a portfolio of AI projects?"
auto_merging_response = automerging_query_engine.query(portfolio_question)
print(auto_merging_response)

In [None]:
# Reset the TruLens database, then create a recorder for the auto-merging
# engine. get_prebuilt_trulens_recorder is imported from utils in an earlier cell.
tru.reset_database()

tru_recorder_automerging = get_prebuilt_trulens_recorder(automerging_query_engine,
                                                         app_id="Automerging Query Engine")

In [None]:
# Query the auto-merging engine once per evaluation question, each inside its
# own recorder context, echoing the question and its answer.
for question in eval_questions:
    with tru_recorder_automerging as recording:
        response = automerging_query_engine.query(question)
        print(question, response, sep="\n")

In [None]:
# Show the leaderboard for the recorded apps
tru.get_leaderboard(app_ids=[])

In [None]:
# Start the TruLens dashboard to inspect the auto-merging results
# launches on http://localhost:8501/
tru.run_dashboard()