# Quick Start

This notebook follows the TruLens quickstart: https://www.trulens.org/trulens_eval/quickstart/

In [11]:
# Unused alternative, kept for reference: TruLens' LiteLLM feedback provider.
# from trulens_eval.feedback.provider.litellm import LiteLLM
# litellm_provider = LiteLLM(model_engine='palm', endpoint='localhost:8000')

## Prepare data

In [20]:
# The single source document for this demo; a later cell embeds it and stores
# it in the vector store as the only retrievable context.
university_info = """
The University of Washington, founded in 1861 in Seattle, is a public research university
with over 45,000 students across three campuses in Seattle, Tacoma, and Bothell.
As the flagship institution of the six public universities in Washington state,
UW encompasses over 500 buildings and 20 million square feet of space,
including one of the largest library systems in the world.
"""

## Create Vector Store

Create a chromadb vector store in memory.

In [21]:
import chromadb
from chromadb.utils import embedding_functions
# Embed the document with chromadb's default embedding function.
default_ef = embedding_functions.DefaultEmbeddingFunction()
students_embeddings = default_ef([university_info])

client = chromadb.Client()

# get_or_create_collection (instead of create_collection) keeps this cell
# re-runnable: create_collection raises if "Students" already exists in the
# client from a previous execution of the cell.
vector_store = client.get_or_create_collection(name="Students")

# upsert (instead of add) so re-running the cell overwrites the entry stored
# under "id1" rather than trying to insert a duplicate id.
vector_store.upsert(
    embeddings=students_embeddings,
    documents=[university_info],
    metadatas=[{'source': 'university info'}],
    ids=["id1"],
)

/Users/alextanhongpin/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████████████████████████| 79.3M/79.3M [00:32<00:00, 2.54MiB/s]


In [18]:
from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument
# Handle used later in this notebook to start and stop the dashboard.
tru = Tru()
# NOTE(review): presumably wipes any records/feedback results left over from
# earlier runs so the dashboard only shows this session — confirm before
# running against a database you want to keep.
tru.reset_database()

In [22]:
import os

from openai import OpenAI

# https://github.com/BerriAI/litellm?tab=readme-ov-file#step-2-replace-openai-base
# Requests are sent to base_url, i.e. the LiteLLM proxy, which forwards them to
# the model the proxy was started with (`litellm --model`).
# Both values can be overridden through the environment; the defaults are the
# values this notebook was originally written with.
LITELLM_PROXY_BASE_URL = os.environ.get('LITELLM_PROXY_BASE_URL', 'http://0.0.0.0:8000')
# NOTE(review): 'anything' looks like a placeholder key for a proxy that does
# not check it — confirm for your proxy setup.
LITELLM_PROXY_API_KEY = os.environ.get('LITELLM_PROXY_API_KEY', 'anything')

oai_client = OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_PROXY_BASE_URL)

## Build RAG from scratch
Build a custom RAG pipeline from scratch and instrument its methods with TruLens' `@instrument` decorator.

In [23]:
class RAG_from_scratch:
    """Minimal retrieval-augmented generation pipeline.

    ``query`` retrieves context from ``vector_store`` and then asks the chat
    model behind ``oai_client`` to answer using that context. Every method is
    wrapped with ``@instrument``; the feedback functions in this notebook
    select the ``retrieve`` call's ``query`` argument and its return value, so
    those names must stay as they are.
    """

    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.

        Args:
            query: Text to search the vector store with.

        Returns:
            The documents returned for ``query`` (the first entry of the
            result's ``'documents'`` field).
        """
        results = vector_store.query(
            query_texts=query,
            n_results=2,
        )
        return results['documents'][0]

    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.

        Args:
            query: The question to answer.
            context_str: Retrieved context; normally the list returned by
                ``retrieve``.

        Returns:
            The message content of the first completion choice.
        """
        # Interpolating the list directly would put its Python repr (brackets,
        # quotes, escaped newlines) into the prompt. Join the chunks into plain
        # text instead; anything that is not a list/tuple is rendered with
        # str(), exactly as the f-string interpolation did before.
        if isinstance(context_str, (list, tuple)):
            context_text = "\n\n".join(str(chunk) for chunk in context_str)
        else:
            context_text = str(context_str)

        prompt = (
            "We have provided context information below. \n"
            "---------------------\n"
            f"{context_text}"
            "\n---------------------\n"
            f"Given this information, please answer the question: {query}"
        )
        response = oai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            temperature=0,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content

    @instrument
    def query(self, query: str) -> str:
        """
        Answer ``query``: retrieve context, then generate a completion from it.

        Args:
            query: The question to answer.

        Returns:
            The generated answer text.
        """
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion

rag = RAG_from_scratch()

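## Set up feedback functions

Define three feedback functions (groundedness, answer relevance, and context relevance) to evaluate each recorded query.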


In [33]:
from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider import OpenAI as fOpenAI

import numpy as np

# Feedback provider built on the same client the RAG app uses.
fopenai = fOpenAI(client=oai_client)
grounded = Groundedness(groundedness_provider=fopenai)

# Groundedness: compares the collected return value of `retrieve` with the
# app's final output, then aggregates the per-statement results.
groundedness_source = Select.RecordCalls.retrieve.rets.collect()
f_groundedness = (
    Feedback(
        grounded.groundedness_measure_with_cot_reasons,
        name='Groundedness',
    )
    .on(groundedness_source)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Answer relevance: the `query` argument of `retrieve` versus the final output.
qa_prompt = Select.RecordCalls.retrieve.args.query
f_qa_relevance = (
    Feedback(fopenai.relevance_with_cot_reasons, name='Answer Relevance')
    .on(qa_prompt)
    .on_output()
)

# Context relevance: the `query` argument of `retrieve` versus what `retrieve`
# returned, averaged with np.mean.
context_question = Select.RecordCalls.retrieve.args.query
context_statement = Select.RecordCalls.retrieve.rets.collect()
f_context_relevance = (
    Feedback(fopenai.qs_relevance_with_cot_reasons, name = "Context Relevance")
    .on(context_question)
    .on(context_statement)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.app.retrieve.args.query .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.app.retrieve.args.query .
✅ In Context Relevance, input statement will be set to __record__.app.retrieve.rets.collect() .


## Construct the app

Wrap the custom RAG with `TruCustomApp` and pass it the list of feedback functions to evaluate.



In [35]:
from trulens_eval import TruCustomApp
# Wrap the RAG instance under the app id 'RAG v1' and attach the three
# feedback functions defined in the previous cell.
tru_rag = TruCustomApp(
    rag,
    app_id='RAG v1',
    feedbacks=[
        f_groundedness,
        f_qa_relevance,
        f_context_relevance,
    ],
)

In [36]:
# Run one query inside the tru_rag context manager. The `recording` binding is
# not used further in this cell, and the returned answer is discarded.
# NOTE(review): presumably the context manager is what records the call and
# triggers the attached feedback functions — confirm against the TruLens docs.
with tru_rag as recording:
    rag.query("When was the University of Washington founded?")

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Feedback Function exception caught: Traceback (most recent call last):
  File "/Users/alextanhongpin/Documents/python/python-trulens-palm/.venv/lib/python3.11/site-packages/trulens_eval/feedback/feedback.py", line 481, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/alextanhongpin/Documents/python/python-trulens-palm/.venv/lib/python3.11/site-packages/trulens_eval/feedback/provider/endpoint/base.py", line 377, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
                  ^^^^^^^^^^^^

In [37]:
# Start the TruLens dashboard; the cell output reports the URL it is served at.
tru.run_dashboard()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.8.16:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [38]:
# Second query inside the tru_rag context manager, this time with a question
# the stored university document does not address.
# NOTE(review): presumably meant to show how the feedback scores react to an
# off-topic question — confirm intent.
with tru_rag as recording:
    rag.query("What is the meaning of life?")

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


In [42]:
# Shut down the dashboard that was started with tru.run_dashboard().
tru.stop_dashboard()