In [1]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext, StorageContext, load_index_from_storage
import os
from langchain import HuggingFaceHub
import sys
import logging
import numpy as np

sys.path.append('../')
from apikeys import huggingface_key, openai_key
import openai
openai.api_key = openai_key

In [2]:
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_key
os.environ['OPENAI_API_KEY'] = openai_key

In [4]:
llm = LLMPredictor(llm=HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1"))
service_context = ServiceContext.from_defaults(
    llm_predictor=llm, embed_model="local:BAAI/bge-small-en-v1.5"
)



LLMPredictor is deprecated, please use LLM instead.


In [5]:
# load the documents and create the index
documents = SimpleDirectoryReader("../data").load_data()
index = VectorStoreIndex.from_documents(documents,service_context=service_context)


In [6]:
query_engine = index.as_query_engine()
response = query_engine.query("when should you worry about baby split up?")
print(response)



You should worry about baby split up if the baby is under 3 months old and has a rectal temperature of 100.4° F or higher. Other symptoms that require immediate medical care include seizure activity, unresponsiveness, inconsolable crying for 2 or more hours, abdominal pain that hurts worse if you press one inch to the right or left of the bellybutton, a bright red or purple pin-prickly rash that does


In [7]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:Deleted 0 rows.
Deleted 0 rows.


In [8]:
from trulens_eval import (
    Feedback,
    TruLlama,
    Huggingface,
    OpenAI
)
from trulens_eval.feedback import Groundedness

In [9]:
openai = OpenAI()

qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

#grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
grounded = Groundedness(groundedness_provider=openai)

groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
        .on(TruLlama.select_source_nodes().node.text)
        .on_output()
        .aggregate(grounded.grounded_statements_aggregator)
)

feedbacks = [qa_relevance, qs_relevance, groundedness]

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [10]:
def get_trulens_recorder(query_engine, feedbacks, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
    )
    return tru_recorder

def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
        )
    return tru_recorder

In [11]:
tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                             app_id="Mixtral Direct Query Engine")

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added app Direct Query Engine
✅ added app Direct Query Engine
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_d868eddc257401ad74752dec15099816
✅ added feedback definition feedback_definition_hash_d868eddc257401ad74752dec15099816
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_50c6a6fd0694f8b892d0e2600b531066
✅ added f

In [12]:
eval_questions = [
    "when should you worry about baby split up?",
    "how often should you feed your baby?",
    "how much sleep should your baby get?",
    "how often should a baby poop?",
    "how should a clean my baby?"
]

In [13]:
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added record record_hash_df184cd1edd92589b0fe1194f2bb1467
✅ added record record_hash_df184cd1edd92589b0fe1194f2bb1467
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_0b7d72ff074883d5c5b9

In [14]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

INFO:trulens_eval.database.sqlalchemy_db:🛑 feedback result Groundedness FAILED feedback_result_hash_f5a7a2610b98cee54e123207b8ede83c
🛑 feedback result Groundedness FAILED feedback_result_hash_f5a7a2610b98cee54e123207b8ede83c
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.


In [15]:
records.head()

INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_2b490279b8c7ad3d798f6d6ef46d6d6f
✅ feedback result Answer Relevance DONE feedback_result_hash_2b490279b8c7ad3d798f6d6ef46d6d6f
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_df184cd1edd92589b0fe1194f2bb1467,"""when should you worry about baby split up?""","""\n\nYou should worry about baby split up if t...",-,"{""record_id"": ""record_hash_df184cd1edd92589b0f...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T10:53:33.361715"", ""...",2023-12-22T10:53:37.302180,1.0,0.0,1.0,[{'args': {'prompt': 'when should you worry ab...,[{'args': {'prompt': 'when should you worry ab...,"[{'args': {'source': '**6\. Remember, it gets ...",3,0,0.0
1,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_d9c25a9379e50bdf2dbf6055fd07f388,"""how often should you feed your baby?""","""\n\nBabies generally need feeding every 2 to ...",-,"{""record_id"": ""record_hash_d9c25a9379e50bdf2db...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T10:53:37.849647"", ""...",2023-12-22T10:53:41.994143,0.9,0.6,,[{'args': {'prompt': 'how often should you fee...,[{'args': {'prompt': 'how often should you fee...,,4,0,0.0
2,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_0e7e7b5297affd8c6533a9807660d75c,"""how much sleep should your baby get?""","""16 -20 hours a day.""",-,"{""record_id"": ""record_hash_0e7e7b5297affd8c653...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T10:53:43.049383"", ""...",2023-12-22T10:53:46.773562,1.0,0.5,1.0,[{'args': {'prompt': 'how much sleep should yo...,[{'args': {'prompt': 'how much sleep should yo...,[{'args': {'source': 'SLEEPY TIME ADVICE FOR P...,3,0,0.0
3,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_d6976e63376fd50c7584e32f2703dad7,"""how often should a baby poop?""","""\n- Newborns who are a few days old should ha...",-,"{""record_id"": ""record_hash_d6976e63376fd50c758...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T10:53:47.458363"", ""...",2023-12-22T10:53:51.626736,,,,,,,4,0,0.0
4,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_5cc12ff173d0c064f96c4917b71e6cd4,"""how should a clean my baby?""","""\n\nTo clean your baby, follow these steps:\n...",-,"{""record_id"": ""record_hash_5cc12ff173d0c064f96...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T10:53:52.117506"", ""...",2023-12-22T10:53:55.038642,,,,,,,2,0,0.0


In [None]:
# launches on http://localhost:8501/
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_ed48f5fb69953cd3d76a289bed51b631
✅ feedback result Answer Relevance DONE feedback_result_hash_ed48f5fb69953cd3d76a289bed51b631
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteI

<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Context Relevance DONE feedback_result_hash_761db5b95d7c4b9669ec0e5bf7360b40
✅ feedback result Context Relevance DONE feedback_result_hash_761db5b95d7c4b9669ec0e5bf7360b40
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
Context impl SQLit