In [1]:
# from google.colab import drive
from dotenv import load_dotenv, find_dotenv
import os

# Mount Google Drive (Colab only; disabled for local runs)
# drive.mount('/content/drive')

# Load the .env file (Colab path kept for reference; the active call below
# lets find_dotenv() search for the file instead)
# load_dotenv('/content/drive/My Drive/python/.env')
load_dotenv(find_dotenv())

# Read the API keys from the environment.
# os.getenv returns None when a variable is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

In [2]:
# Imports main tools:
from trulens_eval import TruChain, Feedback, Tru
# Create the TruLens handle. The cell output reports the database URL in use
# (sqlite:///default.sqlite) and warns that secret keys may be written to it;
# see the `database_redact_keys` option of Tru mentioned in that warning.
tru = Tru()
# NOTE(review): presumably clears records/feedback previously logged to that
# database - confirm before running against data you want to keep.
tru.reset_database()

# Imports from langchain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough

Package scikit-learn is installed but has a version conflict:
	(scikit-learn 1.1.1 (/Users/valentinshapovalov/opt/anaconda3/lib/python3.9/site-packages), Requirement.parse('scikit-learn>=1.3.1'))

This package is optional for trulens_eval so this may not be a problem but if
you need to use the related optional features and find there are errors, you
will need to resolve the conflict:

    ```bash
    pip install 'scikit-learn>=1.3.1'
    ```

If you are running trulens_eval in a notebook, you may need to restart the
kernel after resolving the conflict. If your distribution is in a bad place
beyond this package, you may need to reinstall trulens_eval so that all of the
dependencies get installed and hopefully corrected:
    
    ```bash
    pip uninstall -y trulens_eval
    pip install trulens_eval
    ```

Package langchain-community is installed but has a version conflict:
	(langchain-community 0.0.14 (/Users/valentinshapovalov/opt/anaconda3/lib/python3.9/site-packages), Requirement.p

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


In [3]:
# Load the blog post, asking BeautifulSoup to parse only the elements whose
# CSS class is one of the post title / header / content classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

### Create Vector Store

In [4]:
# Split the loaded documents into overlapping chunks. Sizes are in the
# splitter's length units (presumably characters - confirm against the
# RecursiveCharacterTextSplitter defaults).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

splits = text_splitter.split_documents(docs)

# Build a Chroma vector store from the chunks using OpenAI embeddings.
# NOTE(review): the embedding client is created without explicit credentials;
# presumably it picks up OPENAI_API_KEY from the environment - confirm.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings()
)

  warn_deprecated(


### Create RAG

In [6]:
# Expose the vector store through the retriever interface used by the chain.
retriever = vectorstore.as_retriever()

# Pull the "rlm/rag-prompt" prompt via langchain's hub (needs network access).
prompt = hub.pull("rlm/rag-prompt")
# temperature=0 is requested for the chat model.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in iteration order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Assemble the RAG pipeline:
#   1. "context" is the retriever output passed through format_docs, while
#      "question" forwards the chain input unchanged;
#   2. that mapping is fed to the prompt, then to the chat model;
#   3. StrOutputParser is the final step applied to the model output.
# NOTE(review): the feedback selectors printed later in this notebook
# (e.g. `app.first.steps.context.first`) appear to depend on this exact
# structure - re-check them if the chain shape changes.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

  warn_deprecated(


In [7]:
# rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable components. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

### Initialize Feedback Function(s)

In [8]:
import numpy as np
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider import OpenAI

# Initialize the provider class once; every feedback function below shares
# this single instance instead of constructing a second provider.
# NOTE: the variable name `openai` would shadow the `openai` package if that
# package were imported in this notebook.
openai = OpenAI()

# Select context to be used in feedback. The location of context is app specific.
context = App.select_context(rag_chain)

# Groundedness helper backed by the shared provider.
grounded = Groundedness(groundedness_provider=openai)

# Groundedness feedback: compares the collected context with the chain output,
# then aggregates the per-statement results.
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between question and each context chunk,
# averaged over the chunks.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets .


### Instrument chain for logging with TruLens

In [9]:
# Wrap the chain in a TruChain recorder. Records are stored under this app_id
# (the same id is used later to query them) and the three feedback functions
# are attached to it.
tru_recorder = TruChain(rag_chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

In [10]:
# Invoke the chain once through the recorder; the call hands back both the
# chain's response and the record describing the invocation.
response, tru_record = tru_recorder.with_record(rag_chain.invoke, "What is Task Decomposition?")

In [11]:
# Lay the recorded calls out as a nested structure; when displayed it shows as
# a Munch with keys such as 'record_id', 'app_id', 'cost', 'perf' and 'calls'.
json_like = tru_record.layout_calls_as_app()

In [12]:
json_like

Munch({'record_id': 'record_hash_d76abb3545faaa32feeae562b6b9a18f', 'app_id': 'Chain1_ChatApplication', 'cost': {'n_requests': 2, 'n_successful_requests': 2, 'n_classes': 0, 'n_tokens': 650, 'n_stream_chunks': 0, 'n_prompt_tokens': 590, 'n_completion_tokens': 60, 'cost': 0.000996}, 'perf': {'start_time': '2024-03-10T01:31:55.527402', 'end_time': '2024-03-10T01:31:58.724440'}, 'ts': '2024-03-10T01:31:58.724529', 'tags': '-', 'meta': None, 'main_input': 'What is Task Decomposition?', 'main_output': 'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable subtasks. Task decomposition can be achieved through various methods, such as using prompting techniques, task-specific instructions, or human inputs.', 'main_error': None, 'calls': [{'stack': [{'path': 'app', 'method': {'obj': {'cls': {'name': 'RunnableSequence', 'module': {'package_name': 'la

In [14]:
from ipytree import Tree, Node

def display_call_stack(data):
    """Build a ``Tree`` widget summarising one recorded app invocation.

    Args:
        data: Mapping-like record layout. It is indexed with the keys
            ``'record_id'``, ``'app_id'``, ``'cost'``, ``'perf'``, ``'ts'``,
            ``'tags'``, ``'main_input'``, ``'main_output'``, ``'main_error'``
            and ``'calls'``. Each call is indexed with ``'stack'``; each stack
            step with ``'path'`` and, optionally, ``'expanded'`` (an iterable
            of further steps that also carry ``'path'``).

    Returns:
        The populated ``Tree``: one node per summary field, followed by a
        ``'Calls'`` node containing a ``'Call'`` node per call with its steps.

    Raises:
        KeyError: If any of the keys listed above is missing (for plain
            mappings).
    """

    def attach(parent, label):
        # Create the node and hang it under `parent` immediately, so parents
        # are always attached before their children are populated.
        child = Node(label)
        parent.add_node(child)
        return child

    tree = Tree()

    # (label template, key in `data`) for the flat summary nodes, in display order.
    summary_fields = (
        ('Record ID: {}', 'record_id'),
        ('App ID: {}', 'app_id'),
        ('Cost: {}', 'cost'),
        ('Performance: {}', 'perf'),
        ('Timestamp: {}', 'ts'),
        ('Tags: {}', 'tags'),
        ('Main Input: {}', 'main_input'),
        ('Main Output: {}', 'main_output'),
        ('Main Error: {}', 'main_error'),
    )
    for template, key in summary_fields:
        attach(tree, template.format(data[key]))

    calls_node = attach(tree, 'Calls')
    for call in data['calls']:
        call_node = attach(calls_node, 'Call')
        for step in call['stack']:
            step_node = attach(call_node, 'Step: {}'.format(step['path']))
            if 'expanded' not in step:
                continue
            expanded_node = attach(step_node, 'Expanded')
            for expanded_step in step['expanded']:
                attach(expanded_node, 'Step: {}'.format(expanded_step['path']))

    return tree

# Usage: build the tree for the record layout obtained above and leave it as
# the cell's last expression so the notebook renders the widget.
tree = display_call_stack(json_like)
tree

Tree(nodes=(Node(name='Record ID: record_hash_d76abb3545faaa32feeae562b6b9a18f'), Node(name='App ID: Chain1_Ch‚Ä¶

In [16]:
# tree

In [17]:
# Run the chain inside the recorder's context manager; the invocation is
# captured in `recording`, from which the record is retrieved afterwards.
with tru_recorder as recording:
    llm_response = rag_chain.invoke("What is Task Decomposition?")

display(llm_response)

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable components. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

### Retrieve records and feedback

In [18]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get() when exactly one record was captured
# recs = recording.records # use .records when there are several

display(rec)

Record(record_id='record_hash_565eee26285cc2faea691ae332b2605e', app_id='Chain1_ChatApplication', cost=Cost(n_requests=2, n_successful_requests=2, n_classes=0, n_tokens=645, n_stream_chunks=0, n_prompt_tokens=590, n_completion_tokens=55, cost=0.000986), perf=Perf(start_time=datetime.datetime(2024, 3, 10, 1, 33, 29, 92004), end_time=datetime.datetime(2024, 3, 10, 1, 33, 33, 466445)), ts=datetime.datetime(2024, 3, 10, 1, 33, 33, 466497), tags='-', meta=None, main_input='What is Task Decomposition?', main_output='Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by dividing them into manageable components. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.', main_error=None, calls=[RecordAppCall(stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain_core.runnables.base

In [19]:
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or with the `wait_for_feedback_results` method.
# When retrieved directly, the results are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating, or use the utility method:

for feedback, feedback_result in rec.wait_for_feedback_results().items():
    print(feedback.name, feedback_result.result)

# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)

relevance 1.0
qs_relevance 0.8
groundedness_measure_with_cot_reasons 1.0


In [20]:
# Fetch everything logged for this app id. `records` is shown below as a table
# with one row per recorded invocation.
# NOTE(review): `feedback` is presumably the list of feedback column names -
# confirm. It also rebinds the `feedback` name used as a loop variable in the
# feedback-results cell.
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,qs_relevance,relevance,groundedness_measure_with_cot_reasons,qs_relevance_calls,relevance_calls,groundedness_measure_with_cot_reasons_calls,latency,total_tokens,total_cost
0,Chain1_ChatApplication,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_d76abb3545faaa32feeae562b6b9a18f,"""What is Task Decomposition?""","""Task Decomposition is a technique used to bre...",-,"{""record_id"": ""record_hash_d76abb3545faaa32fee...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-10T01:31:55.527402"", ""...",2024-03-10T01:31:58.724529,0.8,1.0,0.666667,[{'args': {'question': 'What is Task Decomposi...,[{'args': {'prompt': 'What is Task Decompositi...,[{'args': {'source': [[{'page_content': 'Fig. ...,3,650,0.000996
1,Chain1_ChatApplication,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_565eee26285cc2faea691ae332b2605e,"""What is Task Decomposition?""","""Task Decomposition is a technique used to bre...",-,"{""record_id"": ""record_hash_565eee26285cc2faea6...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-10T01:33:29.092004"", ""...",2024-03-10T01:33:33.466497,0.8,1.0,1.0,[{'args': {'question': 'What is Task Decomposi...,[{'args': {'prompt': 'What is Task Decompositi...,[{'args': {'source': [[{'page_content': 'Fig. ...,4,645,0.000986


### Explore in a Dashboard

In [21]:
# Launches a local streamlit process; the cell output shows the dashboard URL
# and the returned Popen handle.
tru.run_dashboard() # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu‚Ä¶

Dashboard started at http://192.168.1.17:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>