# Trulens implementation on text using open source llms

In [1]:
# Trulens implementation on text using
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sklearn.metrics.pairwise import cosine_similarity
from trulens_eval import TruChain, Feedback, Tru
import numpy as np

In [2]:
# Initialize Tru
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [3]:
# Load and preprocess the data
loader = TextLoader("D:/TruLens/speech.txt", encoding='utf-8')
documents = loader.load()
print(f"Loaded {len(documents)} documents")

Loaded 1 documents


In [4]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
print(f"Split into {len(texts)} text chunks")

Split into 68 text chunks


In [5]:
# Create embeddings and vector store with matching dimensions
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # 768 dimensions
print("Embeddings model loaded")

vectorstore = Chroma.from_documents(texts, embeddings)
print("Vector store created successfully")

Embeddings model loaded
Vector store created successfully


In [6]:
# Create a retriever
retriever = vectorstore.as_retriever()

In [7]:
# Set up the language model (FLAN-T5)
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Language model loaded")

Language model loaded


In [8]:
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    device="cpu"  # Use "cuda" if you have a GPU
)

In [9]:
llm = HuggingFacePipeline(pipeline=pipe)
print("Pipeline setup complete")

Pipeline setup complete


In [10]:
# Create the RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)
print("QA chain created")

QA chain created


In [11]:
from trulens_eval.feedback.provider.litellm import LiteLLM
provider = LiteLLM(model_engine="ollama/llama2")

In [12]:
import numpy as np
# coherence = Feedback(provider.coherence_with_cot_reasons).on_output()
# correctness = Feedback(provider.correctness_with_cot_reasons,name="Correctness").on_output()
# harmfulness = Feedback(provider.harmfulness_with_cot_reasons, higher_is_better=False).on_output()
# controversy = Feedback(provider.controversiality_with_cot_reasons, higher_is_better=False).on_output()
from trulens_eval.app import App
context = App.select_context(qa_chain)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons,name="Groundedness")
    .on(context.collect()) # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance,name="Q/A relevance")
    .on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons,name="Q/Context relevance")
    .on_input()
    .on(context)
    .aggregate(np.mean)
)


✅ In Groundedness, input source will be set to __record__.app.retriever.invoke.rets[:].page_content.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/A relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/A relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/Context relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/Context relevance, input context will be set to __record__.app.retriever.invoke.rets[:].page_content .


In [13]:
app_id = "RAG_QA_Chain"

In [14]:
# Example questions
questions = [
    "What is the amrit kaal?",
    "What is the theme of the speech?"
]

In [15]:
# Create TruChain
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=[f_context_relevance,f_answer_relevance,f_groundedness]
)
print("TruChain created")

TruChain created


In [16]:
for question in questions:
    with tru_recorder as recording:
        response = qa_chain({"query": question})
    
    print(f"\nQuestion: {question}")
    print(f"Answer: {response['result']}")
    
    # Retrieve the record of the app invocation
    rec = recording.get()  
    
    # Display the record
    display(rec)
    
    # Wait for feedback results and print them
    print("\nFeedback Results:")
    for feedback, feedback_result in rec.wait_for_feedback_results().items():
        print(f"{feedback.name}: {feedback_result.result}")


Question: What is the amrit kaal?
Answer: time of duty


Record(record_id='record_hash_9c5243d76b7330a74b90bf4c039f0acc', app_id='RAG_QA_Chain', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 5, 22, 57, 59, 500915), end_time=datetime.datetime(2024, 7, 5, 22, 58, 5, 641621)), ts=datetime.datetime(2024, 7, 5, 22, 58, 5, 651634), tags='-', meta=None, main_input='What is the amrit kaal?', main_output='time of duty', main_error=None, calls=[RecordAppCall(call_id='4e2f4ebe-b272-46d2-9bdb-66e2c2c2ab5e', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=1877701235344, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=1877701235344, init_bindings=None), name='invoke')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=la


Feedback Results:




Q/Context relevance: 0.775
Q/A relevance: 0.6
Groundedness: 0.1

Question: What is the theme of the speech?
Answer: India's unity gives us strength


Record(record_id='record_hash_e95e49df606514e74f44424e0621f3fe', app_id='RAG_QA_Chain', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 5, 23, 3, 36, 708134), end_time=datetime.datetime(2024, 7, 5, 23, 3, 42, 217730)), ts=datetime.datetime(2024, 7, 5, 23, 3, 42, 219743), tags='-', meta=None, main_input='What is the theme of the speech?', main_output="India's unity gives us strength", main_error=None, calls=[RecordAppCall(call_id='5a6ac9a1-e50c-4710-98e7-aacdf04777ad', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=1877701235344, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=1877701235344, init_bindings=None), name='invoke')), RecordAppCallMethod(path=Lens().app, m


Feedback Results:




Q/Context relevance: 0.75
Q/A relevance: 0.7
Groundedness: 0.0


In [17]:
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.to_numpy

<bound method DataFrame.to_numpy of          app_id                                           app_json  \
0  RAG_QA_Chain  {"tru_class_info": {"name": "TruChain", "modul...   
1  RAG_QA_Chain  {"tru_class_info": {"name": "TruChain", "modul...   

                                              type  \
0  RetrievalQA(langchain.chains.retrieval_qa.base)   
1  RetrievalQA(langchain.chains.retrieval_qa.base)   

                                      record_id  \
0  record_hash_9c5243d76b7330a74b90bf4c039f0acc   
1  record_hash_e95e49df606514e74f44424e0621f3fe   

                                input                             output tags  \
0           "What is the amrit kaal?"                     "time of duty"    -   
1  "What is the theme of the speech?"  "India's unity gives us strength"    -   

                                         record_json  \
0  {"record_id": "record_hash_9c5243d76b7330a74b9...   
1  {"record_id": "record_hash_e95e49df606514e74f4...   

                       

In [18]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.3:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>