# Pinecone + Trulens on RAG

In [1]:
import os
import time
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sklearn.metrics.pairwise import cosine_similarity
from trulens_eval import TruChain, Feedback, Tru
import numpy as np
from IPython.display import display
from pinecone import Pinecone

In [2]:
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [3]:
from dotenv import load_dotenv
load_dotenv()
api_key = os.environ.get("PINECONE_API_KEY")

In [4]:
pc = Pinecone(api_key=api_key)

In [5]:
index_name = "rag"

In [6]:
if index_name in pc.list_indexes():
    print(f"Deleting existing index: {index_name}")
    pc.delete_index(index_name)
    # Wait for the index to be fully deleted
    while index_name in pc.list_indexes():
        time.sleep(1)

In [7]:
# Create new Pinecone index
print(f"Creating new index: {index_name}")
from pinecone import Pinecone, ServerlessSpec

pc.create_index(
    name=index_name,
    dimension=384,
    metric='cosine',
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

print(f"Index {index_name} is ready")

Creating new index: rag
Index rag is ready


In [8]:
from PyPDF2 import PdfReader
def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as file:
        pdf = PdfReader(file)
        text = ""
        for page in pdf.pages:
            text += page.extract_text()
    return text

pdf_path = "D:/TruLens/attention.pdf"
raw_text = extract_text_from_pdf(pdf_path)

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
texts = text_splitter.split_text(raw_text)

In [10]:
from langchain.schema import Document
documents = [Document(page_content=t) for t in texts]

In [27]:
from sentence_transformers import SentenceTransformer
embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
print("Embeddings model loaded")

Embeddings model loaded


SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [12]:
index = pc.Index(index_name)
for i, doc in enumerate(documents):
    embedding = embeddings_model.encode(doc.page_content).tolist()
    index.upsert(vectors=[(str(i), embedding, {"text": doc.page_content})])

print(f"Successfully stored {len(documents)} document chunks in Pinecone index '{index_name}'")

Successfully stored 41 document chunks in Pinecone index 'rag'


In [13]:
from langchain.vectorstores import Pinecone as LangchainPinecone
vectorstore = LangchainPinecone(index, embeddings.embed_query, "text")

  warn_deprecated(


In [14]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Language model loaded")

Language model loaded


In [15]:
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    device="cpu"  # Use "cuda" if you have a GPU
)

In [16]:
llm = HuggingFacePipeline(pipeline=pipe)
print("Pipeline setup complete")

Pipeline setup complete


In [17]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)
print("QA chain created")

QA chain created


In [18]:
from trulens_eval.feedback.provider.litellm import LiteLLM
provider = LiteLLM(model_engine="ollama/llama2")

In [19]:
from trulens_eval.app import App
context = App.select_context(qa_chain)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons,name="Groundedness")
    .on(context.collect()) # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance,name="Q/A relevance")
    .on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons,name="Q/Context relevance")
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retriever.invoke.rets[:].page_content.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/A relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/A relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/Context relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/Context relevance, input context will be set to __record__.app.retriever.invoke.rets[:].page_content .


In [20]:
app_id = "RAG_QA_Chain"

In [21]:
questions = [
    "What is the main topic of the document?",
    "Who is the author of this document?",
    # Add more questions as needed
]

In [22]:
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=[f_context_relevance,f_answer_relevance,f_groundedness]
)
print("TruChain created")

TruChain created


In [23]:
for question in questions:
    with tru_recorder as recording:
        response = qa_chain({"query": question})
    
    print(f"\nQuestion: {question}")
    print(f"Answer: {response['result']}")
    
    # Retrieve the record of the app invocation
    rec = recording.get()  
    
    # Display the record
    display(rec)
    
    # Wait for feedback results and print them
    print("\nFeedback Results:")
    for feedback, feedback_result in rec.wait_for_feedback_results().items():
        print(f"{feedback.name}: {feedback_result.result}")


Question: What is the main topic of the document?
Answer: Training This section describes the training regime for our models.


Record(record_id='record_hash_7d5f772e734257b34cd9af7479dd4b41', app_id='RAG_QA_Chain', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 6, 12, 23, 40, 169418), end_time=datetime.datetime(2024, 7, 6, 12, 23, 47, 695436)), ts=datetime.datetime(2024, 7, 6, 12, 23, 47, 697435), tags='-', meta=None, main_input='What is the main topic of the document?', main_output='Training This section describes the training regime for our models.', main_error=None, calls=[RecordAppCall(call_id='7c561a0f-3975-4bd6-bd6c-ab35ab602ddc', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2132288035472, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2132288035472, init_bindings=None), name='inv


Feedback Results:
Q/Context relevance: 0.9
Q/A relevance: 0.8
Groundedness: 0.8

Question: Who is the author of this document?
Answer: Nal Kalchbrenner and Stephan Gouws


Record(record_id='record_hash_e938143f543d7faabb32670f5c795d89', app_id='RAG_QA_Chain', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 6, 12, 28, 20, 941996), end_time=datetime.datetime(2024, 7, 6, 12, 28, 28, 815213)), ts=datetime.datetime(2024, 7, 6, 12, 28, 28, 817213), tags='-', meta=None, main_input='Who is the author of this document?', main_output='Nal Kalchbrenner and Stephan Gouws', main_error=None, calls=[RecordAppCall(call_id='5edfd327-c563-4ed2-9524-d1cd3776ade3', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2132288035472, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2132288035472, init_bindings=None), name='invoke')), RecordAppCallMethod(path=Lens


Feedback Results:
Q/Context relevance: 0.725
Q/A relevance: 0.8
Groundedness: 0.8


In [24]:
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.to_numpy

<bound method DataFrame.to_numpy of          app_id                                           app_json  \
0  RAG_QA_Chain  {"tru_class_info": {"name": "TruChain", "modul...   
1  RAG_QA_Chain  {"tru_class_info": {"name": "TruChain", "modul...   

                                              type  \
0  RetrievalQA(langchain.chains.retrieval_qa.base)   
1  RetrievalQA(langchain.chains.retrieval_qa.base)   

                                      record_id  \
0  record_hash_7d5f772e734257b34cd9af7479dd4b41   
1  record_hash_e938143f543d7faabb32670f5c795d89   

                                       input  \
0  "What is the main topic of the document?"   
1      "Who is the author of this document?"   

                                              output tags  \
0  "Training This section describes the training ...    -   
1               "Nal Kalchbrenner and Stephan Gouws"    -   

                                         record_json  \
0  {"record_id": "record_hash_7d5f772e734257b34cd..

In [25]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.5:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>