# Project

In [1]:
import os
import time
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sklearn.metrics.pairwise import cosine_similarity
from trulens_eval import TruChain, Feedback, Tru
import numpy as np
from IPython.display import display
from pinecone import Pinecone

In [2]:
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [3]:
from dotenv import load_dotenv
load_dotenv()
api_key = os.environ.get("PINECONE_API_KEY")
pc = Pinecone(api_key=api_key)

In [45]:
from sentence_transformers import SentenceTransformer

# Load the model
Transformer_Name='all-distilroberta-v1'
embeddings_model = SentenceTransformer(Transformer_Name)
embeddings = HuggingFaceEmbeddings(model_name=Transformer_Name)

# Access the Hugging Face Transformer model
hf_model = embeddings_model._first_module().auto_model

# Get the word embedding dimension
word_embedding_dimension = hf_model.config.hidden_size
print("Embeddings model loaded")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.3k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings model loaded


In [46]:
index_name = "rag"
if index_name in pc.list_indexes():
    print(f"Deleting existing index: {index_name}")
    pc.delete_index(index_name)
    # Wait for the index to be fully deleted
    while index_name in pc.list_indexes():
        time.sleep(1)

In [49]:
# Create new Pinecone index
print(f"Creating new index: {index_name}")
from pinecone import Pinecone, ServerlessSpec

pc.create_index(
    name=index_name,
    dimension=word_embedding_dimension,
    metric='cosine',
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

print(f"Index {index_name} is ready")

Creating new index: rag
Index rag is ready


In [50]:
from PyPDF2 import PdfReader
def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as file:
        pdf = PdfReader(file)
        text = ""
        for page in pdf.pages:
            text += page.extract_text()
    return text

pdf_path = "D:/TruLens/attention.pdf"
raw_text = extract_text_from_pdf(pdf_path)

In [51]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
texts = text_splitter.split_text(raw_text)

In [52]:
from langchain.schema import Document
documents = [Document(page_content=t) for t in texts]

In [53]:
index = pc.Index(index_name)
for i, doc in enumerate(documents):
    embedding = embeddings_model.encode(doc.page_content).tolist()
    index.upsert(vectors=[(str(i), embedding, {"text": doc.page_content})])

print(f"Successfully stored {len(documents)} document chunks in Pinecone index '{index_name}'")

Successfully stored 41 document chunks in Pinecone index 'rag'


In [54]:
from langchain.vectorstores import Pinecone as LangchainPinecone
vectorstore = LangchainPinecone(index, embeddings.embed_query, "text")



In [55]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Language model loaded")

Language model loaded


In [56]:
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    device="cpu"  # Use "cuda" if you have a GPU
)

In [57]:
llm = HuggingFacePipeline(pipeline=pipe)
print("Pipeline setup complete")

Pipeline setup complete


In [58]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)
print("QA chain created")

QA chain created


In [59]:
from trulens_eval.feedback.provider.litellm import LiteLLM
provider = LiteLLM(model_engine="ollama/llama2")

In [60]:
from trulens_eval.app import App
context = App.select_context(qa_chain)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons,name="Groundedness")
    .on(context.collect()) # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance,name="Q/A relevance")
    .on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons,name="Q/Context relevance")
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retriever.invoke.rets[:].page_content.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/A relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/A relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Q/Context relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Q/Context relevance, input context will be set to __record__.app.retriever.invoke.rets[:].page_content .


In [61]:
app_id = f"Rag Using {Transformer_Name}"

In [62]:
questions = [
    "What is the main topic of the document?",
    "Who is the author of this document?",
    # Add more questions as needed
]

In [63]:
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=[f_context_relevance,f_answer_relevance,f_groundedness]
)
print("TruChain created")

TruChain created


In [64]:
for question in questions:
    with tru_recorder as recording:
        response = qa_chain({"query": question})
    
    print(f"\nQuestion: {question}")
    print(f"Answer: {response['result']}")
    
    # Retrieve the record of the app invocation
    rec = recording.get()  
    
    # Display the record
    display(rec)
    
    # Wait for feedback results and print them
    print("\nFeedback Results:")
    for feedback, feedback_result in rec.wait_for_feedback_results().items():
        print(f"{feedback.name}: {feedback_result.result}")


Question: What is the main topic of the document?
Answer: age recognition.


Record(record_id='record_hash_0d0c7815063d1f57861443459a01e732', app_id='Rag Using all-distilroberta-v1', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 6, 19, 32, 9, 511191), end_time=datetime.datetime(2024, 7, 6, 19, 32, 17, 728650)), ts=datetime.datetime(2024, 7, 6, 19, 32, 17, 728650), tags='-', meta=None, main_input='What is the main topic of the document?', main_output='age recognition.', main_error=None, calls=[RecordAppCall(call_id='e1107a89-fdb0-4000-b63a-9b60a50be9af', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2590121368592, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2590121368592, init_bindings=None), name='invoke')), RecordAppCallMethod(path=L


Feedback Results:
Q/Context relevance: 0.625
Q/A relevance: 0.7
Groundedness: -1.0

Question: Who is the author of this document?
Answer: [11]


Record(record_id='record_hash_f61e45a8d0fe4c8bdf94277e7eeb4a78', app_id='Rag Using all-distilroberta-v1', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 7, 6, 19, 38, 7, 899757), end_time=datetime.datetime(2024, 7, 6, 19, 38, 15, 600708)), ts=datetime.datetime(2024, 7, 6, 19, 38, 15, 601708), tags='-', meta=None, main_input='Who is the author of this document?', main_output='[11]', main_error=None, calls=[RecordAppCall(call_id='ad3c138c-7976-4386-87e3-a5c96664c922', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2590121368592, init_bindings=None), name='__call__')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain.chains.retrieval_qa.base.RetrievalQA, id=2590121368592, init_bindings=None), name='invoke')), RecordAppCallMethod(path=Lens().app, metho


Feedback Results:
Q/Context relevance: 0.625
Q/A relevance: 1.0
Groundedness: -1.0


In [65]:
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.to_numpy

<bound method DataFrame.to_numpy of                            app_id  \
0  Rag Using all-distilroberta-v1   
1  Rag Using all-distilroberta-v1   

                                            app_json  \
0  {"tru_class_info": {"name": "TruChain", "modul...   
1  {"tru_class_info": {"name": "TruChain", "modul...   

                                              type  \
0  RetrievalQA(langchain.chains.retrieval_qa.base)   
1  RetrievalQA(langchain.chains.retrieval_qa.base)   

                                      record_id  \
0  record_hash_0d0c7815063d1f57861443459a01e732   
1  record_hash_f61e45a8d0fe4c8bdf94277e7eeb4a78   

                                       input              output tags  \
0  "What is the main topic of the document?"  "age recognition."    -   
1      "Who is the author of this document?"              "[11]"    -   

                                         record_json  \
0  {"record_id": "record_hash_0d0c7815063d1f57861...   
1  {"record_id": "record_hash_f61e

In [66]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.1.5:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [67]:
def delete_pinecone_index(index_name):
    try:
        if index_name in pc.list_indexes().names():
            pc.delete_index(index_name)
            print(f"Successfully deleted Pinecone index: {index_name}")
        else:
            print(f"Index {index_name} does not exist or has already been deleted.")
    except Exception as e:
        print(f"Error deleting Pinecone index: {str(e)}")
delete_pinecone_index(index_name)

Successfully deleted Pinecone index: rag
