# Pinecone + Custom relevance function

In [1]:
import os
from dotenv import load_dotenv
import pinecone
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from trulens_eval import TruChain, Feedback, Tru
import torch

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
api_key = os.environ.get("PINECONE_API_KEY")

In [3]:
from pinecone import Pinecone
pc = Pinecone(api_key=api_key)

In [4]:
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

In [5]:
model_name = "facebook/opt-1.3b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [6]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)

In [7]:
llm = HuggingFacePipeline(pipeline=pipe)

In [9]:
index_name = "rag-example"
from pinecone import Pinecone, ServerlessSpec
if index_name not in pc.list_indexes():
    pc.create_index(
    name=index_name,
    dimension=384,
    metric='cosine',
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

# Get the index
index = pc.Index(index_name)

In [10]:
texts = [
    "Pinecone is a vector database that makes it easy to build high-performance vector search applications.",
    "TruLens is a tool for evaluating and tracking LLM experiments.",
    "RAG stands for Retrieval-Augmented Generation, a technique that combines retrieval and generation for better AI responses."
]

In [11]:
for i, text in enumerate(texts):
    vector = embed_model.encode([text])[0].tolist()
    index.upsert(vectors=[(str(i), vector, {"text": text})])

In [12]:
vectorstore = LangchainPinecone(index, embeddings.embed_query, "text")

  warn_deprecated(


In [13]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [14]:
tru = Tru()

# Define a custom relevance function
def relevance_function(query, response):
    # This is a placeholder. Replace with actual relevance logic.
    return 0.5  # Returns a score between 0 and 1

# Define feedback function
relevance = Feedback(relevance_function).on_input_output()

# Create TruChain
truchain = TruChain(
    qa,
    app_id='RAG_Pinecone_Example',
    feedbacks=[relevance]
)

# Sample queries
queries = [
    "What is Pinecone?",
    "How can TruLens help with LLM experiments?",
    "Explain RAG in simple terms."
]

# Process queries and collect feedback
for query in queries:
    with truchain as recording:
        response = qa({"query": query})
    print(f"Query: {query}")
    print(f"Response: {response['result']}")
    print("---")

# Run TruLens dashboard
tru.run_dashboard()

Feedback implementation <function relevance_function at 0x000001F89CDB6DE0> cannot be serialized: Module __main__ is not importable. This may be ok unless you are using the deferred feedback mode.


🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.
✅ In relevance_function, input query will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance_function, input response will be set to __record__.main_output or `Select.RecordOutput` .


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Query: What is Pinecone?
Response: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.



Question: What is Pinecone?
Helpful Answer: Pinecone is a web-based application for creating and sharing 3D models. It's free to use and has no ads or in-app purchases.

Answer: Pinecone is a web-based application for creating and sharing 3D models. It's free to use and has no ads or in-app purchases.

Question: How do I create a model?
Helpful Answer: Create a new model by clicking on the "Create" button. You can also click on the "Add Model" link to add a model from your computer.

Answer: Create a new model by clicking on the "Create" button. You can also click on the "Add Model" link to add a model from your computer.

Question: How do I share my model with others?
Helpful Answer: Share your model by clicking on the "Share" icon next to the model name.

Answer: Share your model by cli



Query: How can TruLens help with LLM experiments?
Response: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

TruLens is a tool for evaluating and tracking LLM experiments.

RAG stands for Retrieval-Augmented Generation, a technique that combines retrieval and generation for better AI responses.

Pinecone is a vector database that makes it easy to build high-performance vector search applications.

Question: How can TruLens help with LLM experiments?
Helpful Answer: TruLens helps researchers evaluate their experimental designs by providing them with a set of tools that allow them to quickly generate new data sets from existing datasets. This allows researchers to test hypotheses in a more efficient manner than they would be able to do otherwise.

How does TruLens work?
TruLens uses a combination of machine learning techniques and traditional statistical methods to create a 



Query: Explain RAG in simple terms.
Response: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

RAG stands for Retrieval-Augmented Generation, a technique that combines retrieval and generation for better AI responses.

TruLens is a tool for evaluating and tracking LLM experiments.

Pinecone is a vector database that makes it easy to build high-performance vector search applications.

Question: Explain RAG in simple terms.
Helpful Answer:
Rag is a technique used by researchers to improve their AI systems. It involves combining retrieval with generation. The goal is to find the best possible solution from a set of data points. This can be done using a combination of different techniques such as neural networks, reinforcement learning, or even genetic algorithms.

The idea behind Rag is to combine two methods together to get the best possible result. For example, if we have a

Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.3:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>