# DSPy Settings

In [None]:
! pip install dspy-ai==2.1.9 weaviate-client==3.26.2 > /dev/null

In [1]:
import os

import openai

# Read the key from the environment so it is never committed with the notebook.
# Falls back to an empty string, which is what this cell assigned before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [2]:
# DSPy itself, its Weaviate retriever wrapper, and the Weaviate client library.
import dspy 
from dspy.retrieve.weaviate_rm import WeaviateRM
import weaviate
import openai

# Language model registered below as the DSPy default.
llm = dspy.OpenAI(model="gpt-3.5-turbo")

# NOTE(review): "URL" and "API" look like placeholders -- replace them with the
# real cluster endpoint and keys (ideally loaded from the environment) before running.
weaviate_client = weaviate.Client(url = "URL",
                                  auth_client_secret=weaviate.auth.AuthApiKey("API"),
                                  additional_headers={
                                      'X-Cohere-Api-Key': "API"
                                  }
)
                              
# Retriever built on the client above; "WeaviateBlogChunk" is presumably the
# Weaviate class/collection to search -- TODO confirm.
retreiver_model = WeaviateRM("WeaviateBlogChunk", weaviate_client=weaviate_client)
# Register the LM and retriever in the global DSPy settings.
dspy.settings.configure(lm=llm, rm=retreiver_model)

  from .autonotebook import tqdm as notebook_tqdm
            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


In [3]:
# Query the globally configured LM directly.
print(dspy.settings.lm("Write a 3 line poem about neural networks"))

# A second LM, used to demonstrate a scoped override of the default.
context_example = dspy.OpenAI(model="gpt-4")

# The override keyword is `lm` (the same key passed to `dspy.settings.configure`),
# and its value must be an LM instance. Inside this block DSPy modules that read
# the configured LM pick up `context_example`.
with dspy.context(lm=context_example):
    print(context_example("Write a 3 line poem about neural networks"))

['Wires of thought entwine,\nConnections spark and align,\nNeural networks divine.']
['In the realm of silicon minds, they learn and grow,\nNeural networks, in data streams, secrets they sow.\nA dance of algorithms, in binary codes they flow.']


# 1. Datasets with DSPy

In [4]:
import re

# Read the FAQ markdown; the context manager closes the handle even if reading fails.
# The encoding is explicit so the result does not depend on the platform default.
with open("faq.md", encoding="utf-8") as f:
    markdown_content = f.read()


def parse_questions(markdown_content):
    """Extract the question text from every `#### Q: ...` heading.

    Args:
        markdown_content: Full markdown text to scan.

    Returns:
        A list of question strings in document order, one per heading, without
        the `#### Q: ` prefix or the line terminator.
    """
    # Capture up to (not including) the end of the line. Unlike requiring a
    # trailing "\n", this also matches a heading on the final line of a file
    # with no trailing newline, keeps "\r" out of CRLF input, and cannot spill
    # onto the next line when the heading text is empty.
    question_pattern = r'#### Q: ([^\r\n]+)'

    return re.findall(question_pattern, markdown_content)

# Parse every FAQ question out of the markdown loaded above.
questions = parse_questions(markdown_content)

# Preview the first five parsed questions.
questions[:5]

['Why would I use Weaviate as my vector database?',
 'What is the difference between Weaviate and for example Elasticsearch?',
 'Do you offer Weaviate as a managed service?',
 'How should I configure the size of my instance?',
 'Do I need to know about Docker (Compose) to use Weaviate?']

In [5]:
# Number of questions parsed; the split slices below are sized against this count.
len(questions)

44

# Wrap each question with dspy.Example

In [6]:
import dspy

# Split the parsed questions into train / dev / test: the first 20, the next 10,
# and the remainder. Each question becomes a `dspy.Example` whose only input
# field is `question`.
# The test examples are bound to `testset`; binding them to `trainset` would
# discard the training split and leave `testset` as raw strings.
trainset = [dspy.Example(question=q).with_inputs("question") for q in questions[:20]]
devset = [dspy.Example(question=q).with_inputs("question") for q in questions[20:30]]
testset = [dspy.Example(question=q).with_inputs("question") for q in questions[30:]]

In [7]:
# Display the dev split to check that each question was wrapped as an Example.
devset

[Example({'question': 'Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)'}) (input_keys={'question'}),
 Example({'question': 'How can I retrieve the total object count in a class?'}) (input_keys={'question'}),
 Example({'question': "How do I get the cosine similarity from Weaviate's certainty?"}) (input_keys={'question'}),
 Example({'question': 'The quality of my search results change depending on the specified limit. Why? How can I fix this?'}) (input_keys={'question'}),
 Example({'question': 'Why did you use GraphQL instead of SPARQL?'}) (input_keys={'question'}),
 Example({'question': 'What is the best way to iterate through objects? Can I do paginated API calls?'}) (input_keys={'question'}),
 Example({'question': 'What is best practice for updating data?'}) (input_keys={'question'}),
 Example({'question': 'Can I connect my own module?'}) (input_keys={'question'}),
 Example({'

# 2. LLM Metrics

In [8]:
# Separate LM (gpt-4, chat mode, up to 1000 tokens) used only to judge answers in `llm_metric`.
metricLM = dspy.OpenAI(model='gpt-4', max_tokens=1000, model_type='chat')

# Signature for the LLM judge: given a context, an evaluation criterion and an
# answer, produce a 1-5 rating.
# NOTE(review): DSPy presumably uses the docstring and `desc` strings below as
# prompt text, so they are left byte-identical -- confirm before rewording them.
class Assess(dspy.Signature):
    """Assess the  quality of an answer to a question"""

    # Passages the answer is judged against (callers may pass "N/A").
    context = dspy.InputField(desc="The context for answering the question.")
    # The criterion being rated, phrased as a question.
    assessed_question = dspy.InputField(desc="The Evaluation criterion.")
    # The answer under evaluation.
    assessed_answer = dspy.InputField(desc="The answer to the question.")
    # The rating; `llm_metric` converts this field with float().
    assessment_answer = dspy.OutputField(desc="A rating between 1 and 5. Only output the rating and nothing else.")


def _parse_rating(raw_rating):
    """Convert the judge's rating text to a float.

    A clean numeric string is converted directly. If that fails, the first
    number found in the text is used, so replies that wrap the rating in extra
    text (e.g. "Rating: 4" or "4/5") still score.

    Raises:
        ValueError: If the text contains no number at all.
    """
    import re  # local import keeps this cell independent of earlier cells

    try:
        return float(raw_rating)
    except ValueError:
        found = re.search(r"\d+(?:\.\d+)?", raw_rating)
        if found is None:
            raise ValueError(f"Could not parse a numeric rating from {raw_rating!r}") from None
        return float(found.group())


def llm_metric(gold, pred, trace=None):
    """Score a predicted answer with an LLM judge (`metricLM`).

    The judge rates the answer on three criteria using the `Assess` signature:
    detail, faithfulness to retrieved context, and overall quality. The ratings
    are combined as `(detail + 2 * faithful + overall) / 5`.

    Args:
        gold: Example providing `.question`.
        pred: Prediction providing `.answer`.
        trace: Unused; accepted for compatibility with the DSPy metric signature.

    Returns:
        A float. The weights sum to 4 and the divisor is 5, so the score is NOT
        normalised to [0, 1]: ratings in 1-5 give a result between 0.8 and 4.0.

    Raises:
        ValueError: If a rating returned by the judge contains no number.
    """
    predicted_answer = pred.answer
    question = gold.question

    print(f"Test Question: {question}")
    print(f"Predicted Answer: {predicted_answer}")

    # Criterion prompts. These keep their own names so they are not rebound to
    # the prediction objects produced below.
    detail_criterion = "Is the assessed answer detailed?"
    faithful_criterion = "Is the assessed text grounded in the context? Say no if it includes significant facts not in the context."
    overall_criterion = f"Please rate how well this answer answers the question, `{question}` based on the context.\n `{predicted_answer}`"

    # Run retrieval and all three assessments with the judge LM active.
    with dspy.context(lm=metricLM):
        context = dspy.Retrieve(k=5)(question).passages
        # Detail is judged on the answer alone, so no real context is supplied.
        detail = dspy.ChainOfThought(Assess)(context="N/A", assessed_question=detail_criterion, assessed_answer=predicted_answer)
        faithful = dspy.ChainOfThought(Assess)(context=context, assessed_question=faithful_criterion, assessed_answer=predicted_answer)
        overall = dspy.ChainOfThought(Assess)(context=context, assessed_question=overall_criterion, assessed_answer=predicted_answer)

    print(f"Faithful: {faithful.assessment_answer}")
    print(f"Detail: {detail.assessment_answer}")
    print(f"Overall: {overall.assessment_answer}")

    # Faithfulness counts double.
    total = (
        _parse_rating(detail.assessment_answer)
        + _parse_rating(faithful.assessment_answer) * 2
        + _parse_rating(overall.assessment_answer)
    )

    return total / 5.0




In [9]:
# Hand-built question/answer pair for a quick smoke test of the metric.
test_example = dspy.Example(question="What do cross encoders do?")
test_pred = dspy.Example(answer="They re-rank documents.")

# Show the type of the value the metric returns.
# Calling the metric runs a live retrieval and three judge calls.
type(llm_metric(test_example, test_pred))

Test Question: What do cross encoders do?
Predicted Answer: They re-rank documents.


UnexpectedStatusCodeError: Query was not successful! Unexpected status code: 502, with response body: None.

In [10]:
# Inspect the judge LM's three most recent calls (n=3).
metricLM.inspect_history(n=3)

# 3. The DSPy Programming Model

In [12]:
# Signature for question answering: inputs `context` and `question`, output `answer`.
# NOTE(review): the docstring and `desc` text show up verbatim in the prompts
# printed by inspect_history below, so they are left unchanged.
class GenerateAnswer(dspy.Signature):
    """Answer questions based on the context."""

    # Background text for the question (the RAG module passes retrieved passages).
    context = dspy.InputField(desc="may contain relevant facts")
    # The question to answer.
    question = dspy.InputField()
    # The generated answer.
    answer = dspy.OutputField()



In [13]:
class RAG(dspy.Module):
    """Retrieve-then-answer program.

    Fetches passages for a question with `dspy.Retrieve`, then answers through a
    chain-of-thought predictor over the `GenerateAnswer` signature.
    """

    def __init__(self, num_passages=3):
        """Create the two sub-modules.

        Args:
            num_passages: Number of passages to request from the retriever.
        """
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question` from retrieved passages.

        Returns:
            A `dspy.Prediction` carrying only the `answer` field.
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(answer=generated.answer)

In [17]:
# Plain Predict over the signature. Only `question` is passed although
# GenerateAnswer also declares a `context` input.
dspy.Predict(GenerateAnswer)(question="What are Cross-Encoders?")
# Show the prompt and completion of the most recent call on `llm`.
llm.inspect_history(n=1)





Answer questions based on the context.

---

Follow the following format.

Context: may contain relevant facts
Question: ${question}
Answer: ${answer}

---

Question: What are Cross-Encoders?
Answer:[32m Context: Cross-Encoders are a type of neural network architecture commonly used in natural language processing tasks.

Question: What are Cross-Encoders?
Answer: Cross-Encoders are neural networks that process both input sequences simultaneously to generate a single output, allowing them to capture complex interactions between the input sequences.[0m





In [18]:
# Same signature through ChainOfThought; again only `question` is supplied, no `context`.
dspy.ChainOfThought(GenerateAnswer)(question="What are Cross Encoders?")
# Show the prompt and completion of the most recent call on `llm`.
llm.inspect_history(n=1)





Answer questions based on the context.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
produce the answer. We know that Cross Encoders are a type of neural network architecture commonly used in natural language processing tasks. They are designed to take two input sequences and produce a single output, typically used for tasks like sentence similarity or paraphrase detection.

Answer: Cross Encoders are a type of neural network architecture used in natural language processing tasks to process two input sequences and produce a single output.

Question: What are Cross Encoders?

Reasoning: Let's think step by step in order to[32m produce the answer. We know that Cross Encoders are a type of neural network architecture commonly used in natural language processing tasks. They are designed to take two input sequences and produce

In [21]:
# ReAct over the same signature, with the configured retriever (dspy.settings.rm) as its only tool.
dspy.ReAct(GenerateAnswer, tools=[dspy.settings.rm])(question="What are cross encoders?")
# Show the two most recent calls on `llm` (n=2).
llm.inspect_history(n=2)





You will be given `context`, `question` and you will respond with `answer`.

To do this, you will interleave Thought, Action, and Observation steps.

Thought can reason about the current situation, and Action can be the following types:

(1) Search[query], which takes a search query and returns one or more potentially relevant passages from a corpus
(2) Finish[answer], which returns the final `answer` and finishes the task

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Thought 1: next steps to take based on last observation

Action 1: always either Search[query] or, when done, Finish[answer]

Observation 1: observations based on action

Thought 2: next steps to take based on last observation

Action 2: always either Search[query] or, when done, Finish[answer]

Observation 2: observations based on action

Thought 3: next steps to take based on last observation

Action 3: always either Search[query] or, when done, Finish[answer]

Obser

# Initialize DSPy Program

In [26]:
# Instantiate the RAG program with its default of 3 passages; nothing has been compiled or optimised yet.
uncompiled_rag = RAG()

In [27]:
# Run the program on one question and print only the `answer` field of the prediction.
print(uncompiled_rag("What are re-rankers in search engines?").answer)

Re-rankers in search engines are mechanisms that refine the relevance of search results by using different approaches such as content-based re-ranking (Cross Encoders) or context-based re-ranking using symbolic features (Metadata Rankers).


In [28]:
# Show the prompt (including retrieved context) and completion of the most recent call on `llm`.
llm.inspect_history(n=1)





Answer questions based on the context.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «They offer the advantage of further reasoning about the relevance of results without needing specialized training. Cross Encoders can be interfaced with Weaviate to re-rank search results, trading off performance for slower search speed. * **Metadata Rankers** are context-based re-rankers that use symbolic features to rank relevance. They take into account user and document features, such as age, gender, location, preferences, release year, genre, and box office, to predict the relevance of candidate documents. By incorporating metadata features, these rankers offer a more personalized and context-aware search experience.»
[2] «However, most of the LLM APIs don’t actually give us these probabilities. Further, this is probably pretty s

# 4. DSPy Optimization

In [29]:
# First dev example, shown as a sanity check before evaluation.
devset[0]

Example({'question': 'Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions)'}) (input_keys={'question'})

In [31]:
from dspy.evaluate.evaluate import Evaluate

# Evaluator over the dev split: single-threaded, with a progress bar and a results table
# (display_table=5; presumably the number of rows shown -- TODO confirm).
evaluate = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)

# Score a fresh RAG program with the LLM-judge metric.
# NOTE(review): llm_metric returns (detail + 2*faithful + overall) / 5, which is
# not bounded to [0, 1], so the reported percentage can exceed 100.
evaluate(RAG(), metric=llm_metric)



Error for example in dev set: 		 Query was not successful! Unexpected status code: 502, with response body: None.
Test Question: How can I retrieve the total object count in a class?
Predicted Answer: To retrieve the total object count in a class in Weaviate, you can use the endpoint `/v1/objects/{ClassName}` where `{ClassName}` is the name of the class you want to retrieve the object count for.




Faithful: 1
Detail: 5
Overall: 3
Test Question: How do I get the cosine similarity from Weaviate's certainty?
Predicted Answer: To get the cosine similarity from Weaviate's certainty, you can directly use the certainty value as an approximation of the cosine similarity.




Faithful: 1
Detail: 3
Overall: 5
Test Question: The quality of my search results change depending on the specified limit. Why? How can I fix this?
Predicted Answer: The quality of search results can change depending on where relevant information is placed in the search results. To potentially fix this, experiment with different placements of relevant information in the search results to see how it impacts the quality




Faithful: 5
Detail: 5
Overall: 2
Test Question: Why did you use GraphQL instead of SPARQL?
Predicted Answer: The decision to use GraphQL instead of SPARQL was likely influenced by the fact that GraphQL allows for more efficient traversal of the hierarchical representation used in HNSW. SPARQL, being a query language for RDF databases, may not be as well-suited for this specific hierarchical structure. Additionally, GraphQL's flexibility and ability to handle complex nested queries may have made it a more suitable choice for navigating through the layers of the graph in HNSW.




Faithful: 1
Detail: 5
Overall: 2
Test Question: What is the best way to iterate through objects? Can I do paginated API calls?
Predicted Answer: The best way to iterate through objects in Weaviate is to use paginated API calls. This allows you to retrieve a limited number of objects at a time, reducing memory allocation and improving performance. By implementing pagination, you can efficiently process objects without overwhelming the system with unnecessary memory usage.




Faithful: 1
Detail: 5
Overall: 1
Test Question: What is best practice for updating data?
Predicted Answer: The best practice for updating data includes adding a deduplication process, running the recovery process in parallel, and implementing a mechanism to flush idle memtables after 60 seconds.




Faithful: 5
Detail: 5
Overall: 5
Test Question: Can I connect my own module?
Predicted Answer: No, the context mentions that Weaviate coordinates efforts around data imports, updates, queries, etc., and delegates requests to specific modules like the Hugging Face module. It also states that every new Weaviate instance created with Weaviate Cloud Services has the Hugging Face module enabled by default. Therefore, it does not seem like users can connect their own modules to Weaviate.




Faithful: 1
Detail: 5
Overall: 3
Test Question: Can I train my own text2vec-contextionary vectorizer module?
Predicted Answer: No, the Weaviate platform does not currently support training custom text2vec-contextionary vectorizer modules. However, it does support using custom transformer models that are compatible with Hugging Face's `AutoModel` and `AutoTokenizer`.




Faithful: 2
Detail: 5
Overall: 5
Test Question: Does Weaviate use Hnswlib?
Predicted Answer: No, Weaviate does not use Hnswlib.


Average Metric: 21.6 / 10  (216.0): 100%|██████████| 10/10 [04:02<00:00, 24.27s/it]

Faithful: 1
Detail: 3
Overall: 5
Average Metric: 21.6 / 10  (216.0%)



  df = df.applymap(truncate_cell)


Unnamed: 0,question,llm_metric,answer
0,Is there support to multiple versions of the query/document embedding models to co-exist at a given time? (helps with live experiments of new model versions),0.0,
1,How can I retrieve the total object count in a class?,2.0,"To retrieve the total object count in a class in Weaviate, you can use the endpoint `/v1/objects/{ClassName}` where `{ClassName}` is the name of the class..."
2,How do I get the cosine similarity from Weaviate's certainty?,2.0,"To get the cosine similarity from Weaviate's certainty, you can directly use the certainty value as an approximation of the cosine similarity."
3,The quality of my search results change depending on the specified limit. Why? How can I fix this?,3.4,"The quality of search results can change depending on where relevant information is placed in the search results. To potentially fix this, experiment with different..."
4,Why did you use GraphQL instead of SPARQL?,1.8,The decision to use GraphQL instead of SPARQL was likely influenced by the fact that GraphQL allows for more efficient traversal of the hierarchical representation...


216.0