# you.com <> dspy: ChatVC

A chatbot that I can ask questions about early-stage investing and about any news relevant to potential investment opportunities.

In [None]:
%%capture
# Install the notebook's pinned dependencies; %%capture hides pip's output.
# NOTE(review): `from dotenv import load_dotenv` (used in the next cell) is normally
# provided by the `python-dotenv` distribution — confirm that the `dotenv` pin below
# is the intended package.
! pip install dspy==0.1.5
! pip install dotenv==0.0.5

## Load API keys

In [9]:
# Assumes a .env file exists that defines the API keys YDC_API_KEY and OPENAI_API_KEY.

from dotenv import load_dotenv

# Load the .env entries so the keys are available through the environment
# (the You.com retriever defined later in this notebook reads YDC_API_KEY from os.environ).
load_dotenv()

True

## Create Language Model (lm)

In [11]:
import dspy

# NOTE: the handle is called `turbo`, but the model configured here is gpt-4o.
turbo = dspy.OpenAI(model='gpt-4o')
# Register it as the LM in DSPy's global settings.
dspy.settings.configure(lm=turbo)

### Signature

Every call to the LM in a DSPy program needs to have a `Signature`.

A signature consists of three simple elements:

1. A minimal description of the sub-task the LM is supposed to solve.
2. A description of one or more input fields (e.g., input question) that we will give to the LM.
3. A description of one or more output fields (e.g., the question's answer) that we will expect from the LM.

In [12]:
# Signature for plain question answering: question -> answer.
# The class docstring is the task description of the signature and the `desc`
# string describes the output field, so both are prompt content rather than
# ordinary documentation — edit them deliberately.
class BasicQA(dspy.Signature):
    """Answer questions with wise suggestions"""

    # Single input: the user's question.
    question = dspy.InputField()
    # Single output: the answer, with a soft length hint for the LM.
    answer = dspy.OutputField(desc="often between 40-50 words")

In [13]:
# Example question used to exercise the BasicQA signature.
question = "If you are advising a founder on how they should choose an invester in their company, what qualities should they look for?"

# Define the predictor: a dspy.Predict module built from the BasicQA signature.
generate_answer = dspy.Predict(BasicQA)

# Call the predictor on a particular input; the keyword must match the signature's input field name.
pred = generate_answer(question=question)

# Print the input and the prediction (the output field is exposed as `pred.answer`).
print(f"Question: {question}")
print(f"Predicted Answer: {pred.answer}")

Question: If you are advising a founder on how they should choose an invester in their company, what qualities should they look for?
Predicted Answer: Question: If you are advising a founder on how they should choose an investor in their company, what qualities should they look for?
Answer: Look for investors who align with your vision, bring industry expertise, and offer valuable networks. Ensure they have a track record of supporting startups and can provide strategic guidance. Compatibility in values and communication style is also crucial for a successful partnership.


## Create Retriever Model (rm)

In [14]:
# TODO has this been merged?

import os
import warnings
from typing import Any, Literal, Optional, Union

import requests

import dspy
from dsp.utils import dotdict


class YouRM(dspy.Retrieve):
    """Retriever for You.com's Search and News API.

    [API reference](https://documentation.you.com/api-reference/)

    Args:
        ydc_api_key: you.com API key, if `YDC_API_KEY` is not set in the environment
        k: If ``endpoint="search"``, the max number of search hits to use; every
           snippet of each of those hits is returned.
           If ``endpoint="news"``, the max articles to return.
        endpoint: you.com endpoints
        num_web_results: The max number of web results to return, must be under 20
        safesearch: Safesearch settings, one of "off", "moderate", "strict", defaults to moderate
        country: Country code, ex: 'US' for United States, see API reference for more info
        search_lang: (News API) Language codes, ex: 'en' for English, see API reference for more info
        ui_lang: (News API) User interface language for the response, ex: 'en' for English.
                            See API reference for more info
        spellcheck: (News API) Whether to spell check query or not, defaults to True

    Raises:
        RuntimeError: If no API key is passed and `YDC_API_KEY` is not set.
        ValueError: If `endpoint` is neither "search" nor "news".
    """

    def __init__(
        self,
        ydc_api_key: Optional[str] = None,
        k: int = 3,
        endpoint: Literal["search", "news"] = "search",
        num_web_results: Optional[int] = None,
        safesearch: Optional[Literal["off", "moderate", "strict"]] = None,
        country: Optional[str] = None,
        search_lang: Optional[str] = None,
        ui_lang: Optional[str] = None,
        spellcheck: Optional[bool] = None,
    ):
        super().__init__(k=k)

        # Data validation
        if not ydc_api_key and not os.environ.get("YDC_API_KEY"):
            raise RuntimeError('You must supply `ydc_api_key` or set environment variable "YDC_API_KEY"')

        if endpoint not in ("search", "news"):
            raise ValueError('`endpoint` must be either "search" or "news"')

        # Raise warning if News API-specific fields are set but endpoint is not "news"
        if endpoint != "news":
            news_api_fields = {
                "search_lang": search_lang,
                "ui_lang": ui_lang,
                "spellcheck": spellcheck,
            }
            for name, value in news_api_fields.items():
                # Compare against None (not truthiness) so that an explicit
                # `spellcheck=False` is reported as well.
                if value is not None:
                    warnings.warn(
                        (
                            # Report the field *name*; the value alone does not
                            # tell the user which argument to drop.
                            f"News API-specific field '{name}' is set but `{endpoint=}`. "
                            "This will have no effect."
                        ),
                        UserWarning,
                    )

        self.ydc_api_key = ydc_api_key or os.environ.get("YDC_API_KEY")
        self.endpoint = endpoint
        self.num_web_results = num_web_results
        self.safesearch = safesearch
        self.country = country
        self.search_lang = search_lang
        self.ui_lang = ui_lang
        self.spellcheck = spellcheck

    def _generate_params(self, query: str) -> dict[str, Any]:
        """Build the query-string parameters for the configured endpoint.

        Options that were left as `None` are omitted from the result.
        """
        params = {"safesearch": self.safesearch, "country": self.country}

        if self.endpoint == "search":
            params.update(
                query=query,
                num_web_results=self.num_web_results,
            )
        elif self.endpoint == "news":
            # The news endpoint uses different parameter names (`q`, `count`).
            params.update(
                q=query,
                count=self.num_web_results,
                search_lang=self.search_lang,
                ui_lang=self.ui_lang,
                spellcheck=self.spellcheck,
            )

        # Remove `None` values
        params = {k: v for k, v in params.items() if v is not None}
        return params

    def forward(self, query_or_queries: Union[str, list[str]], k: Optional[int] = None) -> list[dotdict]:
        """Query the configured endpoint and return the retrieved texts.

        Args:
            query_or_queries: A single query string or a list of query strings.
            k: Per-query limit on hits (search) or articles (news); defaults to `self.k`.

        Returns:
            A list of `dotdict` objects, each with a `long_text` entry, holding the
            results of all queries in query order.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        k = k if k is not None else self.k

        queries = [query_or_queries] if isinstance(query_or_queries, str) else query_or_queries
        headers = {"X-API-Key": self.ydc_api_key}  # identical for every request

        # Accumulate across queries. Starting from an empty list also makes an
        # empty `queries` input return [] instead of failing on an unbound name.
        docs: list[str] = []
        for query in queries:
            params = self._generate_params(query)
            response = requests.get(
                f"https://api.ydc-index.io/{self.endpoint}",
                params=params,
                headers=headers,
            )
            response.raise_for_status()
            results = response.json()

            if self.endpoint == "search":
                docs.extend(snippet for hit in results["hits"][:k] for snippet in hit["snippets"])
            elif self.endpoint == "news":
                docs.extend(article["description"] for article in results["news"]["results"][:k])
        return [dotdict({"long_text": document}) for document in docs]

In [15]:
# from dspy.retrieve.you_rm import YouRM

# Build a retriever against the news endpoint; the API key is taken from YDC_API_KEY.
news_rm = YouRM(endpoint="news")
# Smoke test: query the retriever directly.
res = news_rm("Princeton")
# Display the raw result: a list of dict-like items, each with a `long_text` entry.
res

[{'long_text': "It's not quite summer yet, though it might as well be ..."},
 {'long_text': 'PRINCETON, NJ - The Princeton wrestling team announced Thursday that the program will be welcoming seven incoming freshman as a part of the Class of 2028.'},
 {'long_text': 'The new true crime series — from the creators of the award-winning podcast "Father Wants Us Dead" — investigates the 1989 cold-case killing of a Princeton grande dame.'}]

### Retrieve

A module `dspy.Retrieve(k)` will search for the top-k passages that match a given query. 
 
By default, this will use the retriever we configure in `dspy.settings.configure()`.

In [16]:
# Register the news retriever (alongside the LM) in DSPy's global settings so that
# `dspy.Retrieve` uses it by default.
dspy.settings.configure(lm=turbo, rm=news_rm)

In [17]:
# Query to send to the retriever configured in dspy.settings.
question = "What is latest news about Princeton University?"

# Retrieve module limited to the top 3 passages.
retrieve = dspy.Retrieve(k=3)
# The retrieved texts are exposed on the result as `.passages`.
topK_passages = retrieve(question).passages

print(f"Top {retrieve.k} passages for question: {question} \n", '-' * 30, '\n')

# Print the passages with 1-based numbering.
for idx, passage in enumerate(topK_passages):
    print(f'{idx+1}]', passage, '\n')

Top 3 passages for question: What is latest news about Princeton University? 
 ------------------------------ 

1] Reunions events begin Thursday, May 23, and run through Sunday, May 26. 

2] More than a dozen students at Princeton University said they were ending their hunger strike amid continued anti-Israel demonstrations at the university. 

3] Over a dozen students at Princeton University have been on hunger strike for the past week as part of a Gaza solidarity encampment on campus protesting Israel’s war on Gaza and calling on the university to disclose and divest from companies with ties to Israel, among other demands. 



## Option 1: RAG

Given a question, we'll search for the latest news through the you.com News API and then feed the results as context for answer generation.

### Signature

Let's start by defining this signature: `context, question --> answer.`

In [37]:
# Signature for retrieval-augmented answering: context, question -> answer.
# The class docstring and the `desc` strings are rendered into the prompt sent to
# the LM, so they are prompt content rather than ordinary documentation.
class GenerateAnswer(dspy.Signature):
    """Answer questions with the news in the context"""
    # Retrieved news passages supplied to the LM.
    context = dspy.InputField(desc="may contain relevant news")
    # The user's question.
    question = dspy.InputField()
    # The generated answer, with a content and length hint for the LM.
    answer = dspy.OutputField(desc="highlights key points in context - often between 50-100 words")

### Module

* The `__init__` method will simply declare the sub-modules it needs: `dspy.Retrieve` and `dspy.ChainOfThought`. The latter is defined to implement our GenerateAnswer signature.
* The `forward` method will describe the control flow of answering the question using the modules we have.

In [38]:
class RAG(dspy.Module):
    """Retrieve-then-answer pipeline built from a retriever and a chain-of-thought generator."""

    def __init__(self, num_passages=3):
        """Declare the sub-modules; `num_passages` is the retriever's `k`."""
        super().__init__()

        # Passage lookup, limited to `num_passages` results.
        self.retrieve = dspy.Retrieve(k=num_passages)
        # Chain-of-thought answer generation driven by the GenerateAnswer signature.
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Fetch passages for `question`, answer from them, and return both."""
        passages = self.retrieve(question).passages
        answer = self.generate_answer(context=passages, question=question).answer
        return dspy.Prediction(context=passages, answer=answer)

### Try it out

In [39]:
# Query used for the demo run.
my_question = "Princeton"

# Get the prediction. This contains `pred.context` and `pred.answer`.
uncompiled_rag = RAG()  # uncompiled (i.e., zero-shot) program
pred = uncompiled_rag(my_question)

# Print the question and the answer (printing the retrieved contexts is left disabled below).
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
# print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")

Question: Princeton
Predicted Answer: The Princeton wrestling team announced that they will be welcoming seven incoming freshmen as part of the Class of 2028. Additionally, a new true crime series investigates the 1989 cold-case killing of a Princeton grande dame.


Inspect the LM's chain of thought to iterate on and refine the signature.

In [40]:
# Show recent LM interactions (n=3) to see the exact prompt and completion.
turbo.inspect_history(n=3)





Answer questions with the news in the context

---

Follow the following format.

Context: may contain relevant news

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: often between 50-100 words

---

Context:
[1] «It's not quite summer yet, though it might as well be ...»
[2] «PRINCETON, NJ - The Princeton wrestling team announced Thursday that the program will be welcoming seven incoming freshman as a part of the Class of 2028.»
[3] «The new true crime series — from the creators of the award-winning podcast "Father Wants Us Dead" — investigates the 1989 cold-case killing of a Princeton grande dame.»

Question: Princeton

Reasoning: Let's think step by step in order to[32m Context:
[1] «It's not quite summer yet, though it might as well be ...»
[2] «PRINCETON, NJ - The Princeton wrestling team announced Thursday that the program will be welcoming seven incoming freshman as a part of the Class of 2028.»
[3] «The new true 

## Option 2: ReAct Agent with Tools

* Tools in ReAct can shape the agent's interaction and response mechanisms, and DSPy ensures this customizability by allowing users to pass in their toolsets tailored for their task scenarios. 
* The default tool is the `dspy.Retrieve` module (serving to retrieve information from Retrieval Models during the Action step) with default num_results=3, and these can be passed as arguments to the initialization of the ReAct module.

In [26]:
import dspy

class NewsModule(dspy.Module):
    """News question-answering module: retrieve passages, then answer from them."""

    def __init__(self, num_passages=3):
        """Set up the retriever (top `num_passages`) and the answer generator."""
        super().__init__()
        # Retriever returning up to `num_passages` passages per query.
        self.retrieve = dspy.Retrieve(k=num_passages)
        # Chain-of-thought generator that follows the GenerateAnswer signature.
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Return a Prediction holding the retrieved context and the generated answer."""
        retrieved = self.retrieve(question).passages
        generated = self.generate_answer(context=retrieved, question=question)
        answer_text = generated.answer
        return dspy.Prediction(context=retrieved, answer=answer_text)

In [None]:
# Plan for this cell:
#   1. create a news module
#   2. create a search module (not implemented yet)

# A module can be wrapped as a tool in one line and called inside a DSPy program when needed.
# To use it with dspy.ReAct, usage looks like this:

# NOTE(review): dspy.ReAct may expect each tool to expose `name`, `desc` and
# `input_variable` attributes — verify that NewsModule satisfies the tool contract.
mytool = NewsModule()
gen = dspy.ReAct('question -> answer', tools=[mytool])

In [28]:
# Signature handed to the ReAct module: context, question -> answer.
# The class docstring and `desc` strings are prompt content, not ordinary documentation.
# NOTE(review): `context` is declared as an input, yet the call in the following cell
# passes only `question` — confirm that ReAct supplies the context itself, or drop the field.
class GenerateResponse(dspy.Signature):
    """Answer questions with the news in the context"""
    # News passages the answer should draw on.
    context = dspy.InputField(desc="may contain relevant news")
    # The user's question.
    question = dspy.InputField()
    # The generated answer, with a soft length hint for the LM.
    answer = dspy.OutputField(desc="often between 50-100 words")

# Pass signature to ReAct module (no explicit tools are given here).
react_module = dspy.ReAct(GenerateResponse)

In [None]:
# Call the ReAct module on a particular input
# NOTE(review): 'What ?' looks like a placeholder — replace it with a real question before running.
question = 'What ?'
result = react_module(question=question)

# Print the question and the final answer produced by the ReAct module.
print(f"Question: {question}")
print(f"Final Predicted Answer (after ReAct process): {result.answer}")