# DSPy Chain-of-Knowledge

In [1]:
#|default_exp dspy.predict.cok

In [2]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [3]:
#|export

import dspy
from dspy.primitives.program import Module
from dspy.signatures.signature import ensure_signature

from bellem.logging import get_logger

log = get_logger(__name__)

In [4]:
#|export

class ChainOfKnowledge(Module):
    def __init__(self, signature, rationale_type=None, activated=True, **config):
        super().__init__()

        self.activated = activated

        self.signature = signature = ensure_signature(signature)

        prefix = "Let's list evidence triples in the form of (subject, relation, object) and then identify key logical connections or observations linking the evidence triples to the answer"
        desc = "${evidence_triples}\n${explanation_hints}"
        rationale_type = rationale_type or dspy.OutputField(prefix=prefix, desc=desc)

        # Add fields
        extended_signature = signature.prepend("Explanation hints", rationale_type, type_=str)
        extended_signature = extended_signature.prepend("Evidence triples", rationale_type, type_=str)

        self._predict = dspy.Predict(extended_signature, **config)
        self._predict.extended_signature = extended_signature

    def forward(self, **kwargs):
        assert self.activated in [True, False]

        signature = kwargs.pop("new_signature", self._predict.extended_signature if self.activated else self.signature)
        return self._predict(signature=signature, **kwargs)

    @property
    def demos(self):
        return self._predict.demos

    @property
    def extended_signature(self):
        return self._predict.extended_signature

In [5]:
#|hide
from dotenv import load_dotenv
load_dotenv()

True

In [6]:
import dspy
from bellem.dspy.utils import configure_lm

lm = configure_lm('openai/gpt-4o-mini')

In [7]:
context = """
Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011.
""".strip()

question = 'Who is the current mayor of the city Glenhis Hernández was born?'

In [8]:
ccot = ChainOfKnowledge('context,question -> answer')
prediction = ccot(context=context, question=question)
prediction

Prediction(
    Evidence triples='(Glenhis Hernández, born in, Havana) (Marta Hernández Romero, is the current mayor of, Havana)',
    Explanation hints='Glenhis Hernández was born in Havana, and the current mayor of Havana is Marta Hernández Romero. Therefore, the answer to the question about the current mayor of the city where Glenhis Hernández was born is directly linked to the evidence provided.',
    answer='Marta Hernández Romero'
)

In [9]:
def format_message(message):
    return f"{message['role']}: {message['content']}"

In [10]:
for msg in lm.history[0]['messages']:
    print(format_message(msg))  
    print()

system: Your input fields are:
1. `context` (str)
2. `question` (str)

Your output fields are:
1. `Evidence triples` (str): ${evidence_triples}
${explanation_hints}
2. `Explanation hints` (str): ${evidence_triples}
${explanation_hints}
3. `answer` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## Evidence triples ## ]]
{Evidence triples}

[[ ## Explanation hints ## ]]
{Explanation hints}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `context`, `question`, produce the fields `answer`.

user: [[ ## context ## ]]
Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5

In [None]:
for msg in lm.history[1]['messages']:
    print(format_message(msg))  
    print()

system: Your input fields are:
1. `context` (str)
2. `question` (str)

Your output fields are:
1. `reasoning` (str)
2. `answer` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## reasoning ## ]]
{reasoning}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `context`, `question`, produce the fields `answer`.

user: [[ ## context ## ]]
Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011.

[[ ## question ## ]]
Who is the current mayor of the city Glenhis Hernández was born?

Respond with the corresponding output fields, starting with the field `[[ ## reasoning ##

In [None]:
#|hide
import nbdev; nbdev.nbdev_export()