In [1]:
!pip install dspy

Collecting dspy
  Downloading dspy-0.1.5-py3-none-any.whl.metadata (692 bytes)
Collecting dspy-ai==2.4.5 (from dspy)
  Downloading dspy_ai-2.4.5-py3-none-any.whl.metadata (36 kB)
Collecting backoff~=2.2.1 (from dspy-ai==2.4.5->dspy)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting joblib~=1.3.2 (from dspy-ai==2.4.5->dspy)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting openai<2.0.0,>=0.28.1 (from dspy-ai==2.4.5->dspy)
  Downloading openai-1.23.6-py3-none-any.whl.metadata (21 kB)
Collecting datasets<3.0.0,~=2.14.6 (from dspy-ai==2.4.5->dspy)
  Downloading datasets-2.14.7-py3-none-any.whl.metadata (19 kB)
Collecting optuna (from dspy-ai==2.4.5->dspy)
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting pydantic==2.5.0 (from dspy-ai==2.4.5->dspy)
  Downloading pydantic-2.5.0-py3-none-any.whl.metadata (174 kB)
     ---------------------------------------- 0.0/174.6 kB ? eta -:--:--
     ---------------- -------------------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
anaconda-cloud-auth 0.1.4 requires pydantic<2.0, but you have pydantic 2.5.0 which is incompatible.


In [4]:
import sys
import os
import dspy
from dspy.evaluate import Evaluate
from dspy.datasets.hotpotqa import HotPotQA
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune

In [5]:
# Language model: Llama-2-13b-chat reached through dspy's HFClientTGI client.
# The list of ports is handed to the client unchanged.
llama = dspy.HFClientTGI(
    model="meta-llama/Llama-2-13b-chat-hf",
    port=[7140, 7141, 7142, 7143],
    max_tokens=150,
)

# Retrieval model: a ColBERTv2 endpoint (the URL names a `wiki17_abstracts` index).
colbertv2 = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# Optional alternative once the notebook works end to end -- GPT-3.5:
# turbo = dspy.OpenAI(model='gpt-3.5-turbo-instruct')
# If you switch, pass lm=turbo in the configure() call below instead of lm=llama.

# Register both models as the defaults used by every dspy module in this notebook.
dspy.settings.configure(lm=llama, rm=colbertv2)

In [None]:
def _build_examples(rows):
    """Convert (context, question, prompt) triples into dspy Examples.

    `context` and `question` are marked as the input fields; `prompt` stays as the label.
    Each input key must be passed to `with_inputs` as its own argument: a single
    'context, question' string would register one key with that literal name, which
    matches no field and leaves `example.inputs()` empty.
    """
    return [
        dspy.Example(context=context, question=question, prompt=prompt).with_inputs('context', 'question')
        for context, question, prompt in rows
    ]


# Placeholder rows -- replace with real (golden context, question, optimized prompt) triples.
train = _build_examples([
    ('<Golden Context>', 'Generate a ChatGPT Prompt for <Question>', 'Optimized ChatGPT Prompt'),
])

dev = _build_examples([
    ('<Golden Context>', 'Generate a ChatGPT Prompt for <Question>', 'Optimized ChatGPT Prompt'),
])

In [None]:
# Define a dspy.Predict module with the signature `context, question -> prompt`
# (i.e., takes a context and a question and outputs a prompt).
predict = dspy.Predict('context, question -> prompt')

class CoT(dspy.Module):
    """Chain-of-thought program that maps `context, question -> prompt`."""

    def __init__(self):
        super().__init__()

        # Declare the chain-of-thought sub-module here so it can later be compiled
        # (e.g., taught a prompt with few-shot demonstrations).
        self.generate_answer = dspy.ChainOfThought('context, question -> prompt')

    def forward(self, question, context=None):
        """Run the chain-of-thought sub-module.

        Args:
            question: The question input of the signature.
            context: The context input of the signature. Optional so that existing
                question-only calls keep working; when omitted it is not forwarded.

        Returns:
            Whatever the `dspy.ChainOfThought` sub-module returns for these inputs.
        """
        # The signature declares `context` as an input, and callers (keyword calls and
        # `**example.inputs()`) pass it, so it must be accepted and forwarded here.
        inputs = {'question': question}
        if context is not None:
            inputs['context'] = context
        return self.generate_answer(**inputs)

In [None]:
# Judge model used only inside `metric`, via a temporary dspy.context override.
gpt4T = dspy.OpenAI(model='gpt-4-1106-preview', max_tokens=1000, model_type='chat')


def metric(gold, pred, trace=None):
    """See how well the predicted prompt performs, as judged by GPT-4.

    The predicted prompt is executed with the judge model, and the judge is then asked
    whether the resulting answer contains the gold reference.

    Args:
        gold: The gold example. Its `answer` field is used as the reference when present;
            otherwise its `prompt` field is used.
        pred: The program's prediction; must expose a `prompt` field.
        trace: Optional third argument. Accepted because optimizers may call the metric
            as `metric(example, prediction, trace)`, while plain evaluation passes two
            arguments.

    Returns:
        A bool when `trace` is not None, otherwise 1.0 if the judge's assessment
        starts with "yes" and 0.0 if it does not.
    """
    pred_prompt = pred.prompt

    # NOTE(review): the examples built in this notebook carry `context`, `question` and
    # `prompt` only, so reading `gold.answer` unconditionally raises AttributeError.
    # Falling back to `gold.prompt` as the reference -- confirm this is the intended label.
    gold_answer = getattr(gold, 'answer', None)
    if gold_answer is None:
        gold_answer = gold.prompt

    correct = f"The text should answer `{pred_prompt}` with `{gold_answer}`. Does the assessed text contain this answer?"

    with dspy.context(lm=gpt4T):
        # Pass the generated answer text (not the whole Prediction object) to the judge.
        pred_answer = dspy.Predict('prompt -> answer')(prompt=pred_prompt).answer
        verdict = dspy.Predict('question, answer -> assessment')(question=correct, answer=pred_answer)

        # Additional judged criteria can be added here and folded into the score.

    # The judge signature's output field is `assessment`; tolerate replies such as "Yes."
    is_correct = (verdict.assessment or '').strip().lower().startswith('yes')

    if trace is not None:
        return is_correct
    return float(is_correct)

In [None]:

# Alias under which the metric is handed to the optimizer here and to the evaluator later.
metric_EM = metric

# Bootstrap few-shot demonstrations for the CoT program from the training examples,
# keeping at most two bootstrapped demos.
teleprompter = BootstrapFewShot(
    metric=metric_EM,
    max_bootstrapped_demos=2,
)
cot_compiled = teleprompter.compile(
    CoT(),
    trainset=train,
)

In [None]:
# Try the compiled program on a new context/question pair. As the last expression in
# the cell, the returned value is what gets displayed as the cell output.
cot_compiled(context='new golden context', question='Generate a ChatGPT Prompt for <Question>')

In [None]:
# Number of worker threads handed to the evaluator.
NUM_THREADS = 32

# Evaluator over the dev examples, using the same metric that drove compilation;
# shows a progress bar and a results table (display_table=15).
evaluate_optimizer = Evaluate(
    devset=dev,
    metric=metric_EM,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=15,
)

# Score the compiled chain-of-thought program.
evaluate_optimizer(cot_compiled)