In [None]:
import phoenix as px
# Launch the Phoenix application; as the last expression in the cell, the
# returned object is rendered as the cell output.
px.launch_app()

In [None]:
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Exporter -> processor: every finished span is handed to the OTLP/HTTP
# exporter, which posts it to the collector endpoint on localhost:6006.
span_exporter = OTLPSpanExporter(endpoint="http://localhost:6006/v1/traces")
span_processor = SimpleSpanProcessor(span_exporter=span_exporter)

# Provider: owns the processor and is registered as the global tracer
# provider for the OpenTelemetry API.
tracer_provider = trace_sdk.TracerProvider()
tracer_provider.add_span_processor(span_processor)
trace_api.set_tracer_provider(tracer_provider=tracer_provider)

In [None]:
from openinference.instrumentation.dspy import DSPyInstrumentor

# Turn on the OpenInference instrumentation for DSPy, using default arguments.
DSPyInstrumentor().instrument()

In [None]:
# Imported here so the cell runs on a fresh kernel: `dspy` is otherwise only
# imported by a later cell, and `using_attributes` is not imported anywhere.
import dspy
from openinference.instrumentation import using_attributes


class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    # NOTE(review): DSPy is understood to build the prompt from the docstring
    # above and from the field order below -- confirm before rewording or
    # reordering either.
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")


if __name__ == "__main__":
    # Language model used for every prediction made in this cell.
    turbo = dspy.OpenAI(model="gpt-3.5-turbo")

    dspy.settings.configure(lm=turbo)

    # OpenInference context manager: the session/user ids, metadata, tags and
    # prompt-template details below are supplied for the duration of the block
    # so that they can be attached to the spans produced inside it.
    with using_attributes(
        session_id="my-test-session",
        user_id="my-test-user",
        metadata={
            "test-int": 1,
            "test-str": "string",
            "test-list": [1, 2, 3],
            "test-dict": {
                "key-1": "val-1",
                "key-2": "val-2",
            },
        },
        tags=["tag-1", "tag-2"],
        prompt_template_version="v1.0",
        prompt_template_variables={
            "city": "Johannesburg",
            "date": "July 11th",
        },
    ):
        # Define the predictor.
        generate_answer = dspy.Predict(BasicQA)

        # Call the predictor on a particular input.
        pred = generate_answer(question="What is the capital of the united states?")
        print(f"Predicted Answer: {pred.answer}")

https://github.com/Arize-ai/openinference/tree/main/python/instrumentation/openinference-instrumentation-dspy

dspy_tracing_tutorial.ipynb

https://colab.research.google.com/github/Arize-ai/phoenix/blob/main/tutorials/tracing/dspy_tracing_tutorial.ipynb

In [3]:
!pip install dspy-ai

Collecting dspy-ai
  Downloading dspy_ai-2.4.13-py3-none-any.whl (280 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.7/280.7 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m[36m0:00:01[0mm eta [36m0:00:01[0m
[?25hCollecting datasets
  Using cached datasets-2.21.0-py3-none-any.whl (527 kB)
Collecting backoff
  Using cached backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 KB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m
[?25hCollecting ujson
  Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.6/53.6 KB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting structlog
  Downloading structlog-24.4.0-py3-none-any.whl (67 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━

In [4]:
import os
from dotenv import load_dotenv
import dspy
import openai
import phoenix as px

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Location of the .env file that provides OPENAI_API_KEY. The default is the
# original author's machine-specific path; set the DOTENV_PATH environment
# variable to point at a different file without editing the notebook.
DOTENV_PATH = os.environ.get(
    "DOTENV_PATH",
    "/media/uberdev/ddrv/gitFolders/python_de_learners_data/.env",
)
load_dotenv(DOTENV_PATH)

# Raises KeyError if the key is still missing after loading the .env file.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
# Language model: OpenAI gpt-3.5-turbo.
turbo = dspy.OpenAI(model="gpt-3.5-turbo")

# Retrieval model: a hosted ColBERTv2 service exposing the wiki17_abstracts endpoint.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts")

# Register both as the defaults used by DSPy modules.
dspy.settings.configure(
    lm=turbo,
    rm=colbertv2_wiki17_abstracts,
)

In [None]:
from dspy.datasets import HotPotQA

# Load HotPotQA with fixed seeds: 20 training, 50 dev and 10 test examples.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=10,
)

# Mark 'question' as the only input field; every other field on an example is
# treated as a label and/or metadata.
trainset = [example.with_inputs("question") for example in dataset.train]
devset = [example.with_inputs("question") for example in dataset.dev]

print(f"Train set size: {len(trainset)}")
print(f"Dev set size: {len(devset)}")

In [None]:
# Keep one example from each split at hand for inspection.
train_example = trainset[0]
# NOTE(review): index 18 looks like an arbitrary pick from the dev examples -- confirm.
dev_example = devset[18]

In [None]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # NOTE(review): DSPy is understood to build the prompt from the docstring
    # above and from the field order below -- confirm before rewording or
    # reordering either.

    # Inputs: the passages supplied as context, and the question to answer.
    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    # Output: the answer text; the description nudges it towards brevity.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [None]:
class RAG(dspy.Module):
    """Two-step question answering: retrieve passages, then generate an answer.

    Args:
        num_passages: Number of passages requested from the retriever (``k``).
    """

    def __init__(self, num_passages=3):
        super().__init__()
        # Sub-modules: a retriever and a chain-of-thought predictor that is
        # driven by the GenerateAnswer signature.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer ``question`` and return the passages used alongside the answer.

        Returns:
            A ``dspy.Prediction`` with ``context`` (the retrieved passages) and
            ``answer`` (the generated answer).
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [None]:
from dspy.teleprompt import BootstrapFewShot


def validate_context_and_answer(example, pred, trace=None):
    """Metric that combines DSPy's exact-match and passage-match checks.

    Both ``dspy.evaluate.answer_exact_match`` and
    ``dspy.evaluate.answer_passage_match`` are always evaluated on
    ``(example, pred)``; the result is their ``and`` combination.

    Args:
        example: The labelled example being scored.
        pred: The prediction produced for ``example``.
        trace: Accepted for interface compatibility; not used here.

    Returns:
        The exact-match result if it is falsy, otherwise the passage-match result.
    """
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    return exact_match and passage_match


# The uncompiled RAG program, built with its default number of passages.
input_module = RAG()
# Few-shot bootstrapping optimizer scored with validate_context_and_answer.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)
# Compile the program against the training set.
compiled_module = teleprompter.compile(input_module, trainset=trainset)

In [None]:
# Launch the Phoenix application and keep a handle to the returned session object.
phoenix_session = px.launch_app()

In [8]:
!pip install openinference-instrumentation-dspy opentelemetry-exporter-otlp

Collecting openinference-instrumentation-dspy
  Downloading openinference_instrumentation_dspy-0.1.11-py3-none-any.whl (13 kB)
Collecting protobuf<5.0,>=3.19
  Using cached protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
Installing collected packages: protobuf, openinference-instrumentation-dspy
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.27.3
    Uninstalling protobuf-5.27.3:
      Successfully uninstalled protobuf-5.27.3
[31mERROR: Could not install packages due to an OSError: [Errno 39] Directory not empty: '/media/uberdev/ddrv/telemetenv/lib/python3.10/site-packages/google/~upb'
[0m[31m
[0m

In [10]:
from openinference.instrumentation.dspy import DSPyInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

ModuleNotFoundError: No module named 'openinference.instrumentation.dspy'

In [9]:
# Where spans are sent: the OTLP/HTTP traces route on local port 6006.
endpoint = "http://127.0.0.1:6006/v1/traces"
span_otlp_exporter = OTLPSpanExporter(endpoint=endpoint)

# Tracer provider with an empty resource; each finished span goes through a
# SimpleSpanProcessor to the exporter above.
resource = Resource(attributes={})
tracer_provider = trace_sdk.TracerProvider(resource=resource)
tracer_provider.add_span_processor(
    SimpleSpanProcessor(span_exporter=span_otlp_exporter)
)

# Register the provider globally, then instrument DSPy (dependency check skipped).
trace_api.set_tracer_provider(tracer_provider=tracer_provider)
DSPyInstrumentor().instrument(skip_dep_check=True)

ModuleNotFoundError: No module named 'openinference.instrumentation.dspy'

In [None]:
# Run the compiled program over every dev example and print, per example, the
# question, the predicted answer and the retrieved contexts (each cut to 200
# characters), followed by two blank lines.
for example in devset:
    question = example["question"]
    prediction = compiled_module(question)
    print(
        "Question",
        "========",
        question,
        "",
        "Predicted Answer",
        "================",
        prediction.answer,
        "",
        "Retrieved Contexts (truncated)",
        f"{[c[:200] + '...' for c in prediction.context]}",
        "",
        "",
        sep="\n",
    )