# Prompt Optimization

In [37]:
import os
import json


import requests
import boto3
import pandas as pd
import os
import dspy
from dspy.evaluate import SemanticF1

In [28]:
# Read in synthetic datset
dataset = pd.read_excel("synthetics/synthetic_dataset.xlsx")
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   input                    66 non-null     object 
 1   actual_output            0 non-null      float64
 2   expected_output          66 non-null     object 
 3   context                  66 non-null     object 
 4   retrieval_context        0 non-null      float64
 5   n_chunks_per_context     66 non-null     int64  
 6   context_length           66 non-null     int64  
 7   evolutions               66 non-null     object 
 8   context_quality          66 non-null     float64
 9   synthetic_input_quality  66 non-null     float64
 10  source_file              66 non-null     object 
dtypes: float64(4), int64(2), object(5)
memory usage: 5.8+ KB


In [29]:
# create the dspy dataset
dataset_dict = dataset.to_dict(orient='records')
dspy_dataset = []

for row in dataset_dict:
	dspy_dataset.append(dspy.Example(context=row['context'], question=row['input'], answer=row['expected_output']).with_inputs('input', 'context'))

dspy_dataset[:5]

[Example({'context': "[' Report. The Assessment Report is submitted to the Site Lead. The DCEP revises the draft to the satisfaction of the Site Lead. The result is the final Assessment Report.\\n\\nStep 9: Compilation of Assessment and Attendance Reports\\n\\nCompilation of Assessment Report (see Appendices B-H for templates)\\n\\nThe Assessment Report should be a brief narrative summary of the energy assessment. The Report should contain the following elements.\\n\\n## Executive Summary\\n\\nThe executive summary should include key observations, opportunities, and estimated energy savings.\\n\\n## DCEP Program Objective and Approach\\n\\nBrief description of the Program objective and approach\\n\\n- The objective of the Program is to provide the data center industry with technical assistance targeted to reduce energy expenditures in data centers\\n- The approach is for the DCEP to facilitate the completion of an energy assessment as well as provide training in the DOE Software Tools 

In [30]:
# Create the evaluation object
trainset, valset, devset, testset = dspy_dataset[:10], dspy_dataset[10:20], dspy_dataset[20:30], dspy_dataset[30:40]

metric = SemanticF1()
evaluate = dspy.Evaluate(devset=devset, metric=metric, num_threads=24, display_progress=True, display_table=3)

## Compose DSPy Custom RAG Framework

In [31]:
class TitanLM(dspy.LM):
    def __init__(self, model: str, client, temperature: float = 0.7, **kwargs):
        self.client = client
        self.history = []
        self.temperature = temperature

        super().__init__(model, **kwargs)
        self.model = model
    
    def _format_message(self, prompt: str):
        body = json.dumps(
            {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 512,
                    "stopSequences": [],
                    "temperature": self.temperature,
                    "topP": 0.9,
                },
            }
        )
        return body

    def generate_content(self, prompt: str) -> str:
        body = self._format_message(prompt)
        response = self.client.invoke_model(
            body=body,
            modelId=self.model,
            accept="application/json",
            contentType="application/json",
        )
        response_body = json.loads(response.get("body").read())
        return response_body.get("results")

    def __call__(self, prompt=None, messages=None, **kwargs):
        # Custom chat model working for text completion model
        prompt = '\n\n'.join([x['content'] for x in messages] + ['BEGIN RESPONSE:'])

        completions = self.generate_content(prompt)
        self.history.append({"prompt": prompt, "completions": completions})

        # Must return a list of strings
        return [completions[0].get("outputText")]

    def inspect_history(self):
        for interaction in self.history:
            print(f"Prompt: {interaction['prompt']} -> Completions: {interaction['completions']}")

In [32]:
lm = TitanLM("amazon.titan-text-premier-v1:0", client=boto3.client("bedrock-runtime"))
dspy.configure(lm=lm)

qa = dspy.ChainOfThought("question->answer")
qa(question="What is the capital of France?")

Prediction(
    reasoning='Paris is the capital of France.',
    answer='Paris'
)

In [38]:
def search(query: str, top_k: int):

    url = "http://greencompute-1575332443.us-east-1.elb.amazonaws.com/api/llm/retrieval"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "query": query,
        "top_k": top_k
    }

    response = requests.post(url, headers=headers, json=data)

    print(response.status_code)
    print(response.json())


In [39]:
search("Increase data center efficiency", 5)

200
{'documents': [{'doc_title': 'SmallServerRooms_Final Report Task 2.13_2013', 'content': '# LEGAL NOTICE\n\nThe Lawrence Berkeley National Laboratory, a laboratory owned by DOE, is located at 1 Cyclotron Rd., Berkeley, California is a national laboratory of the DOE managed by Regents of the University of California for the U.S. Department of Energy under Contract Number DE- AC02-05CH11231. This report was prepared as an account of work sponsored by the Sponsor and pursuant to an M&O Contract with the United States Department of Energy (DOE). Neither Regents of the University of California, nor the DOE, nor the Sponsor, nor any of their employees, contractors, or subcontractors, makes any warranty, express or implied, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe on privately owned rights. Reference herein to any specific commerc

In [34]:
class RAG(dspy.Module):
    def __init__(self, num_docs=5):
        self.num_docs = num_docs
        self.respond = dspy.ChainOfThought('context, question -> response')

    def forward(self, question):
        context = search(question, k=self.num_docs)
        return self.respond(context=context, question=question)

## Conduct Optimization