# Scoring Different Prompting Techniques with DeepEval

Three different prompting techniques are evaluated in this notebook:

1. Base RAG Prompt 
2. Advanced RAG Prompt (role-based)
3. Optimized RAG Prompt (based on DSPy)

In [8]:
import pandas as pd
import deepeval
import dspy
import requests
import json

In [20]:
# Load the synthetic dataset spreadsheet and flatten it to one dict per row.
# NOTE: despite its name, `dataset_dict` is a list of row dicts.
dataset = pd.read_excel("synthetics/synthetic_dataset_revised.xlsx")
dataset_dict = dataset.to_dict("records")
total_records = len(dataset_dict)
print(f"Total records: {total_records}")

Total records: 52


In [13]:
# Settings shared by every request, so that each prompt variant is queried
# with the same parameters.
TOP_K: int = 20         # sent as the "top_k" field of the RAG request
TOP_P: float = 0.8      # sent as the "top_p" field of the RAG request
MAX_TOKENS: int = 1024  # same value as the request's "max_tokens" field

In [24]:
# Base Prompt
from typing import Literal

def rag_call(
    prompt: str,
    prompt_type: Literal["cite", "base"],
    timeout: float = 300.0,
) -> str:
    """Send one input to the RAG endpoint and return its reply.

    Args:
        prompt: Text sent as the ``body`` field of the request.
        prompt_type: Which prompt variant to request (``"cite"`` or
            ``"base"``); sent as the ``prompt`` field.
        timeout: Seconds handed to ``requests`` as the connect/read timeout,
            so a stalled request cannot hang the whole evaluation loop.

    Returns:
        The value stored under the ``response`` key of the service's JSON
        reply (presumably the generated answer text).

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    request_body = {
        "body": prompt,
        # Use the shared constant instead of a second hard-coded 1024 so the
        # generation settings are tuned in a single place.
        "max_tokens": MAX_TOKENS,
        "prompt": prompt_type,
        "top_k": TOP_K,
        "top_p": TOP_P,
    }
    response = requests.post(
        url="http://greencompute-1575332443.us-east-1.elb.amazonaws.com/api/llm/rag",
        json=request_body,
        timeout=timeout,
    )
    # Fail with a clear HTTP error rather than an opaque JSON/KeyError below.
    response.raise_for_status()
    return response.json()["response"]

In [25]:
from tqdm.notebook import tqdm

# For each prompt variant, query the RAG endpoint once per record and store
# the reply on that record under "generated_<prompt_type>".
for prompt_type in ("cite", "base"):
    print(f"Prompt Type: {prompt_type}")
    output_key = f"generated_{prompt_type}"
    for record in tqdm(dataset_dict):
        record[output_key] = rag_call(record["input"], prompt_type)

Prompt Type: cite


  0%|          | 0/52 [00:00<?, ?it/s]

Prompt Type: base


  0%|          | 0/52 [00:00<?, ?it/s]

In [26]:
# Write the list of records to disk as JSON indented by four spaces
with open("synthetics/synthetic_dataset_revised.json", mode="w") as json_file:
    json.dump(dataset_dict, json_file, indent=4)

In [None]:
# Reload the saved records from disk into `dataset_dict`
# (presumably so a later session can skip regenerating the answers).
with open("synthetics/synthetic_dataset_revised.json") as json_file:
    dataset_dict = json.load(json_file)