# Auto-evaluate a prompt against a dataset


## The Idea

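Run a single prompt against the rows of a labelled dataset, compare each model output with the expected value, and report the percentage of rows answered correctly.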

## Let's build it!


In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
from langchain_core.prompts import PromptTemplate

from data_handling import load_and_clean_dataset
from eval import invoke_test_prompt_against_dataset

### The configs


In [2]:
# Markdown file holding the prompt that this notebook evaluates.
PROMPT_TO_EVAL_FILE = "./prompt-v-or-not-v-01.md"

# Row limits.
ROWS_MAX = 1  # upper bound on the number of rows in each chunk
ROWS_INITIAL = 4  # upper bound on dataset rows used for initial prompt generation
ROWS_INCORRECT = ROWS_MAX  # number of rows used as `incorrect` examples

# Seed idea for prompt generation. Built from adjacent string literals so the
# value carries no leading or trailing whitespace.
IDEA_SEED = (
    "Compare the writing style of the two pieces of text, and output the score "
    "on a scale from 1 to 10, where 1 means it's different authors and 10 means "
    "it's the same author."
    "\n\n"
    "IMPORTANT: The score MUST ONLY take the writing style into consideration, "
    "not the actual meaning of the texts."
)


# Locations of the prompt JSON files.
PROMPT_INIT_FILE = "./prompts/PROMPT_WRITEP_ZERO_SHOT.json"
PROMPT_UPDATE_FILE = "./prompts/PROMPT_UPDATEP.json"

### Load the dataset


In [3]:
# Load the dataset through the project helper `load_and_clean_dataset`.
# Alternative dataset: "./datasets/sentiment_analysis_examples_10.csv"
dataset_file = "./datasets/dataset-writing-style-v-not-v.xlsx"
df_all = load_and_clean_dataset(dataset_file)

# Fail fast on an empty dataset so the evaluation below is never run (and no
# model calls are made) against zero rows.
assert len(df_all) > 0, f"No rows loaded from {dataset_file}"

df_all

Unnamed: 0,INPUT: TEXT_1,INPUT: TEXT_2,OUTPUT: SCORE_SAME_AUTHOR
0,# The Ultimate Travel Nurse Salary Guide: 4 Mi...,# Find the Best Travel Nursing Jobs\n\nLooking...,9
1,# The Ultimate Travel Nurse Salary Guide: 4 Mi...,## Background on Nursing Job Search Book\n\nTh...,1
2,## Get A Travel Nurse Pay Breakdown\n\nUnderst...,# Find the Best Travel Nursing Jobs\n\nLooking...,10
3,## Get A Travel Nurse Pay Breakdown\n\nUnderst...,## Background on Nursing Job Search Book\n\nTh...,2
4,### Taxable vs Tax Free Pay\n\nFollowing on fr...,# Find the Best Travel Nursing Jobs\n\nLooking...,9
5,### Taxable vs Tax Free Pay\n\nFollowing on fr...,## Background on Nursing Job Search Book\n\nTh...,2
6,### Top 5 Highest Paying States for Travel Nur...,# Find the Best Travel Nursing Jobs\n\nLooking...,8
7,### Top 5 Highest Paying States for Travel Nur...,## Background on Nursing Job Search Book\n\nTh...,1


### Set up the model and load the prompt to evaluate


In [4]:
# Register a SQLite-backed LLM cache stored in the local file `.langchain.db`.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
# Model used for the evaluation run: temperature 0.1, at most 750 output tokens.
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.1, max_tokens=750)
# Alternative model configuration:
# model = ChatOpenAI(model="gpt-4-1106-preview", temperature=0, max_tokens=750)

# Read the prompt text from PROMPT_TO_EVAL_FILE.
# The encoding is pinned to UTF-8 so the prompt is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open(PROMPT_TO_EVAL_FILE, "r", encoding="utf-8") as f:
    prompt_str = f.read()

# Wrap the raw text in a PromptTemplate object for the evaluation helper.
prompt_to_eval = PromptTemplate.from_template(prompt_str)

# print(prompt_str)  # uncomment to inspect the loaded prompt text

### Test the prompt against the dataset


In [5]:
# Run the loaded prompt against the dataset with the configured model.
# The helper returns two values: a frame of generated results and an accuracy figure.
df_generated, accuracy = invoke_test_prompt_against_dataset(
    prompt_to_eval,
    df_all,
    model,
    # NOTE(review): 999 is an unexplained literal; presumably a cap on the number
    # of rows to evaluate. Confirm against the helper's signature.
    999,
    ROWS_MAX,
    concurrency=4,
)

df_generated

Getting chunk 1 with 1 rows...
Getting chunk 2 with 1 rows...
Getting chunk 3 with 1 rows...
Getting chunk 4 with 1 rows...
Getting chunk 5 with 1 rows...
Getting chunk 6 with 1 rows...
Getting chunk 7 with 1 rows...
Getting chunk 8 with 1 rows...
Correct answers: 87.50%


Unnamed: 0,ROW_NO,Thinking_step_by_step,OUTPUT: SCORE_SAME_AUTHOR,INPUT: TEXT_1,INPUT: TEXT_2,Truth,Is Correct?
0,0,Both texts discuss the benefits and advantages...,8,# The Ultimate Travel Nurse Salary Guide: 4 Mi...,# Find the Best Travel Nursing Jobs\n\nLooking...,9,True
1,1,Both texts have different writing styles. \n\n...,1,# The Ultimate Travel Nurse Salary Guide: 4 Mi...,## Background on Nursing Job Search Book\n\nTh...,1,True
2,2,"Both texts have a similar tone, using a conver...",9,## Get A Travel Nurse Pay Breakdown\n\nUnderst...,# Find the Best Travel Nursing Jobs\n\nLooking...,10,True
3,3,Both texts have different subject matters and ...,1,## Get A Travel Nurse Pay Breakdown\n\nUnderst...,## Background on Nursing Job Search Book\n\nTh...,2,True
4,4,Both texts have different subject matters and ...,2,### Taxable vs Tax Free Pay\n\nFollowing on fr...,# Find the Best Travel Nursing Jobs\n\nLooking...,9,False
5,5,Text 1:\n- The writing style in Text 1 is info...,2,### Taxable vs Tax Free Pay\n\nFollowing on fr...,## Background on Nursing Job Search Book\n\nTh...,2,True
6,6,Both texts discuss travel nursing jobs and pro...,9,### Top 5 Highest Paying States for Travel Nur...,# Find the Best Travel Nursing Jobs\n\nLooking...,8,True
7,7,Both texts have different subject matters and ...,2,### Top 5 Highest Paying States for Travel Nur...,## Background on Nursing Job Search Book\n\nTh...,1,True
