## Prompt Engineering
- Try different prompts and formatting
- Check results against human benchmark

In [78]:
import html
import json
import os

import numpy as np
import pandas as pd
from IPython.display import display, HTML
from openai import OpenAI
from tqdm.notebook import tqdm

## Load the OpenAI API Key

In [79]:
from dotenv import find_dotenv, load_dotenv

# Load variables from the nearest .env file into the process environment;
# the return value is deliberately discarded.
_ = load_dotenv(find_dotenv())

api_key = os.environ.get('OPENAI_API_KEY')

# Report only whether a non-empty key was found -- never echo the key itself.
print("API key is loaded." if api_key else "API key is not loaded.")

API key is loaded.


## Load the benchmark data

#### Load the place results

In [80]:
# Read the benchmark places table (one row per store) from a path relative to the working directory.
benchmark_place_results = pd.read_csv('./Data/benchmark_input.csv')
# Bare last expression: rendered as the cell's output.
benchmark_place_results

Unnamed: 0,Name,Place ID,Description,Website,Address,Phone,Rating,Price,Type,Highlights,Offerings,Q_and_A,Snippets,Reviews_Most_Relevant
0,The Fresh Market,ChIJmb6kSDZ_54gRowDRme47G0k,Upscale grocery store chain with an old-world ...,https://stores.thefreshmarket.com/fl/orlando/38,"5000 Dr Phillips Blvd, Orlando, FL 32819",(407) 294-1516,4.5,$$$,"['Grocery store', 'American grocery store', 'B...",Great produce,Organic products,[{'question': {'user': {'name': 'Anita Caragan...,"[{'snippet': '""Exclusive place to buy good foo...","[{'username': 'Scott Ziegler', 'rating': 5, 'd..."
1,The Ancient Olive Gourmet of Winter Garden,ChIJyZdkgPuD54gRqtF-E30N4x0,,http://www.theancientolive.com/,"125 W Plant St, Winter Garden, FL 34787",(407) 656-6457,4.8,,['Gourmet grocery store'],,,[],"[{'snippet': '""It is good with any kind of foo...","[{'username': 'Mesh G', 'rating': 5, 'descript..."
2,Seriously Great Market Solution,ChIJu9MkmVF_54gRwZIoka6RoQk,,,"8815 Conroy Windermere Rd # 35, Orlando, FL 32835",(321) 251-6383,,,['Grocery store'],,,[],,[]
3,Tn Food & Grocery,ChIJmfVTDG2D54gRFdvotMCET7U,,http://tn-food-grocery.keeq.io/,"66 S Dillard St, Winter Garden, FL 34787",(407) 656-1551,4.3,,['Grocery store'],,,[{'question': {'user': {'name': 'keycha “ ” vi...,"[{'snippet': '""Very friendly staff and the pla...","[{'username': 'Tara G.', 'rating': 5, 'descrip..."
4,SC Foods,ChIJ2esYs3yP54gRAarbKINxxME,,https://alisanc.com/contact-us/,"15701 State Rte 50, Clermont, FL 34711, United...",+1 407-347-8135,,,['Grocery store'],,,[],,[]
5,Holson's Produce,ChIJo5_x_gV554gRp3F5Ml9JGxY,,,"5853 Old Winter Garden Rd, Orlando, FL 32835",(407) 292-5678,4.1,,"['Produce market', 'Meat wholesaler', 'Produce...",,,[],"[{'snippet': '""Service is good, food is good.....","[{'username': 'Angelo M', 'rating': 5, 'descri..."
6,Suns Market,ChIJwVN3bHOD54gRSlij1UhudVc,,,"523 S Dillard St, Winter Garden, FL 34787",(407) 877-3020,4.0,,['Grocery store'],,,[{'question': {'user': {'name': 'Kelly Pounds'...,"[{'snippet': '""Nice experience prices a little...","[{'username': ""Dad's Home Movies"", 'rating': 2..."
7,Colonial Grocery Deli & Bodega,ChIJEaAcVKx554gRbVLiyDDehHI,,,"6339 W Colonial Dr, Orlando, FL 32818",(407) 296-9955,4.7,,['Grocery store'],Great produce,,[],"[{'snippet': '""Very good food! and great choic...","[{'username': 'Robert Brown', 'rating': 5, 'de..."
8,Trader Joe's,ChIJbeFK_p5_54gRGvfzUjKvlFA,Grocery chain with a variety of signature item...,https://locations.traderjoes.com/fl/orlando/76...,"8323 Sand Lake Rd, Orlando, FL 32819",(407) 345-0611,4.7,$$,"['Grocery store', 'Cheese shop', 'Fresh food m...",Great produce,Organic products,"[{'question': {'user': {'name': 'ed m', 'link'...","[{'snippet': '""Great selection, great meats, g...","[{'username': 'Richard Yu', 'rating': 5, 'desc..."
9,JNM Super Market,ChIJIxxg6f9-54gRVzC_PSR-wn8,,,"7521 International Dr, Orlando, FL 32819",(407) 592-4748,3.3,,"['Grocery store', 'Beer store', 'Convenience s...",,,"[{'question': {'user': {'name': 'LINDA RUPP', ...","[{'snippet': '""Great selection and good qualit...","[{'username': 'Anthony Barney', 'rating': 1, '..."


#### Separate a single record for prompt engineering

In [81]:
# Load the single record used while iterating on prompts.
# NOTE(review): the rendered output is a quoted string, so the file appears to hold a
# JSON *string* whose content is itself JSON; `test_json` is then a str, which is what
# gets passed as the user message content further down -- confirm.
with open('./Data/test.json', 'r') as file:
    test_json = json.load(file)
test_json

'{"Name":"The Fresh Market","Description":"Upscale grocery store chain with an old-world vibe offers local produce, prepared eats, wine & more.","Website":"https:\\/\\/stores.thefreshmarket.com\\/fl\\/orlando\\/38","Address":"5000 Dr Phillips Blvd, Orlando, FL 32819","Phone":"(407) 294-1516","Rating":4.5,"Price":"$$$","Type":["Grocery store","American grocery store","Bakery","Butcher shop","Fruit and vegetable store","Gourmet grocery store","Market","Organic food store","Produce market","Supermarket"],"Highlights":"Great produce","Offerings":"Organic products","Q_and_A":[{"question":{"user":{"name":"Anita Caragan","link":"https:\\/\\/www.google.com\\/maps\\/contrib\\/113357052055774270348","thumbnail":"https:\\/\\/lh3.googleusercontent.com\\/a\\/ACg8ocKdK8_h2Q72RjHiQIaBRzydgHNYXF_L4fVzUnKZqkzr=s120-c-rp-mo-br100"},"text":"Do you have bake chicken?","date":"4 years ago","language":"en"},"answer":{"user":{"name":"Jeanne M","link":"https:\\/\\/www.google.com\\/maps\\/contrib\\/10327323170

#### Load human scores

In [82]:
# Read the human-assigned benchmark scores and notes that GPT output is compared against.
human_scores = pd.read_csv('./Data/human_scores.csv')
# Bare last expression: rendered as the cell's output.
human_scores

Unnamed: 0,Name,Human Score,Human Notes
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...
2,Seriously Great Market Solution,1.0,No reviews or website
3,Tn Food & Grocery,4.0,No focus on gourmet
4,Holson's Produce,5.0,No website but maybe.
5,Suns Market,3.0,"Family owned so that's a plus, but gas station..."
6,SC Foods,4.0,I don't know what this is but there's no focus...
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...
8,Trader Joe's,4.0,Massive chain. Too much red tape.
9,JNM Super Market,1.0,Gas station.


## Prompt Engineering

### Prompt 1: Basic, Unstructured
Relying on GPT-4's NLP ability to do a better job than I would.

In [83]:
# Client is created without arguments; presumably it picks up OPENAI_API_KEY from the
# environment populated by the dotenv cell -- confirm.
client = OpenAI()

# Prompt 1: a single unstructured paragraph used as the system message.
# NOTE(review): this is a plain (non-f) string and is never passed through .format(), so the
# "{json}" below is sent to the model literally rather than being substituted -- confirm intent.
# NOTE(review): the prompt does not name the output keys, while the parser used with it reads
# 'Score' and 'Description' -- the match relies on the model's choice of key names.
instructions = ("You are a helpful assistant tasked with helping us find grocery stores to stock our honey. "
                "We are a fourth-generation, family-owned apiary based out of northern Florida that specializes "
                "in the production of extremely high quality, mono-floral honey. We are looking for grocers that "
                "specialize in curating gourmet food products, especially from small businesses such as ours. "
                "Your task is to assign a compatibility score from 1-10 to the grocery store based on the {json} "
                "of store data we provide. The output should be a json with the score and a short description "
                "justifying why a given score was received.")

# System message carries the task description; user message carries the store record.
response = client.chat.completions.create(
    model="gpt-4-1106-preview",
    temperature=0.0,
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": test_json}
  ]
)

In [84]:
# Echo the raw completion object so its fields (choices, model, usage) can be inspected.
response

ChatCompletion(id='chatcmpl-8xOfLD97ED0juBds4IRc7WNoQFUYC', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n  "Score": 9,\n  "Description": "The Fresh Market appears to be an excellent match for your high-quality, mono-floral honey. With its upscale atmosphere, focus on gourmet and organic products, and a clientele that appreciates local and specialty foods, it aligns well with your product offering. The store\'s high rating and reviews indicate a satisfied customer base that values quality, which is consistent with your honey\'s premium positioning. The only reason it does not receive a perfect score is due to the potential for competition with other premium products and the need to ensure that your honey stands out in their selection."\n}\n```', role='assistant', function_call=None, tool_calls=None))], created=1709166859, model='gpt-4-1106-preview', object='chat.completion', system_fingerprint='fp_7cc080b25b', usage=Com

#### Parse the response

In [85]:
def parse_chat_completion(response):
    """
    Parses a chat completion response to extract score, description, and calculate the cost.

    The reply is expected to contain one JSON object, optionally wrapped in a Markdown
    code fence and/or surrounded by prose. The object is located by its outermost braces
    instead of stripping fence characters: ``str.strip`` with a multi-character argument
    removes a *set of characters* from both ends (not a prefix/suffix) and cannot cope
    with any text that precedes the fence.

    Args:
    response (ChatCompletion): A response object containing the completion message and usage data.

    Returns:
    tuple: A tuple containing the score, description, and cost of the chat completion.
           ``score`` / ``description`` are ``None`` when the corresponding key is absent.

    Raises:
    ValueError: If the reply contains no JSON object, or the object is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """

    # Fall back to an empty string so a missing message body fails with the clear error below
    raw_content = response.choices[0].message.content or ""

    # Locate the outermost JSON object, ignoring code fences or prose around it
    start = raw_content.find('{')
    end = raw_content.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model response: {raw_content!r}")

    # Parse the JSON string
    data = json.loads(raw_content[start:end + 1])

    # Key casing is matched case-insensitively, so 'Score' and 'score' are both accepted
    fields = {str(key).lower(): value for key, value in data.items()}
    score = fields.get('score')
    description = fields.get('description')

    # Cost calculation (USD per token; rates are hard-coded -- verify against current pricing)
    cost_per_input_token = 0.01 / 1000
    cost_per_output_token = 0.03 / 1000
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    cost = round((input_tokens * cost_per_input_token) + (output_tokens * cost_per_output_token), 4)
    print("input_tokens", input_tokens)
    print("output_tokens", output_tokens)

    return score, description, cost

# Example usage
# Assuming 'response' is a valid ChatCompletion object
# score, description, total_cost = parse_chat_completion(response)
# print("Score:", score)
# print("Description:", description)
# print(f"Total Cost = ${total_cost:.3f}")

In [86]:
# Parse the Prompt 1 reply into its score, justification and estimated cost.
score, description, total_cost = parse_chat_completion(response)

# Score and description come from free-form model output: escape them so characters such
# as "&" or "<" are shown literally instead of being interpreted as markup.
display(HTML(f"<strong>Score:</strong> {html.escape(str(score))}"))
display(HTML(f"<strong>Description:</strong> {html.escape(str(description))}"))
display(HTML(f"<strong>GPT Cost =</strong> ${total_cost:.3f}"))

input_tokens 1345
output_tokens 130


#### This prompt does really well, but it fails to punish large stores. 

### Prompt 2: Better formatting, estimate size

In [88]:
# A new client is created here even though an earlier cell already created one;
# presumably it reads OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

# Prompt 2: structured instructions that pin the three output keys read by
# parse_chat_completion_2 ("Compatibility Score", "Compatibility Description",
# "Estimated Number of Stores") and include one worked example of the expected JSON.
instructions = (
"""
Given the details of a grocery store provided in JSON format, please evaluate the compatibility of our fourth-generation, family-owned apiary based in northern Florida with the grocery store. Our apiary specializes in producing high-quality, mono-floral honey. We are targeting small grocers that curate gourmet food products, particularly from small businesses like ours. 

Your task is to:
1. Assign a compatibility score from 1 to 10, where 10 indicates the highest compatibility with our high-quality, mono-floral honey.
2. Provide a short description justifying the assigned compatibility score. This description should be a few sentences explaining why the grocery store is considered compatible or incompatible with our product.
3. Research and estimate the number of stores in the grocery store chain. This should be an integer value.

Please format your response as a JSON object with the following fields:
- "Compatibility Score": [An integer from 1 to 10 indicating the compatibility score]
- "Compatibility Description": [A short description in a few sentences justifying the assigned score]
- "Estimated Number of Stores": [An integer estimate of the number of stores in the chain]

For example, if the provided JSON data suggests a small, gourmet-focused grocer with a strong emphasis on supporting local small businesses, and it's part of a chain with 15 stores, your output should look something like this:

{
  "Compatibility Score": 9,
  "Compatibility Description": "The grocer's focus on gourmet products and support for local small businesses aligns well with our apiary's high-quality, mono-floral honey, making it a highly compatible partner.",
  "Estimated Number of Stores": 15
}

Note: Do not include any extraneous information such as research notes.
"""
)

# System message carries the task description; user message carries the single test record.
response = client.chat.completions.create(
    model="gpt-4-1106-preview",
    temperature=0.0,
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": test_json}
  ]
)

#### Parse the response

In [110]:
def parse_chat_completion_2(response):
    """
    Parses a chat completion response to extract score, description, store count, and cost.

    The reply is expected to contain one JSON object with the keys "Compatibility Score",
    "Compatibility Description" and "Estimated Number of Stores", optionally wrapped in a
    Markdown code fence and/or surrounded by prose. The object is located by its outermost
    braces instead of stripping fence characters: ``str.strip`` with a multi-character
    argument removes a *set of characters* from both ends (not a prefix/suffix) and cannot
    cope with any text that precedes the fence.

    Args:
    response (ChatCompletion): A response object containing the completion message and usage data.

    Returns:
    tuple: A 4-tuple ``(score, description, num_stores, cost)``. The first three are
           ``None`` when the corresponding key is absent from the reply.

    Raises:
    ValueError: If the reply contains no JSON object, or the object is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """

    # Fall back to an empty string so a missing message body fails with the clear error below
    raw_content = response.choices[0].message.content or ""

    # Locate the outermost JSON object, ignoring code fences or prose around it
    start = raw_content.find('{')
    end = raw_content.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model response: {raw_content!r}")

    # Parse the JSON string
    data = json.loads(raw_content[start:end + 1])

    # Extracting score, description and estimated chain size
    score = data.get('Compatibility Score')
    description = data.get('Compatibility Description')
    num_stores = data.get('Estimated Number of Stores')

    # Cost calculation (USD per token; rates are hard-coded -- verify against current pricing)
    cost_per_input_token = 0.01 / 1000
    cost_per_output_token = 0.03 / 1000
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    cost = round((input_tokens * cost_per_input_token) + (output_tokens * cost_per_output_token), 4)

    return score, description, num_stores, cost

# Example usage
# Assuming 'response' is a valid ChatCompletion object
# score, description, num_stores, total_cost = parse_chat_completion_2(response)
# print("Score:", score)
# print("Description:", description)
# print("Estimated Number of Stores:", num_stores)
# print(f"Total Cost = ${total_cost:.3f}")

In [92]:
# Parse the reply into its score, justification, estimated chain size and estimated cost.
score, description, num_stores, total_cost = parse_chat_completion_2(response)

# The parsed fields come from free-form model output: escape them so characters such as
# "&" or "<" are shown literally instead of being interpreted as markup.
display(HTML(f"<strong>Score:</strong> {html.escape(str(score))}"))
display(HTML(f"<strong>Description:</strong> {html.escape(str(description))}"))
display(HTML(f"<strong>Num Stores:</strong> {html.escape(str(num_stores))}"))
display(HTML(f"<strong>GPT Cost =</strong> ${total_cost:.3f}"))

input_tokens 1579
output_tokens 107


#### An 8 seems a bit low; maybe this prompt is stricter across the board.

## Repeat for all locations

In [96]:
records = []

for i in tqdm(benchmark_place_results.index):

    # Serialise the current row to JSON -- the input format the instructions ask for
    input_data = benchmark_place_results.loc[i].to_json()

    # One GPT-4 completion per store; temperature 0 for reproducibility
    response = client.chat.completions.create(
        messages=[
            dict(role="system", content=instructions),
            dict(role="user", content=input_data),
        ],
        model="gpt-4-1106-preview",
        temperature=0.0,
    )

    # Pull the structured fields and the estimated cost out of the reply
    score, description, num_stores, total_cost = parse_chat_completion_2(response)

    # Collect one flat record per store; the frame is built once after the loop
    records.append({
        'Store Name': benchmark_place_results.loc[i, 'Name'],
        'Place ID': benchmark_place_results.loc[i, 'Place ID'],
        'GPT Description': description,
        'GPT Score': score,
        'GPT Num Stores': num_stores,
        'GPT Cost ($)': total_cost,
    })

# Build the results table from the accumulated records
gpt_results = pd.DataFrame(data=records)
gpt_results

  0%|          | 0/10 [00:00<?, ?it/s]

input_tokens 1722
output_tokens 128
input_tokens 1270
output_tokens 106
input_tokens 481
output_tokens 108
input_tokens 1370
output_tokens 111
input_tokens 485
output_tokens 109
input_tokens 1115
output_tokens 129
input_tokens 1377
output_tokens 132
input_tokens 1174
output_tokens 101
input_tokens 2130
output_tokens 133
input_tokens 1181
output_tokens 100


Unnamed: 0,Store Name,Place ID,GPT Description,GPT Score,GPT Num Stores,GPT Cost ($)
0,The Fresh Market,ChIJmb6kSDZ_54gRowDRme47G0k,"The Fresh Market's upscale and gourmet focus, ...",8,159,0.0211
1,The Ancient Olive Gourmet of Winter Garden,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden spe...,9,4,0.0159
2,Seriously Great Market Solution,ChIJu9MkmVF_54gRwZIoka6RoQk,While the provided JSON data lacks detailed in...,7,1,0.008
3,Tn Food & Grocery,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery appears to be a local store ...,7,1,0.017
4,SC Foods,ChIJ2esYs3yP54gRAarbKINxxME,"SC Foods, being a grocery store, is a potentia...",8,1,0.0081
5,Holson's Produce,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a local market ...,7,1,0.015
6,Suns Market,ChIJwVN3bHOD54gRSlij1UhudVc,"Suns Market appears to be a small, independent...",5,1,0.0177
7,Colonial Grocery Deli & Bodega,ChIJEaAcVKx554gRbVLiyDDehHI,Colonial Grocery Deli & Bodega seems to be a s...,6,1,0.0148
8,Trader Joe's,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's is known for its curated selectio...,7,530,0.0253
9,JNM Super Market,ChIJIxxg6f9-54gRVzC_PSR-wn8,Given the mixed reviews about the store's focu...,2,1,0.0148


#### Join Human Results

In [97]:
# Re-display the human benchmark scores for reference before joining them with the GPT results.
human_scores

Unnamed: 0,Name,Human Score,Human Notes
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...
2,Seriously Great Market Solution,1.0,No reviews or website
3,Tn Food & Grocery,4.0,No focus on gourmet
4,Holson's Produce,5.0,No website but maybe.
5,Suns Market,3.0,"Family owned so that's a plus, but gas station..."
6,SC Foods,4.0,I don't know what this is but there's no focus...
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...
8,Trader Joe's,4.0,Massive chain. Too much red tape.
9,JNM Super Market,1.0,Gas station.


In [98]:
# Left-join the GPT results onto the human scores by store name (the human scores have no
# Place ID column to join on). `validate` makes pandas raise a MergeError if a store name is
# duplicated on either side, instead of silently fanning out rows with mismatched Place IDs.
benchmark_results = (
    human_scores
    .merge(gpt_results, how='left', left_on='Name', right_on='Store Name', validate='one_to_one')
    .drop(columns='Store Name')  # redundant with 'Name' after the join
)
benchmark_results

Unnamed: 0,Name,Human Score,Human Notes,Place ID,GPT Description,GPT Score,GPT Num Stores,GPT Cost ($)
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...,ChIJmb6kSDZ_54gRowDRme47G0k,"The Fresh Market's upscale and gourmet focus, ...",8,159,0.0211
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden spe...,9,4,0.0159
2,Seriously Great Market Solution,1.0,No reviews or website,ChIJu9MkmVF_54gRwZIoka6RoQk,While the provided JSON data lacks detailed in...,7,1,0.008
3,Tn Food & Grocery,4.0,No focus on gourmet,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery appears to be a local store ...,7,1,0.017
4,Holson's Produce,5.0,No website but maybe.,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a local market ...,7,1,0.015
5,Suns Market,3.0,"Family owned so that's a plus, but gas station...",ChIJwVN3bHOD54gRSlij1UhudVc,"Suns Market appears to be a small, independent...",5,1,0.0177
6,SC Foods,4.0,I don't know what this is but there's no focus...,ChIJ2esYs3yP54gRAarbKINxxME,"SC Foods, being a grocery store, is a potentia...",8,1,0.0081
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...,ChIJEaAcVKx554gRbVLiyDDehHI,Colonial Grocery Deli & Bodega seems to be a s...,6,1,0.0148
8,Trader Joe's,4.0,Massive chain. Too much red tape.,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's is known for its curated selectio...,7,530,0.0253
9,JNM Super Market,1.0,Gas station.,ChIJIxxg6f9-54gRVzC_PSR-wn8,Given the mixed reviews about the store's focu...,2,1,0.0148


#### Export

In [99]:
# Make sure the output directory exists first: to_csv does not create missing parent directories.
os.makedirs('./Results', exist_ok=True)
benchmark_results.to_csv('./Results/benchmark_results_prompt2.csv', index=False)
print("DONE")

DONE


## Print to Screen

In [101]:
# Render a side-by-side GPT vs. human summary for every benchmark store.
for i in benchmark_results.index:

    ## Parse Results
    name = benchmark_results['Name'].loc[i]

    gpt_score = benchmark_results['GPT Score'].loc[i]
    gpt_description = benchmark_results['GPT Description'].loc[i]
    gpt_num_stores = benchmark_results['GPT Num Stores'].loc[i]
    gpt_cost = benchmark_results['GPT Cost ($)'].loc[i]

    human_score = benchmark_results['Human Score'].loc[i]
    human_description = benchmark_results['Human Notes'].loc[i]

    ## Display with HTML formatting
    ## Text fields are escaped: store names contain "&" (e.g. "Tn Food & Grocery") and the
    ## descriptions are free-form text, none of which should be interpreted as markup.
    display(HTML(f"<strong>Name:</strong> {html.escape(str(name))}"))
    print("\n")
    display(HTML(f"<strong>GPT Compatibility Score:</strong> {float(gpt_score)}"))
    display(HTML(f"<strong>GPT Description:</strong> {html.escape(str(gpt_description))}"))
    display(HTML(f"<strong>GPT Number of Stores:</strong> {html.escape(str(gpt_num_stores))}"))
    print("\n")
    display(HTML(f"<strong>Human Compatibility Score:</strong> {human_score}"))
    display(HTML(f"<strong>Human Description:</strong> {html.escape(str(human_description))}"))
    print("\n")
    display(HTML(f"<strong>GPT Cost =</strong> ${gpt_cost:.3f}"))
    print("\n\n")











































































































































































#### This model overvalues Florida-based stores, which all of these inputs are. It is also overly optimistic about a store's potential to stock our products, even when the store data gives no explicit indication of it.

### Prompt 3: Use explicit data, don't overvalue FL stores. More like Prompt 1 but with estimated store number.

In [106]:
# A new client is created here even though earlier cells already created one;
# presumably it reads OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

# Prompt 3: the Prompt 1 framing combined with the structured output contract of Prompt 2,
# plus explicit penalties for large chains and for stores without a stated gourmet focus.
# (A leftover copy of the Prompt 1 text used to be assigned to `instructions` just above and
# was overwritten before ever being read; that dead assignment has been removed.)
instructions = (
"""
You are a helpful assistant tasked with helping us find grocery stores to stock our honey. We are a fourth-generation, family-owned apiary based out of northern Florida that specializes in the production of extremely high quality, mono-floral honey. We are looking for grocers that specialize in curating gourmet food products, especially from small businesses such as ours.

Your task is to:
1. Assign a compatibility score from 1 to 10, where 10 indicates the highest compatibility with our high-quality, mono-floral honey.
2. Provide a short description justifying the assigned compatibility score. This description should be a few sentences explaining why the grocery store is considered compatible or incompatible with our product.
3. Research and estimate the number of stores in the grocery store chain. This should be an integer value.

Please format your response as a JSON object with the following fields:
- "Compatibility Score": [An integer from 1 to 10 indicating the compatibility score]
- "Compatibility Description": [A short description in a few sentences justifying the assigned score]
- "Estimated Number of Stores": [An integer estimate of the number of stores in the chain]

For example, if the provided JSON data suggests a small, gourmet-focused grocer with a strong emphasis on supporting local small businesses, and it's part of a chain with 15 stores, your output should look something like this:

{
  "Compatibility Score": 9,
  "Compatibility Description": "The grocer's focus on gourmet products and support for local small businesses aligns well with our apiary's high-quality, mono-floral honey, making it a highly compatible partner.",
  "Estimated Number of Stores": 15
}

Note: Do not include any extraneous information such as research notes.
Note: Large chains should be penalized.
Note: Stores that don't explicitly focus on gourmet should be penalized.
"""
)

# System message carries the task description; user message carries the single test record.
response = client.chat.completions.create(
    model="gpt-4-1106-preview",
    temperature=0.0,
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": test_json},
    ],
)

In [107]:
# Echo the raw completion object so its fields (choices, model, usage) can be inspected.
response

ChatCompletion(id='chatcmpl-8xOv8vb0Wq78qlT3ac4In8gYHiPSG', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n  "Compatibility Score": 8,\n  "Compatibility Description": "The Fresh Market\'s upscale atmosphere and focus on gourmet products, including local produce and prepared eats, suggest a strong alignment with high-quality, mono-floral honey from a small family-owned apiary. Their emphasis on organic and unique food items further supports compatibility. However, as a chain, they may not be as focused on local products as smaller, independent stores.",\n  "Estimated Number of Stores": 159\n}\n```', role='assistant', function_call=None, tool_calls=None))], created=1709167838, model='gpt-4-1106-preview', object='chat.completion', system_fingerprint='fp_7cc080b25b', usage=CompletionUsage(completion_tokens=102, prompt_tokens=1604, total_tokens=1706))

In [108]:
# Parse the reply into its score, justification, estimated chain size and estimated cost.
score, description, num_stores, total_cost = parse_chat_completion_2(response)

# The parsed fields come from free-form model output: escape them so characters such as
# "&" or "<" are shown literally instead of being interpreted as markup.
display(HTML(f"<strong>Score:</strong> {html.escape(str(score))}"))
display(HTML(f"<strong>Description:</strong> {html.escape(str(description))}"))
display(HTML(f"<strong>Num Stores:</strong> {html.escape(str(num_stores))}"))
display(HTML(f"<strong>GPT Cost =</strong> ${total_cost:.3f}"))

input_tokens 1604
output_tokens 102


In [109]:
records = []

for i in tqdm(benchmark_place_results.index):

    # Serialise the current row to JSON -- the input format the instructions ask for
    input_data = benchmark_place_results.loc[i].to_json()

    # One GPT-4 completion per store; temperature 0 for reproducibility
    response = client.chat.completions.create(
        messages=[
            dict(role="system", content=instructions),
            dict(role="user", content=input_data),
        ],
        model="gpt-4-1106-preview",
        temperature=0.0,
    )

    # Pull the structured fields and the estimated cost out of the reply
    score, description, num_stores, total_cost = parse_chat_completion_2(response)

    # Collect one flat record per store; the frame is built once after the loop
    records.append({
        'Store Name': benchmark_place_results.loc[i, 'Name'],
        'Place ID': benchmark_place_results.loc[i, 'Place ID'],
        'GPT Description': description,
        'GPT Score': score,
        'GPT Num Stores': num_stores,
        'GPT Cost ($)': total_cost,
    })

# Build the results table from the accumulated records
gpt_results = pd.DataFrame(data=records)
gpt_results

  0%|          | 0/10 [00:00<?, ?it/s]

input_tokens 1747
output_tokens 92
input_tokens 1295
output_tokens 95
input_tokens 506
output_tokens 91
input_tokens 1395
output_tokens 110
input_tokens 510
output_tokens 88
input_tokens 1140
output_tokens 109
input_tokens 1402
output_tokens 84
input_tokens 1199
output_tokens 100
input_tokens 2155
output_tokens 97
input_tokens 1206
output_tokens 85


Unnamed: 0,Store Name,Place ID,GPT Description,GPT Score,GPT Num Stores,GPT Cost ($)
0,The Fresh Market,ChIJmb6kSDZ_54gRowDRme47G0k,"The Fresh Market's upscale and gourmet focus, ...",8,159,0.0202
1,The Ancient Olive Gourmet of Winter Garden,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden spe...,9,4,0.0158
2,Seriously Great Market Solution,ChIJu9MkmVF_54gRwZIoka6RoQk,Without specific information on their focus on...,4,1,0.0078
3,Tn Food & Grocery,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery appears to be a local store ...,4,1,0.0173
4,SC Foods,ChIJ2esYs3yP54gRAarbKINxxME,"SC Foods, being a grocery store, may carry a v...",6,1,0.0077
5,Holson's Produce,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a market that f...,4,1,0.0147
6,Suns Market,ChIJwVN3bHOD54gRSlij1UhudVc,Suns Market does not appear to specialize in g...,2,1,0.0165
7,Colonial Grocery Deli & Bodega,ChIJEaAcVKx554gRbVLiyDDehHI,While Colonial Grocery Deli & Bodega has a hig...,3,1,0.015
8,Trader Joe's,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's offers a variety of organic and g...,5,530,0.0245
9,JNM Super Market,ChIJIxxg6f9-54gRVzC_PSR-wn8,JNM Super Market appears to be more of a conve...,2,1,0.0146


In [111]:
# Left-join the GPT results onto the human scores by store name (the human scores have no
# Place ID column to join on). `validate` makes pandas raise a MergeError if a store name is
# duplicated on either side, instead of silently fanning out rows with mismatched Place IDs.
benchmark_results = (
    human_scores
    .merge(gpt_results, how='left', left_on='Name', right_on='Store Name', validate='one_to_one')
    .drop(columns='Store Name')  # redundant with 'Name' after the join
)
benchmark_results

Unnamed: 0,Name,Human Score,Human Notes,Place ID,GPT Description,GPT Score,GPT Num Stores,GPT Cost ($)
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...,ChIJmb6kSDZ_54gRowDRme47G0k,"The Fresh Market's upscale and gourmet focus, ...",8,159,0.0202
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden spe...,9,4,0.0158
2,Seriously Great Market Solution,1.0,No reviews or website,ChIJu9MkmVF_54gRwZIoka6RoQk,Without specific information on their focus on...,4,1,0.0078
3,Tn Food & Grocery,4.0,No focus on gourmet,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery appears to be a local store ...,4,1,0.0173
4,Holson's Produce,5.0,No website but maybe.,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a market that f...,4,1,0.0147
5,Suns Market,3.0,"Family owned so that's a plus, but gas station...",ChIJwVN3bHOD54gRSlij1UhudVc,Suns Market does not appear to specialize in g...,2,1,0.0165
6,SC Foods,4.0,I don't know what this is but there's no focus...,ChIJ2esYs3yP54gRAarbKINxxME,"SC Foods, being a grocery store, may carry a v...",6,1,0.0077
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...,ChIJEaAcVKx554gRbVLiyDDehHI,While Colonial Grocery Deli & Bodega has a hig...,3,1,0.015
8,Trader Joe's,4.0,Massive chain. Too much red tape.,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's offers a variety of organic and g...,5,530,0.0245
9,JNM Super Market,1.0,Gas station.,ChIJIxxg6f9-54gRVzC_PSR-wn8,JNM Super Market appears to be more of a conve...,2,1,0.0146


In [112]:
# Make sure the output directory exists first: to_csv does not create missing parent directories.
os.makedirs('./Results', exist_ok=True)
benchmark_results.to_csv('./Results/benchmark_results_prompt3.csv', index=False)
print("DONE")

DONE


In [113]:
# Render a side-by-side GPT vs. human summary for every benchmark store.
for i in benchmark_results.index:

    ## Parse Results
    name = benchmark_results['Name'].loc[i]

    gpt_score = benchmark_results['GPT Score'].loc[i]
    gpt_description = benchmark_results['GPT Description'].loc[i]
    gpt_num_stores = benchmark_results['GPT Num Stores'].loc[i]
    gpt_cost = benchmark_results['GPT Cost ($)'].loc[i]

    human_score = benchmark_results['Human Score'].loc[i]
    human_description = benchmark_results['Human Notes'].loc[i]

    ## Display with HTML formatting
    ## Text fields are escaped: store names contain "&" (e.g. "Tn Food & Grocery") and the
    ## descriptions are free-form text, none of which should be interpreted as markup.
    display(HTML(f"<strong>Name:</strong> {html.escape(str(name))}"))
    print("\n")
    display(HTML(f"<strong>GPT Compatibility Score:</strong> {float(gpt_score)}"))
    display(HTML(f"<strong>GPT Description:</strong> {html.escape(str(gpt_description))}"))
    display(HTML(f"<strong>GPT Number of Stores:</strong> {html.escape(str(gpt_num_stores))}"))
    print("\n")
    display(HTML(f"<strong>Human Compatibility Score:</strong> {human_score}"))
    display(HTML(f"<strong>Human Description:</strong> {html.escape(str(human_description))}"))
    print("\n")
    display(HTML(f"<strong>GPT Cost =</strong> ${gpt_cost:.3f}"))
    print("\n\n")











































































































































































### This prompt looks good

### Next: tuning hyperparameters