## Prompt Engineering
- Try different prompts and formatting
- Check the results against the human benchmark

In [1]:
import pandas as pd
import numpy as np
import os
from openai import OpenAI
import json
from IPython.display import display, HTML
from tqdm.notebook import tqdm

## Load the OpenAI API Key

In [2]:
from dotenv import load_dotenv, find_dotenv

# Read the local .env file so its variables become visible through os.getenv
_ = load_dotenv(find_dotenv())

api_key = os.getenv('OPENAI_API_KEY')

# Report only whether a key was found -- the key itself is never printed
print("API key is loaded." if api_key else "API key is not loaded.")

API key is loaded.


## Load the benchmark data

#### Load the place results

In [3]:
# Benchmark store records (one row per place) used as model input
benchmark_place_results = pd.read_csv(filepath_or_buffer='./Data/benchmark_input.csv')
benchmark_place_results

Unnamed: 0,Name,Place ID,Description,Website,Address,Phone,Rating,Price,Type,Highlights,Offerings,Q_and_A,Snippets,Reviews_Most_Relevant
0,The Fresh Market,ChIJmb6kSDZ_54gRowDRme47G0k,Upscale grocery store chain with an old-world ...,https://stores.thefreshmarket.com/fl/orlando/38,"5000 Dr Phillips Blvd, Orlando, FL 32819",(407) 294-1516,4.5,$$$,"['Grocery store', 'American grocery store', 'B...",Great produce,Organic products,[{'question': {'user': {'name': 'Anita Caragan...,"[{'snippet': '""Exclusive place to buy good foo...","[{'username': 'Scott Ziegler', 'rating': 5, 'd..."
1,The Ancient Olive Gourmet of Winter Garden,ChIJyZdkgPuD54gRqtF-E30N4x0,,http://www.theancientolive.com/,"125 W Plant St, Winter Garden, FL 34787",(407) 656-6457,4.8,,['Gourmet grocery store'],,,[],"[{'snippet': '""It is good with any kind of foo...","[{'username': 'Mesh G', 'rating': 5, 'descript..."
2,Seriously Great Market Solution,ChIJu9MkmVF_54gRwZIoka6RoQk,,,"8815 Conroy Windermere Rd # 35, Orlando, FL 32835",(321) 251-6383,,,['Grocery store'],,,[],,[]
3,Tn Food & Grocery,ChIJmfVTDG2D54gRFdvotMCET7U,,http://tn-food-grocery.keeq.io/,"66 S Dillard St, Winter Garden, FL 34787",(407) 656-1551,4.3,,['Grocery store'],,,[{'question': {'user': {'name': 'keycha “ ” vi...,"[{'snippet': '""Very friendly staff and the pla...","[{'username': 'Tara G.', 'rating': 5, 'descrip..."
4,SC Foods,ChIJ2esYs3yP54gRAarbKINxxME,,https://alisanc.com/contact-us/,"15701 State Rte 50, Clermont, FL 34711, United...",+1 407-347-8135,,,['Grocery store'],,,[],,[]
5,Holson's Produce,ChIJo5_x_gV554gRp3F5Ml9JGxY,,,"5853 Old Winter Garden Rd, Orlando, FL 32835",(407) 292-5678,4.1,,"['Produce market', 'Meat wholesaler', 'Produce...",,,[],"[{'snippet': '""Service is good, food is good.....","[{'username': 'Angelo M', 'rating': 5, 'descri..."
6,Suns Market,ChIJwVN3bHOD54gRSlij1UhudVc,,,"523 S Dillard St, Winter Garden, FL 34787",(407) 877-3020,4.0,,['Grocery store'],,,[{'question': {'user': {'name': 'Kelly Pounds'...,"[{'snippet': '""Nice experience prices a little...","[{'username': ""Dad's Home Movies"", 'rating': 2..."
7,Colonial Grocery Deli & Bodega,ChIJEaAcVKx554gRbVLiyDDehHI,,,"6339 W Colonial Dr, Orlando, FL 32818",(407) 296-9955,4.7,,['Grocery store'],Great produce,,[],"[{'snippet': '""Very good food! and great choic...","[{'username': 'Robert Brown', 'rating': 5, 'de..."
8,Trader Joe's,ChIJbeFK_p5_54gRGvfzUjKvlFA,Grocery chain with a variety of signature item...,https://locations.traderjoes.com/fl/orlando/76...,"8323 Sand Lake Rd, Orlando, FL 32819",(407) 345-0611,4.7,$$,"['Grocery store', 'Cheese shop', 'Fresh food m...",Great produce,Organic products,"[{'question': {'user': {'name': 'ed m', 'link'...","[{'snippet': '""Great selection, great meats, g...","[{'username': 'Richard Yu', 'rating': 5, 'desc..."
9,JNM Super Market,ChIJIxxg6f9-54gRVzC_PSR-wn8,,,"7521 International Dr, Orlando, FL 32819",(407) 592-4748,3.3,,"['Grocery store', 'Beer store', 'Convenience s...",,,"[{'question': {'user': {'name': 'LINDA RUPP', ...","[{'snippet': '""Great selection and good qualit...","[{'username': 'Anthony Barney', 'rating': 1, '..."


#### Separate a single record for prompt engineering

In [4]:
# Single store record kept aside for iterating on the prompt
with open('./Data/test.json', mode='r') as file:
    test_json = json.loads(file.read())
test_json

'{"Name":"The Fresh Market","Description":"Upscale grocery store chain with an old-world vibe offers local produce, prepared eats, wine & more.","Website":"https:\\/\\/stores.thefreshmarket.com\\/fl\\/orlando\\/38","Address":"5000 Dr Phillips Blvd, Orlando, FL 32819","Phone":"(407) 294-1516","Rating":4.5,"Price":"$$$","Type":["Grocery store","American grocery store","Bakery","Butcher shop","Fruit and vegetable store","Gourmet grocery store","Market","Organic food store","Produce market","Supermarket"],"Highlights":"Great produce","Offerings":"Organic products","Q_and_A":[{"question":{"user":{"name":"Anita Caragan","link":"https:\\/\\/www.google.com\\/maps\\/contrib\\/113357052055774270348","thumbnail":"https:\\/\\/lh3.googleusercontent.com\\/a\\/ACg8ocKdK8_h2Q72RjHiQIaBRzydgHNYXF_L4fVzUnKZqkzr=s120-c-rp-mo-br100"},"text":"Do you have bake chicken?","date":"4 years ago","language":"en"},"answer":{"user":{"name":"Jeanne M","link":"https:\\/\\/www.google.com\\/maps\\/contrib\\/10327323170

#### Load human scores

In [6]:
# Human-assigned compatibility scores and notes, one row per store
human_scores = pd.read_csv(filepath_or_buffer='./Data/human_scores.csv')
human_scores

Unnamed: 0,Name,Human Score,Human Notes
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...
2,Seriously Great Market Solution,1.0,No reviews or website
3,Tn Food & Grocery,4.0,No focus on gourmet
4,Holson's Produce,5.0,No website but maybe.
5,Suns Market,3.0,"Family owned so that's a plus, but gas station..."
6,SC Foods,4.0,I don't know what this is but there's no focus...
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...
8,Trader Joe's,4.0,Massive chain. Too much red tape.
9,JNM Super Market,1.0,Gas station.


## Prompt Engineering

#### Prompt 1: Basic, Unstructured
Relying on GPT-4's NLP ability to do a better job than I would

In [10]:
client = OpenAI()

# System prompt for the basic, unstructured attempt.
# Note: this is a plain string that is never formatted, so the "{json}"
# placeholder below is sent to the model literally.
instructions = "".join([
    "You are a helpful assistant tasked with helping us find grocery stores to stock our honey. ",
    "We are a fourth-generation, family-owned apiary based out of northern Florida that specializes ",
    "in the production of extremely high quality, mono-floral honey. We are looking for grocers that ",
    "specialize in curating gourmet food products, especially from small businesses such as ours. ",
    "Your task is to assign a compatibility score from 1-10 to the grocery store based on the {json} ",
    "of store data we provide. The output should be a json with the score and a short description ",
    "justifying why a given score was received.",
])

# Score the single held-out record: system message carries the task,
# user message carries the store data.
response = client.chat.completions.create(
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": test_json},
    ],
    model="gpt-4-1106-preview",
    temperature=0.0,
)

In [17]:
# Show the raw object returned by the chat completion call above
response

ChatCompletion(id='chatcmpl-8gFtWLJ5MMWJq9r1qcZ4oHlFj0SGp', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n  "Score": 9,\n  "Description": "The Fresh Market\'s upscale and gourmet focus, along with its high customer ratings and emphasis on quality and local produce, makes it an excellent match for a high-quality, mono-floral honey from a small family-owned business. The store\'s offerings of organic products and a variety of gourmet grocery items align well with the target market for specialty honey. The only reason it does not receive a perfect score is due to the lack of explicit mention of a focus on small business products, but the overall vibe and product selection suggest a strong compatibility."\n}\n```', role='assistant', function_call=None, tool_calls=None))], created=1705081566, model='gpt-4-1106-preview', object='chat.completion', system_fingerprint='fp_168383a679', usage=CompletionUsage(completion_tokens=124,

#### Parse the response

In [12]:
def parse_chat_completion(response, cost_per_input_token=0.01 / 1000, cost_per_output_token=0.03 / 1000):
    """
    Parses a chat completion response to extract score, description, and calculate the cost.

    The model's reply is expected to contain one JSON object, optionally wrapped
    in a Markdown code fence and/or surrounded by prose. The first JSON object
    found in the reply is decoded; anything before or after it is ignored.

    Args:
    response (ChatCompletion): A response object exposing
        ``choices[0].message.content`` and ``usage.prompt_tokens`` /
        ``usage.completion_tokens``.
    cost_per_input_token (float): Dollar price of one prompt token.
    cost_per_output_token (float): Dollar price of one completion token.

    Returns:
    tuple: A tuple containing the score, description, and cost of the chat completion.
        Score and description are ``None`` when the reply has no such key.

    Raises:
    json.JSONDecodeError: If the reply contains no decodable JSON object.
    ValueError: If the decoded JSON is not an object (dict).
    """

    # Extracting the JSON payload for score and description.
    # str.strip('```json\n') removes a *set of characters*, not a prefix, so it
    # only worked when the reply was exactly a fenced object. Locate the first
    # '{' instead and let the decoder stop at the matching close brace.
    message_text = response.choices[0].message.content or ""
    object_start = message_text.find('{')
    if object_start == -1:
        raise json.JSONDecodeError("No JSON object found in chat completion", message_text, 0)

    # raw_decode tolerates trailing text (closing fence, extra prose)
    data, _ = json.JSONDecoder().raw_decode(message_text[object_start:])
    if not isinstance(data, dict):
        raise ValueError("Chat completion JSON is not an object")

    # Extracting score and description; fall back to a case-insensitive lookup
    # because the prompt does not pin the capitalisation of the keys.
    lowered = {str(key).lower(): value for key, value in data.items()}
    score = data.get('Score', lowered.get('score'))
    description = data.get('Description', lowered.get('description'))

    # Cost calculation
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    cost = round((input_tokens * cost_per_input_token) + (output_tokens * cost_per_output_token), 4)

    return score, description, cost

# Example usage
# Assuming 'response' is a valid ChatCompletion object
# score, description, total_cost = parse_chat_completion(response)
# print("Score:", score)
# print("Description:", description)
# print(f"Total Cost = ${total_cost:.3f}")

In [75]:
# Unpack the parsed reply for the single test record and render it as HTML
score, description, total_cost = parse_chat_completion(response)

display(
    HTML(f"<strong>Score:</strong> {score}"),
    HTML(f"<strong>Description:</strong> {description}"),
    HTML(f"<strong>GPT Cost =</strong> ${total_cost:.3f}"),
)

## Repeat for all locations

In [13]:
# Score every benchmark store with the same system prompt and collect one
# record per store.
#
# NOTE(review): this cell originally iterated over `place_data_df`, which is
# not defined anywhere in the notebook, so it failed with a NameError on a
# fresh kernel. The recorded output has the same stores and Place IDs as
# `benchmark_place_results`, so that frame is used here -- confirm it is the
# intended input (it also carries the 'Unnamed: 0' and 'Place ID' columns,
# which are therefore included in the JSON sent to the model).
records = []

for i in tqdm(benchmark_place_results.index):

    store_row = benchmark_place_results.loc[i]

    ## Convert input to json, in accordance with chat instructions
    input_data = store_row.to_json()

    ## Get the GPT 4 chat completion object
    ## Temp = 0 for reproducibility
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        temperature=0.0,
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": input_data}
        ]
    )

    ## Parse the response
    score, description, total_cost = parse_chat_completion(response)

    ## Append the record as a dictionary to the list
    records.append({
        'Store Name': store_row['Name'],
        'Place ID': store_row['Place ID'],
        'GPT Description': description,
        'GPT Score': score,
        'GPT Cost ($)': total_cost
    })

# Convert the list of records into a DataFrame (built once, not row-by-row)
gpt_results = pd.DataFrame(records)
gpt_results

  0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,Store Name,Place ID,GPT Description,GPT Score,GPT Cost ($)
0,The Fresh Market,ChIJmb6kSDZ_54gRowDRme47G0k,The Fresh Market appears to be an excellent ma...,9,0.0186
1,The Ancient Olive Gourmet of Winter Garden,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden is ...,9,0.0156
2,Seriously Great Market Solution,ChIJu9MkmVF_54gRwZIoka6RoQk,Seriously Great Market Solution is listed as a...,2,0.0056
3,Tn Food & Grocery,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery has a community-focused atmo...,5,0.0157
4,SC Foods,ChIJ2esYs3yP54gRAarbKINxxME,SC Foods is listed as a 'Grocery store' withou...,2,0.0059
5,Holson's Produce,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a well-regarded...,7,0.0135
6,Suns Market,ChIJwVN3bHOD54gRSlij1UhudVc,"Suns Market appears to be a small, local groce...",3,0.015
7,Colonial Grocery Deli & Bodega,ChIJEaAcVKx554gRbVLiyDDehHI,Colonial Grocery Deli & Bodega has a high cust...,6,0.0135
8,Trader Joe's,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's has a strong reputation for quali...,7,0.0231
9,JNM Super Market,ChIJIxxg6f9-54gRVzC_PSR-wn8,JNM Super Market appears to be more of a conve...,2,0.0129


#### Join Human Results

In [14]:
# Display the human scores that the next cell joins to the GPT results
human_scores

Unnamed: 0,Name,Human Score,Human Notes
0,The Fresh Market,7.0,Curates artisans but a bit of a chain. Lots of...
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...
2,Seriously Great Market Solution,1.0,No reviews or website
3,Tn Food & Grocery,4.0,No focus on gourmet
4,Holson's Produce,5.0,No website but maybe.
5,Suns Market,3.0,"Family owned so that's a plus, but gas station..."
6,SC Foods,4.0,I don't know what this is but there's no focus...
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...
8,Trader Joe's,4.0,Massive chain. Too much red tape.
9,JNM Super Market,1.0,Gas station.


In [23]:
# Left-join the GPT output onto the human scores by store name, then drop the
# duplicate name column that came from the GPT side
benchmark_results = (
    human_scores
    .merge(gpt_results, how='left', left_on='Name', right_on='Store Name')
    .drop(columns="Store Name")
)
benchmark_results

Unnamed: 0,Name,Human Score,Human Notes,Place ID,GPT Description,GPT Score,GPT Cost ($)
0,The Fresh Market,8.0,Local market with a focus on gourmet. Several ...,ChIJmb6kSDZ_54gRowDRme47G0k,The Fresh Market appears to be an excellent ma...,9,0.0186
1,The Ancient Olive Gourmet of Winter Garden,8.0,Gourmet pantry with a honey pantry section. Mi...,ChIJyZdkgPuD54gRqtF-E30N4x0,The Ancient Olive Gourmet of Winter Garden is ...,9,0.0156
2,Seriously Great Market Solution,1.0,No reviews or website,ChIJu9MkmVF_54gRwZIoka6RoQk,Seriously Great Market Solution is listed as a...,2,0.0056
3,Tn Food & Grocery,4.0,No focus on gourmet,ChIJmfVTDG2D54gRFdvotMCET7U,Tn Food & Grocery has a community-focused atmo...,5,0.0157
4,Holson's Produce,5.0,No website but maybe.,ChIJo5_x_gV554gRp3F5Ml9JGxY,Holson's Produce appears to be a well-regarded...,7,0.0135
5,Suns Market,3.0,"Family owned so that's a plus, but gas station...",ChIJwVN3bHOD54gRSlij1UhudVc,"Suns Market appears to be a small, local groce...",3,0.015
6,SC Foods,4.0,I don't know what this is but there's no focus...,ChIJ2esYs3yP54gRAarbKINxxME,SC Foods is listed as a 'Grocery store' withou...,2,0.0059
7,Colonial Grocery Deli & Bodega,6.5,Ethnic market. Strong focus on local. No clear...,ChIJEaAcVKx554gRbVLiyDDehHI,Colonial Grocery Deli & Bodega has a high cust...,6,0.0135
8,Trader Joe's,4.0,Massive chain. Too much red tape.,ChIJbeFK_p5_54gRGvfzUjKvlFA,Trader Joe's has a strong reputation for quali...,7,0.0231
9,JNM Super Market,1.0,Gas station.,ChIJIxxg6f9-54gRVzC_PSR-wn8,JNM Super Market appears to be more of a conve...,2,0.0129


#### Export

In [18]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (no-op when it is already there)
os.makedirs('./Results', exist_ok=True)
benchmark_results.to_csv('./Results/benchmark_results.csv', index=False)
print("DONE")

DONE


## Print to Screen

In [26]:
# Render each store's GPT verdict next to the human verdict
for row_label in benchmark_results.index:

    ## Pull the fields for this store
    store_name = benchmark_results.at[row_label, 'Name']

    model_score = benchmark_results.at[row_label, 'GPT Score']
    model_notes = benchmark_results.at[row_label, 'GPT Description']
    model_cost = benchmark_results.at[row_label, 'GPT Cost ($)']

    reviewer_score = benchmark_results.at[row_label, 'Human Score']
    reviewer_notes = benchmark_results.at[row_label, 'Human Notes']

    ## Display with HTML formatting; blank lines separate the sections
    display(HTML(f"<strong>Name:</strong> {store_name}"))
    print("\n")
    display(
        HTML(f"<strong>GPT Compatibility Score:</strong> {float(model_score)}"),
        HTML(f"<strong>GPT Description:</strong> {model_notes}"),
    )
    print("\n")
    display(
        HTML(f"<strong>Human Compatibility Score:</strong> {reviewer_score}"),
        HTML(f"<strong>Human Description:</strong> {reviewer_notes}"),
    )
    print("\n")
    display(HTML(f"<strong>GPT Cost =</strong> ${model_cost:.3f}"))
    print("\n\n")











































































































































































## That looks good, let's do it for all place data
### There are a lot of hyperparameters to tune
- How zoomed in the map search is (z). We want more unique place IDs where type includes "Gourmet Grocery Store"
- Pagination parameter (p) which specifies how many pages to keep.
- The initial latitudes and longitudes to pass for location. How dense should the grid be? Should we search high-density populations first?

### Next: tuning hyperparameters