In [1]:
# Import necessary libraries
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import pandas as pd
import numpy as np

In [2]:
# Load the csv file as dataframe.
df = pd.read_csv('./data/grocery_store_items.csv')
df.head(10)

Unnamed: 0,Item
0,Milk
1,Bread
2,Eggs
3,Cheese
4,Yogurt
5,Butter
6,Cereal
7,Pasta
8,Rice
9,Flour


In [3]:
# Configure the baseline configuration of the OpenAI library for Azure OpenAI Service.
openai.api_type = "azure"
openai.api_base = "https://PLESAE_ENTER_YOUR_OWNED_AOAI_RESOURCE_NAME.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.api_key = "PLEASE_ENTER_YOUR_OWNED_AOAI_SERVICE_KEY"
engine = "PLEASE_ENTER_YOUR_OWNED_AOAI_TEXT_EMBEDDING_MODEL_NAME"

In [4]:
# Perform word embedding for each item name in the dataframe in vector form and save it as a new column "embedding".
df['embedding'] = df['Item'].apply(lambda x: get_embedding(x, engine=engine))

In [5]:
# Save the dataframe with the word embedding result as a CSV file.
df.to_csv('./data/grocery_store_items_word_embeddings.csv')

In [6]:
# Reload the item CSV file with the word embedding result.
df = pd.read_csv('./data/grocery_store_items_word_embeddings.csv')
df['embedding'] = df['embedding'].apply(eval).apply(np.array)

In [7]:
# Display the top 10 rows from the dataframe.
df.head(10)

Unnamed: 0.1,Unnamed: 0,Item,embedding
0,0,Milk,"[0.007104953285306692, -0.012599196285009384, ..."
1,1,Bread,"[0.017689064145088196, -0.015538378618657589, ..."
2,2,Eggs,"[-0.003550312714651227, -0.01650831289589405, ..."
3,3,Cheese,"[0.010652019642293453, 0.0009668035199865699, ..."
4,4,Yogurt,"[0.001584541518241167, -0.014242922887206078, ..."
5,5,Butter,"[-0.00777808390557766, -0.024573862552642822, ..."
6,6,Cereal,"[3.560942423064262e-05, -0.00978065375238657, ..."
7,7,Pasta,"[0.02129378356039524, -0.002485408913344145, 0..."
8,8,Rice,"[0.01777689903974533, -0.015344271436333656, 0..."
9,9,Flour,"[0.011583199724555016, -0.0026128862518817186,..."


In [8]:
# Making input prompt for entering the search terms for semantic search.
search_terms = input('Please enter search terms: ')

Please enter search terms: hotdog


In [9]:
# Perform word embedding for the search terms.
search_terms_vector = get_embedding(search_terms, engine=engine)
search_terms_vector

[-0.018107039853930473,
 -0.004403006751090288,
 -0.013508632779121399,
 -0.02808544971048832,
 -0.006405853666365147,
 0.009020952507853508,
 -0.03475509211421013,
 -0.020829608663916588,
 -0.01857599802315235,
 -0.023187430575489998,
 0.025454068556427956,
 0.027824917808175087,
 -0.017364520579576492,
 0.007327488623559475,
 -0.01356073934584856,
 0.0029212255030870438,
 0.03954889997839928,
 -0.014055752195417881,
 0.025362880900502205,
 0.002917968900874257,
 -0.009060032665729523,
 -0.0012090356322005391,
 0.013599819503724575,
 -0.006484013516455889,
 0.0011243624612689018,
 -7.734571408946067e-05,
 0.01559289637953043,
 -0.011001002974808216,
 -0.003412981517612934,
 -0.0023871329613029957,
 0.04199790954589844,
 0.004757982678711414,
 -0.021858712658286095,
 -0.007093009073287249,
 -0.005122728645801544,
 -0.015084857121109962,
 -0.0022422114852815866,
 -0.00700182281434536,
 0.01001097820699215,
 -0.007119062356650829,
 0.005226941779255867,
 -0.005103189032524824,
 -0.003676

In [10]:
# Calculate the cosine similarity between each item name and the search terms.
df["similarity"] = df['embedding'].apply(lambda x: cosine_similarity(x, search_terms_vector))

In [11]:
# Display the top 10 semantic-related items by highest similarity.
df.sort_values("similarity", ascending=False).head(10)

Unnamed: 0.1,Unnamed: 0,Item,embedding,similarity
37,37,Sausage,"[0.01726933754980564, 0.015029799193143845, -0...",0.868639
15,15,Ketchup,"[0.0023621052969247103, -0.002763488097116351,...",0.850977
19,19,Hot sauce,"[-0.0020702260080724955, -0.008981933817267418...",0.848937
36,36,Bacon,"[0.009423656389117241, -0.010492254048585892, ...",0.847789
16,16,Mustard,"[-0.027545463293790817, -0.020024826750159264,...",0.840137
33,33,Ground beef,"[0.00046088005183264613, -0.002725133905187249...",0.837529
3,3,Cheese,"[0.010652019642293453, 0.0009668035199865699, ...",0.831982
40,40,Shrimp,"[0.015972528606653214, -0.010936202481389046, ...",0.831027
7,7,Pasta,"[0.02129378356039524, -0.002485408913344145, 0...",0.824768
35,35,Pork chops,"[0.008955670520663261, -0.005776628386229277, ...",0.824415
