In [4]:
# Third-party dependencies: the Pinecone client (vector database) and the
# Sentence-Transformers embedding model wrapper.
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# NOTE(review): tqdm is not referenced by the cells visible here; the import is
# kept (de-indented so the cell parses) in case other cells rely on it.
from tqdm.autonotebook import tqdm


# Test Embedding

In [5]:
# Load the pretrained 'all-mpnet-base-v2' sentence-embedding model once.
# Later cells reuse `model` for both the corpus and the query embeddings.
model = SentenceTransformer('all-mpnet-base-v2')

In [10]:
# Smoke test: embed two throwaway sentences and confirm the result is a
# (number of sentences, embedding width) matrix before touching real data.
texts = [
    "Hello world!",
    "What is up",
]
embeddings = model.encode(texts)

shape_label = "Embedding Shape:\n"
print(shape_label, embeddings.shape)

Embedding Shape:
 (2, 768)


# Data Wrangling

In [7]:
import pandas as pd

In [8]:
# products.jsonl is read in JSON Lines mode (one JSON record per line),
# which is what lines=True selects.
df = pd.read_json(
    "../products/products.jsonl",
    lines=True,
)
df.head()

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp
2,Latte,Coffee,"Smooth and creamy, our latte combines rich esp...","[Espresso, Steamed Milk, Milk Foam]",4.75,4.8,Latte.jpg
3,Chocolate Chip Biscotti,Bakery,"Crunchy and delightful, this chocolate chip bi...","[Flour, Sugar, Chocolate Chips, Eggs, Almonds,...",2.5,4.6,chocolat_biscotti.jpg
4,Espresso shot,Coffee,"A bold shot of rich espresso, our espresso is ...",[Espresso],2.0,4.9,Espresso_shot.webp


In [11]:
# Build one retrievable sentence per product by concatenating its fields.
# The non-string columns are cast with astype(str); for 'ingredients' this
# yields the Python list repr (e.g. "['Espresso', 'Steamed Milk']").
df['text'] = (
    df['name'] + ": " + df['description']
    + " -- Ingredients: " + df['ingredients'].astype(str)
    + " -- Price: " + df['price'].astype(str)
    + " -- rating: " + df['rating'].astype(str)
)

In [12]:
# Preview the first rows of the combined 'text' column.
df['text'].head()

0    Cappuccino: A rich and creamy cappuccino made ...
1    Jumbo Savory Scone: Deliciously flaky and butt...
2    Latte: Smooth and creamy, our latte combines r...
3    Chocolate Chip Biscotti: Crunchy and delightfu...
4    Espresso shot: A bold shot of rich espresso, o...
Name: text, dtype: object

In [16]:
# Print the first product's text in full; the Series preview truncates long strings.
print(df['text'][0])

Cappuccino: A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- rating: 4.7


In [17]:
# Turn the 'text' column into a plain Python list of documents to embed.
# This rebinds `texts`, replacing the two smoke-test sentences.
texts = df['text'].tolist()
texts[:3]

["Cappuccino: A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- rating: 4.7",
 "Jumbo Savory Scone: Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- rating: 4.3",
 "Latte: Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a comforting treat any time of day, whether you're starting your morning or taking a midday break. -- Ingredients: ['Espresso

In [19]:
# Load the shop's "About Us" copy and add it to the corpus as one more document.
# The text contains non-ASCII punctuation (em dash, curly apostrophe), so decode
# explicitly instead of relying on the platform's default encoding.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open("../products/about_us.txt", 'r', encoding="utf-8") as f:
    about_us = f.read()

# The prefix before the first ":" later becomes this document's vector id.
about_us = "Airlangga's Coffee About Us Section: " + about_us

# Guard the append so re-running this cell does not add a duplicate document.
if about_us not in texts:
    texts.append(about_us)

In [None]:
# Load the menu text and add it to the corpus as one more document.
# Decode explicitly rather than relying on the platform's default encoding.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open("../products/menu.txt", 'r', encoding="utf-8") as f:
    menu = f.read()

# The prefix before the first ":" later becomes this document's vector id.
menu = "Menu Items: " + menu

# Guard the append so re-running this cell does not add a duplicate document.
if menu not in texts:
    texts.append(menu)

In [22]:
# Show the last two documents; these are expected to be the about-us and menu
# texts appended to `texts` after the product rows.
texts[-2:]

["Airlangga's Coffee About Us Section: Welcome to Airlangga's Coffee, your neighborhood coffee shop located in the heart of Kemang, Jakarta. At Airlangga's, we believe that coffee is more than just a drink—it’s an experience, a moment of joy, and a way to connect with others.\n\nOur Story\nFounded in 2015, Airlangga's started as a small family-owned café with one mission: to share the love of quality, ethically-sourced coffee with our community.\n\nAirlangga's passion for travel and coffee led him on a journey across Borneo, where he handpicked partnerships with small farms and cooperatives. We ensure that every cup we brew tells a story of dedication and care, from farm to table. Our beans are roasted in-house to bring out unique flavors that reflect the regions where they were grown.\n\nDelivery & Locations Served\nIn addition to offering a cozy place to enjoy coffee in our café, we proudly deliver to Greenwich Village, SoHo, West Village, and Lower Manhattan. Whether you’re at home,

In [23]:
# Display every document that will be embedded (product rows plus any appended texts).
# NOTE(review): this prints the whole corpus; a slice or len(texts) would keep the output short.
texts

["Cappuccino: A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- rating: 4.7",
 "Jumbo Savory Scone: Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- rating: 4.3",
 "Latte: Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a comforting treat any time of day, whether you're starting your morning or taking a midday break. -- Ingredients: ['Espresso

# Generate Embeddings

In [24]:
# Embed every document in `texts`; this rebinds `embeddings`, replacing the
# smoke-test array. Row i of the result corresponds to texts[i].
embeddings = model.encode(texts)
# Sanity check: the first dimension should equal len(texts).
embeddings.shape

(20, 768)

# Push Embeddings to Pinecone

In [12]:
from os import getenv
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment,
# then read the Pinecone key from there so it never appears in the notebook.
load_dotenv()
pinecone_api = getenv("PINECONE_API_KEY")

# Client handle used by the later cells to create, describe and open the index.
pc = Pinecone(api_key=pinecone_api)

In [31]:
index_name = "coffeeshop"

# Creating an index that already exists is rejected by Pinecone, so only create
# it when it is missing. This keeps the cell safe to re-run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must match the embedding width produced by `model`
        metric="cosine",  # the SDK default, spelled out so the similarity measure is explicit
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [32]:
import time

In [47]:
import time

# Wait for the index to report ready, but give up after a bounded time so a
# stuck or failed index cannot hang the notebook forever.
READY_TIMEOUT_S = 120
ready_deadline = time.monotonic() + READY_TIMEOUT_S
while not pc.describe_index(index_name).status['ready']:
    if time.monotonic() > ready_deadline:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after {READY_TIMEOUT_S}s"
        )
    time.sleep(1)

index = pc.Index(index_name)

# zip() silently stops at the shorter input; a stale `embeddings` array (e.g. the
# two-row smoke-test result) would otherwise upload only part of the corpus.
if len(texts) != len(embeddings):
    raise ValueError(
        f"texts ({len(texts)}) and embeddings ({len(embeddings)}) are out of sync; "
        "re-run the embedding cell"
    )

vectors = []
seen_ids = set()
for text, embedding in zip(texts, embeddings):
    # The vector id is whatever precedes the first ":" in the document
    # (the product name, or the prefix of the about-us / menu text).
    id_ = text.split(":", 1)[0].strip()
    # Two documents with the same prefix would share an id and overwrite each other.
    if id_ in seen_ids:
        raise ValueError(f"Duplicate vector id {id_!r} derived from texts")
    seen_ids.add(id_)
    vectors.append(
        {
            "id": id_,
            "values": embedding.tolist(),
            # Keep the source text as metadata so query results are human-readable.
            "metadata": {"text": text}
        }
    )

index.upsert(vectors=vectors, namespace="ns1")

{'upserted_count': 20}

# Pinecone Query Test

In [51]:
# Retrieval smoke test: embed a customer-style question with the same model used
# for the corpus and fetch the three closest documents from namespace "ns1".
text = "is Cappucino lactose-free?"
embedding = model.encode(text).tolist()

results = index.query(
    vector=embedding,
    namespace="ns1",
    top_k=3,
    include_metadata=True,   # return the stored source text with each match
    include_values=False,    # the raw vectors are not needed for inspection
)

In [52]:
# Inspect the matches (id, score and stored metadata text) returned by the query.
results

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino: A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.57724,
              'values': []},
             {'id': 'Latte',
              'metadata': {'text': 'Latte: Smooth and creamy, our latte '
                                   'combines rich espresso with velvety '
                       

# Test Embedding via the Hugging Face Inference API and Query Again

In [24]:
# Hugging Face Hub repository id passed to the hosted inference call.
# NOTE(review): presumably the same checkpoint as the local 'all-mpnet-base-v2' model — confirm.
model_id = 'sentence-transformers/all-mpnet-base-v2'

In [1]:
from os import getenv
from dotenv import load_dotenv

# Load variables from a local .env file (if present), then read the
# Hugging Face token from the environment rather than hardcoding it.
load_dotenv()
hf_token = getenv("HUGGINGFACE_TOKEN")

In [33]:
# Sample question to embed through the hosted inference client.
text = "What is the price of Latte?"

In [None]:
from huggingface_hub import InferenceClient

# Remote client authenticated with the token read from the environment.
client = InferenceClient(provider="hf-inference", api_key=hf_token)

# Request the embedding of `text` from the hosted copy of `model_id`.
result = client.feature_extraction(text, model=model_id)

In [35]:
# Check the dimensionality of the remotely computed embedding.
# NOTE(review): the saved output for this cell is a list of floats, not a shape
# tuple, so it looks stale relative to this code — re-run to refresh it.
result.shape

[0.05961970239877701,
 0.02424035780131817,
 -0.03364836424589157,
 0.012166108004748821,
 0.016975505277514458,
 -0.007732566446065903,
 -0.023220505565404892,
 -0.027835432440042496,
 0.03215807303786278,
 0.027302389964461327,
 0.008432893082499504,
 0.08172070980072021,
 0.013028438203036785,
 0.09350969642400742,
 -0.0038549616001546383,
 -0.03309900686144829,
 0.03708656132221222,
 0.017706986516714096,
 0.03436070680618286,
 0.024867618456482887,
 -0.017529703676700592,
 0.004071589093655348,
 0.006070970557630062,
 0.03639230132102966,
 0.04265230521559715,
 -0.024989798665046692,
 -0.041875455528497696,
 0.014513058587908745,
 0.0267497431486845,
 -0.040692903101444244,
 0.046621035784482956,
 -0.00874544307589531,
 -0.07158326357603073,
 0.005051965359598398,
 2.396885292910156e-06,
 -0.014517401345074177,
 -0.008473948575556278,
 -6.0851267335237935e-05,
 -0.05577762424945831,
 -0.026751700788736343,
 0.05079662799835205,
 0.07668475806713104,
 -0.02345472015440464,
 0.00250

In [16]:
# Second retrieval check: embed the question locally and look up the three
# nearest documents in the existing "coffeeshop" index.
text = "do you have packged chocolate?"
embedding = model.encode(text).tolist()

# Re-open the index by name so this cell does not depend on `index` from an earlier cell.
index = pc.Index("coffeeshop")

results = index.query(
    vector=embedding,
    namespace="ns1",
    top_k=3,
    include_metadata=True,   # return the stored source text with each match
    include_values=False,    # the raw vectors are not needed for inspection
)

In [17]:
# Inspect the matches (id, score and stored metadata text) for the packaged-chocolate question.
results

{'matches': [{'id': 'Dark chocolate (Packaged Chocolate)',
              'metadata': {'text': 'Dark chocolate (Packaged Chocolate): Rich '
                                   'and indulgent dark chocolate in convenient '
                                   'packaged form, perfect for gifting or '
                                   'enjoying anytime. Made with premium cocoa '
                                   'for a smooth, intense flavor experience. '
                                   "-- Ingredients: ['Cocoa Mass', 'Sugar', "
                                   "'Cocoa Butter', 'Vanilla Extract'] -- "
                                   'Price: 3.0 -- rating: 4.7'},
              'score': 0.495185852,
              'values': []},
             {'id': 'Dark chocolate',
              'metadata': {'text': 'Dark chocolate: Rich and indulgent, our '
                                   'dark chocolate drinking chocolate is made '
                                   'with premium cocoa. This luxu