In [1]:
import os

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from db import fetch_table_data, get_engine, get_list_products
# from db import get_list_products

In [8]:
# Notebook configuration.
# The database URL can be overridden with the BAKEDBOT_DB_URL environment
# variable so the notebook is not tied to one machine's drive layout; when the
# variable is unset the original local SQLite location is used unchanged.
DB_FILE = os.environ.get(
    "BAKEDBOT_DB_URL",
    "sqlite:///D:/Study/Projects/BakedBot/baked_bot/backend/bakedbot.db",
)
engine = get_engine(DB_FILE)
top_n = 5  # number of similar products to return
product_id = 1  # id of the product to find recommendations for

In [3]:
# Load the product records through the project's db helper.
# The result is displayed in the next cell as a list of dicts, one per product.
products = get_list_products(engine, mode="default")

inside fetch table data
2025-02-21 21:27:23,277 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-02-21 21:27:23,278 INFO sqlalchemy.engine.Engine SELECT * FROM products
2025-02-21 21:27:23,278 INFO sqlalchemy.engine.Engine [generated in 0.00089s] ()
2025-02-21 21:27:23,280 INFO sqlalchemy.engine.Engine ROLLBACK


In [4]:
# Display the loaded product records to inspect their fields.
products

[{'id': 1,
  'name': 'Valerian Root Infusion',
  'type': 'snack',
  'description': 'A soothing herbal blend designed for relaxation.',
  'effects': ['mental clarity', 'energy boost'],
  'ingredients': ['Valerian Root', 'Peppermint', 'Hibiscus', 'Green Tea'],
  'price': 48.43,
  'sales_data': "{'units_sold': 140, 'last_month_revenue': 3047.9}"},
 {'id': 2,
  'name': 'Milk Thistle Infusion',
  'type': 'supplement',
  'description': 'A revitalizing drink for energy and focus.',
  'effects': ['immune boost', 'antioxidant-rich'],
  'ingredients': ['Milk Thistle', 'Ashwagandha'],
  'price': 43.19,
  'sales_data': "{'units_sold': 74, 'last_month_revenue': 3383.85}"},
 {'id': 3,
  'name': 'Hibiscus Infusion',
  'type': 'beverage',
  'description': 'A digestive aid made from natural herbs.',
  'effects': ['heart health', 'circulation'],
  'ingredients': ['Hibiscus', 'Elderberry', 'Ashwagandha'],
  'price': 35.27,
  'sales_data': "{'units_sold': 292, 'last_month_revenue': 1875.35}"},
 {'id': 4,


In [21]:
def _product_text(product):
    """Return one product's description, effects and ingredients as a single string.

    The three fields are interpolated as-is and separated by single spaces, so
    list-valued fields appear in their Python list representation.
    """
    return f"{product['description']} {product['effects']} {product['ingredients']}"


# One text document per product, in the same order as `products`.
product_texts = list(map(_product_text, products))
product_texts[:5]

["A soothing herbal blend designed for relaxation. ['mental clarity', 'energy boost'] ['Valerian Root', 'Peppermint', 'Hibiscus', 'Green Tea']",
 "A revitalizing drink for energy and focus. ['immune boost', 'antioxidant-rich'] ['Milk Thistle', 'Ashwagandha']",
 "A digestive aid made from natural herbs. ['heart health', 'circulation'] ['Hibiscus', 'Elderberry', 'Ashwagandha']",
 "A soothing herbal blend designed for relaxation. ['relaxation', 'stress relief'] ['Lemon Balm', 'Thyme']",
 "A powerful supplement for immunity boost. ['digestive support', 'soothing'] ['Ginseng', 'Sage', 'Ashwagandha']"]

In [22]:
# Number of text documents built above (one per entry in `products`).
len(product_texts)

30

In [23]:
# Convert text data to TF-IDF vectors.
# stop_words='english' asks scikit-learn to drop its built-in English stop
# words; fit_transform learns the vocabulary from product_texts and returns
# the document-term matrix in one step (one row per product text).
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(product_texts)

In [24]:
# Inspect the sparse TF-IDF matrix (its repr reports shape and stored elements).
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 340 stored elements and shape (30, 69)>

In [25]:
# Get the row position of the input product inside `products`.
# Rows of tfidf_matrix follow the order of `products`, so this position is also
# the product's row in the matrix.
product_index = next((i for i, p in enumerate(products) if p["id"] == product_id), None)

# Fail here with a clear message when the id is unknown; otherwise None would
# flow into the indexing done by later cells and fail with an unrelated error.
if product_index is None:
    raise ValueError(f"No product with id={product_id!r} found in products")

In [26]:
# Confirm which row position was found for product_id.
print(product_index)

0


In [27]:
# Show the selected product's single-row slice next to the full matrix.
tfidf_matrix[product_index], tfidf_matrix

(<Compressed Sparse Row sparse matrix of dtype 'float64'
 	with 15 stored elements and shape (1, 69)>,
 <Compressed Sparse Row sparse matrix of dtype 'float64'
 	with 340 stored elements and shape (30, 69)>)

In [28]:
# Cosine similarity between the selected product's TF-IDF row and every row of
# the matrix, collapsed into a 1-D array with one score per product.
query_vector = tfidf_matrix[product_index]
pairwise_scores = cosine_similarity(query_vector, tfidf_matrix)
similarity_scores = pairwise_scores.flatten()

print(similarity_scores.shape, similarity_scores, sep="\n")

(30,)
[1.         0.10006407 0.08182265 0.36896322 0.112679   0.36391734
 0.0299142  0.14894337 0.17159886 0.1776411  0.14321949 0.04581553
 0.08231759 0.53870917 0.30712078 0.20497792 0.20371127 0.12913272
 0.         0.42201261 0.03529465 0.12851907 0.38356634 0.03972504
 0.1690972  0.33037712 0.19720846 0.04655641 0.20719309 0.35738608]


In [29]:

# Get top N similar product indices (excluding the input product itself).
# Rank all products by score, best first. The stable sort keeps tied scores in
# their original row order, so the result is deterministic.
ranked_indices = (-similarity_scores).argsort(kind="stable")

# Remove the input product by its index instead of assuming it sorts last:
# a product with identical text also scores 1.0 (a tie), and a product whose
# text yields an all-zero vector scores 0.0 against itself. In both cases
# dropping "the top entry" could discard the wrong row and recommend the
# product to itself.
similar_indices = ranked_indices[ranked_indices != product_index][:top_n]

print(similar_indices)

[13 19 22  3  5]


In [30]:
# Build the recommendation records: id, name and similarity score (as a plain
# Python float) for each selected index, in ranked order.
recommendations = [
    {"id": match["id"], "name": match["name"], "score": float(score)}
    for match, score in ((products[i], similarity_scores[i]) for i in similar_indices)
]

In [34]:
# Summarise the input product (id and name only), then show its recommendations.
input_product = {key: products[product_index][key] for key in ("id", "name")}
print("recommendations for ", input_product)
recommendations

recommendations for  {'id': 1, 'name': 'Valerian Root Infusion'}


[{'id': 14, 'name': 'Valerian Root Infusion', 'score': 0.538709172638395},
 {'id': 20, 'name': 'Hibiscus Infusion', 'score': 0.4220126099448059},
 {'id': 23, 'name': 'Sage Infusion', 'score': 0.3835663423182648},
 {'id': 4, 'name': 'Lemon Balm Infusion', 'score': 0.3689632198729137},
 {'id': 6, 'name': 'Sage Infusion', 'score': 0.36391733927055997}]

In [35]:
# Show every product text in full (the earlier preview displayed only the first five).
product_texts

["A soothing herbal blend designed for relaxation. ['mental clarity', 'energy boost'] ['Valerian Root', 'Peppermint', 'Hibiscus', 'Green Tea']",
 "A revitalizing drink for energy and focus. ['immune boost', 'antioxidant-rich'] ['Milk Thistle', 'Ashwagandha']",
 "A digestive aid made from natural herbs. ['heart health', 'circulation'] ['Hibiscus', 'Elderberry', 'Ashwagandha']",
 "A soothing herbal blend designed for relaxation. ['relaxation', 'stress relief'] ['Lemon Balm', 'Thyme']",
 "A powerful supplement for immunity boost. ['digestive support', 'soothing'] ['Ginseng', 'Sage', 'Ashwagandha']",
 "A digestive aid made from natural herbs. ['mental clarity', 'energy boost'] ['Sage', 'Turmeric']",
 "A powerful supplement for immunity boost. ['better sleep', 'calming'] ['Milk Thistle', 'Thyme', 'Sage']",
 "A digestive aid made from natural herbs. ['heart health', 'circulation'] ['Green Tea', 'Ginger']",
 "A powerful supplement for immunity boost. ['better sleep', 'calming'] ['Ginseng', 'T