In [1]:
# Reload edited project modules automatically before each cell runs (IPython autoreload, mode 2).
%load_ext autoreload
%autoreload 2

import pandas as pd
from superlinked import framework as sl

from superlinked_app import index, query
from superlinked_app.config import settings

# Presumably verifies that the processed dataset is present before continuing —
# TODO confirm against superlinked_app.config.
settings.validate_processed_dataset_exists()

  from .autonotebook import tqdm as notebook_tqdm
[32m2024-12-11 18:15:50.870[0m | [1mINFO    [0m | [36msuperlinked_app.config[0m:[36m<module>[0m:[36m9[0m - [1mLoading '.env' file from: /Users/pauliusztin/Documents/01_projects/hands-on-retrieval/.env[0m


In [2]:
# Parser that reads DataFrame rows into the product schema, taking the
# schema's id field from the "asin" column.
product_parser = sl.DataFrameParser(
    schema=index.product,
    mapping={index.product.id: "asin"},
)
source: sl.InMemorySource = sl.InMemorySource(index.product, parser=product_parser)

# Wire the source and the product index into an in-memory executor and start it.
executor = sl.InMemoryExecutor(
    sources=[source],
    indices=[index.product_index],
)
app = executor.run()

In [3]:
# The processed dataset is stored as line-delimited JSON (one record per line).
df = pd.read_json(path_or_buf=settings.PROCESSED_DATASET_PATH, lines=True)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,asin,type,title,description,price,review_rating,review_count
0,B07WP4RXHY,product,YUEPIN U-Tube Clamp 304 Stainless Steel Hose P...,Product Description Specification: Material: 3...,9.99,4.7,54
1,B07VRZTK2N,product,"Apron for Women, Waterproof Adjustable Bib Coo...",,11.99,4.0,152
2,B07V2F5SN1,product,DIY 5D Diamond Painting by Number Kit for Adul...,Product Description 5D DIY Diamond Painting is...,9.99,4.6,378
3,B08GM4LMKN,product,Órgano bucal de armónica trémolo de 24 agujero...,Descripción del producto Especificación: Tipo ...,25.63,4.6,9
4,B00MNLQQ7K,product,"Design Toscano QM2787100 Darby, the Forest Faw...",,40.72,4.7,274


In [4]:
# Number of rows loaded from the processed dataset.
df.shape[0]

300

In [5]:
# Push the whole DataFrame into the in-memory source as a single batch.
source.put([df])

# Let text columns render up to 500 characters so long titles and
# descriptions are readable in the result tables below.
pd.options.display.max_colwidth = 500

## Query books using filters

In [6]:
# Filter query driven by a natural-language prompt; keep the top 3 hits.
prompt = "books with a price lower than 100"
results = app.query(query.filter_query, natural_query=prompt, limit=3)

# Inspect the parameters the query was actually executed with.
results.knn_params

{'description_weight': 1.0,
 'stars_maximizer_weight': 0.0,
 'price_minimizer_weights': 0.0,
 'limit': 3,
 'natural_query': 'books with a price lower than 100',
 'filter_by_type': 'book',
 'query_text': 'books',
 'rating_bigger_than': None,
 'price_smaller_than': 100.0,
 'radius_param': None,
 'description_similar_clause_weight': 1.0}

In [7]:
# Show the hits as a DataFrame.
# NOTE(review): in the recorded output all three hits share the same
# similarity_score (0.532175) and have an empty description — confirm this is expected.
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,book,"Stables: Beautiful Paddocks, Horse Barns, and Tack Rooms",,4.7,100,53.1,0847833143,0.532175,0
1,book,The Norton Introduction to Literature,,4.5,662,76.5,039393893X,0.532175,1
2,book,An Actor's Work: A Student's Diary,,4.5,105,32.35,041542223X,0.532175,2


In [8]:
# Same filter query, now with an additional rating constraint in the prompt.
prompt = "books with a price lower than 100 and a rating bigger than 4"
results = app.query(query.filter_query, natural_query=prompt, limit=3)

# Inspect the parameters the query was actually executed with.
results.knn_params

{'description_weight': 1.0,
 'stars_maximizer_weight': 0.0,
 'price_minimizer_weights': 0.0,
 'limit': 3,
 'natural_query': 'books with a price lower than 100 and a rating bigger than 4',
 'filter_by_type': 'book',
 'query_text': 'books',
 'rating_bigger_than': 4.0,
 'price_smaller_than': 100.0,
 'radius_param': None,
 'description_similar_clause_weight': 1.0}

In [9]:
# Show the hits as a DataFrame. In the recorded output these are the same three
# books as for the price-only filter query; each already has a rating above 4.
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,book,"Stables: Beautiful Paddocks, Horse Barns, and Tack Rooms",,4.7,100,53.1,0847833143,0.532175,0
1,book,The Norton Introduction to Literature,,4.5,662,76.5,039393893X,0.532175,1
2,book,An Actor's Work: A Student's Diary,,4.5,105,32.35,041542223X,0.532175,2


## Query books leveraging semantic meaning

In [10]:
# Semantic query for the same price-oriented prompt; keep the top 3 hits.
prompt = "books with a price lower than 100"
results = app.query(query.semantic_query, natural_query=prompt, limit=3)

# Inspect the weights and query values the query was actually executed with.
results.knn_params

{'description_weight': 0.0,
 'stars_maximizer_weight': 0.0,
 'price_minimizer_weights': 1.0,
 'limit': 3,
 'natural_query': 'books with a price lower than 100',
 'filter_by_type': 'book',
 'query_text': 'books',
 'query_price': 100.0,
 'query_review_rating': 0.0,
 'radius_param': None,
 'description_similar_clause_weight': 1.0,
 'price_similar_clause_weight': 1.0,
 'review_rating_similar_clause_weight': 1.0}

In [11]:
# Show the hits as a DataFrame. In the recorded output the hits are ordered by
# ascending price (9.99, 10.59, 11.69).
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,book,All Aboard! New York: A City Primer,,4.6,74,9.99,1423640748,0.999877,0
1,book,"Feminist Baby (Feminist Baby, 4)",,4.5,623,10.59,1484778588,0.999862,1
2,book,"The Mindful Dragon: A Dragon Book about Mindfulness. Teach Your Dragon To Be Mindful. A Cute Children Story to Teach Kids about Mindfulness, Focus and Peace. (My Dragon Books)",,4.7,623,11.69,1948040107,0.999831,2


In [12]:
# Semantic query with both a price and a rating preference in the prompt.
prompt = "books with a price lower than 100 and a rating bigger than 4"
results = app.query(query.semantic_query, natural_query=prompt, limit=3)

# Inspect the weights and query values the query was actually executed with.
results.knn_params

{'description_weight': 0.0,
 'stars_maximizer_weight': 1.0,
 'price_minimizer_weights': 1.0,
 'limit': 3,
 'natural_query': 'books with a price lower than 100 and a rating bigger than 4',
 'filter_by_type': 'book',
 'query_text': '',
 'query_price': 100.0,
 'query_review_rating': 4.0,
 'radius_param': None,
 'description_similar_clause_weight': 0.0,
 'price_similar_clause_weight': 1.0,
 'review_rating_similar_clause_weight': 1.0}

In [13]:
# Show the hits as a DataFrame. In the recorded output all three hits have a
# 4.7 review rating.
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,book,"The Mindful Dragon: A Dragon Book about Mindfulness. Teach Your Dragon To Be Mindful. A Cute Children Story to Teach Kids about Mindfulness, Focus and Peace. (My Dragon Books)",,4.7,623,11.69,1948040107,0.99645,0
1,book,"Build Your Running Body (A Total-Body Fitness Plan for All Distance Runners, from Milers to Ultramarathoners—Run Farther, Faster, and Injury-Free)",,4.7,573,13.49,161519102X,0.996422,1
2,book,"Stables: Beautiful Paddocks, Horse Barns, and Tack Rooms",,4.7,100,53.1,0847833143,0.994796,2


In [14]:
# Semantic query asking for the best-rated books.
# NOTE(review): the prompt asks for the "top 5" books while `limit=3` is passed,
# and the recorded knn_params show 'limit': 3 — confirm which one is intended.
prompt = "Return the top 5 books (along with their review count and price) with the highest reviews rating."
results = app.query(query.semantic_query, natural_query=prompt, limit=3)

# Inspect the weights and query values the query was actually executed with.
results.knn_params

{'description_weight': 0.0,
 'stars_maximizer_weight': 1.0,
 'price_minimizer_weights': 0.5,
 'limit': 3,
 'natural_query': 'Return the top 5 books (along with their review count and price) with the highest reviews rating.',
 'filter_by_type': 'book',
 'query_text': 'books',
 'query_price': 0.0,
 'query_review_rating': 5.0,
 'radius_param': None,
 'description_similar_clause_weight': 1.0,
 'price_similar_clause_weight': 1.0,
 'review_rating_similar_clause_weight': 1.0}

In [15]:
# Show the hits as a DataFrame. In the recorded output these are the same three
# books as for the previous semantic query (all rated 4.7).
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,book,"The Mindful Dragon: A Dragon Book about Mindfulness. Teach Your Dragon To Be Mindful. A Cute Children Story to Teach Kids about Mindfulness, Focus and Peace. (My Dragon Books)",,4.7,623,11.69,1948040107,0.944246,0
1,book,"Build Your Running Body (A Total-Body Fitness Plan for All Distance Runners, from Milers to Ultramarathoners—Run Farther, Faster, and Injury-Free)",,4.7,573,13.49,161519102X,0.944228,1
2,book,"Stables: Beautiful Paddocks, Horse Barns, and Tack Rooms",,4.7,100,53.1,0847833143,0.9432,2


## Find similar items based on a given item

In [16]:
# Look up the description of the reference item with a single .loc selection
# (boolean row mask + column label) instead of chained indexing.
df.loc[df["asin"] == "B07WP4RXHY", "description"]

0    Product Description Specification: Material: 304 Stainless Steel,100% New Rubber Color: Silver Shape: U Shape Quantity: 10 Pieces Note: Note: Since the size above is measured by hand, the size of the actual item you received could be slightly different from the size above. Product Description Specification: Material: 304 Stainless Steel,100% New Rubber Color: Silver Shape: U Shape Quantity: 10 Pieces Note: Note: Since the size above is measured by hand, the size of the actual item you receiv...
Name: description, dtype: object

In [17]:
# Similar-items query seeded with the id of an existing item.
# NOTE(review): the prompt says "books", but B07WP4RXHY appears with type
# "product" in the dataset preview — confirm the wording is intended.
prompt = "similar books to B07WP4RXHY with a rating bigger than 4.5"
results = app.query(query.similar_items_query, natural_query=prompt, limit=3)

# Inspect the parameters the query was actually executed with.
results.knn_params

{'description_weight': 1.0,
 'stars_maximizer_weight': 1.0,
 'price_minimizer_weights': 1.0,
 'limit': 3,
 'natural_query': 'similar books to B07WP4RXHY with a rating bigger than 4.5',
 'filter_by_type': None,
 'query_text': 'similar books',
 'rating_bigger_than': 4.5,
 'price_smaller_than': None,
 'product_id': 'B07WP4RXHY',
 'radius_param': None,
 'description_similar_clause_weight': 1.0,
 'with_vector_id_weight_param': 1.0}

In [18]:
# Show the hits as a DataFrame. In the recorded output the top hit is the
# reference item itself (B07WP4RXHY) and every hit has type "product".
results.to_pandas()

Unnamed: 0,type,title,description,review_rating,review_count,price,id,similarity_score,rank
0,product,"YUEPIN U-Tube Clamp 304 Stainless Steel Hose Pipe Cable Strap Clips With Rubber Cushioned (1-21/32""(42mm)-10pcs)","Product Description Specification: Material: 304 Stainless Steel,100% New Rubber Color: Silver Shape: U Shape Quantity: 10 Pieces Note: Note: Since the size above is measured by hand, the size of the actual item you received could be slightly different from the size above. Product Description Specification: Material: 304 Stainless Steel,100% New Rubber Color: Silver Shape: U Shape Quantity: 10 Pieces Note: Note: Since the size above is measured by hand, the size of the actual item you receiv...",4.7,54,9.99,B07WP4RXHY,0.939683,0
1,product,AERZETIX - Maza/Maceta/Martillo Cuadrado Clásico - 1000g x 282mm - Cabeza de Acero - Mango Metálico - para Tallador/Escultor/Albañil de Piedra - Cantera/Cincel/Hormigón/Roca/Granito/Mármol - C46145,,5.0,2,28.99,B08VMRC5V6,0.917831,1
2,product,LOCOLO 46Pcs Mixed Women Make Up Lipstick Dress Shoes Purse Hat Charms for Women Girl Hot Fashion Charm Pendants for Jewelry Making,,4.7,27,4.93,B08BFVFL6T,0.915856,2
