In [2]:
import pandas as pd
import superlinked
import asyncio
import boto3
from botocore.exceptions import  ClientError
import instructor
from openai import OpenAI, AsyncOpenAI
import json
from superlinked import framework as sl

In [53]:
# Load the SR Legacy food database from the project's data directory.
food_df = pd.read_parquet('../data/sr_legacy_food_db.parquet')

# Working sample that gets indexed below.
# - random_state is fixed so Restart & Run All selects the same rows (and thus
#   reproduces the same query results) on every run.
# - n is capped at the table size so the cell does not raise on a smaller file.
df = food_df.sample(n=min(4000, len(food_df)), random_state=42)

# Distinct category labels, taken from the full table rather than the sample,
# in order of first appearance.
categories = food_df.food_category.drop_duplicates().to_list()



In [106]:
# Nutrient names of interest.
# NOTE(review): presumably these are column names of the food table — only
# 'Energy' is actually read elsewhere in this notebook; confirm the rest.
nutrients_cols = [
    'Energy',
    'Protein',
    'Carbohydrate, by difference',
    'Total lipid (fat)',
]

In [133]:
# Schema: the typed fields of one food record.
class FoodItem(sl.Schema):
    """Superlinked schema for a single food entry.

    The spaces built from this schema use `description`, `food_category`
    and `Energy`; `Protein` is declared but not referenced by any space in
    this notebook.
    """

    # Record identifier.
    fdc_id: sl.IdField
    # Free-text name of the food.
    description: sl.String
    # Category label of the food.
    food_category: sl.String
    # NOTE(review): units are not visible here — presumably per 100 g, confirm.
    Energy: sl.Float
    Protein: sl.Float

# Create the schema instance, one similarity space per searchable aspect,
# and the index that combines those spaces.
food_item = FoodItem()

# Text spaces: description and category label, both using the same model name.
description_space = sl.TextSimilaritySpace(
    text=food_item.description,
    model="all-MiniLM-L6-v2",
)
food_category_text_space = sl.TextSimilaritySpace(
    text=food_item.food_category,
    model="all-MiniLM-L6-v2",
)

# The same category field again, this time as a categorical value restricted
# to the labels collected in `categories`.
food_category_categorical_space = sl.CategoricalSimilaritySpace(
    category_input=food_item.food_category,
    categories=categories,
)

# Numeric space over Energy, bounded to [0, 1000], in SIMILAR mode.
energy_space = sl.NumberSpace(
    food_item.Energy,
    min_value=0,
    max_value=1000,
    mode=sl.Mode.SIMILAR,
)

# Single index over all four spaces; every query below runs against it.
index = sl.Index(
    [
        description_space,
        food_category_text_space,
        food_category_categorical_space,
        energy_space,
    ]
)

In [134]:



# Description-only search: compare each food item's description with the
# free-text `query_text` parameter.
query = (
    sl.Query(index)
    .find(food_item)
    .similar(description_space, sl.Param("query_text"))
    .select_all()
)

# The same `query_text` is matched against both the description and the
# category-label text space; `desc_weight` and `cat_weight` are supplied at
# query time as the weights of those two spaces.
weighted_query = (
    sl.Query(
        index,
        weights={
            description_space: sl.Param("desc_weight"),
            food_category_text_space: sl.Param("cat_weight")
        }
    )
    .find(food_item)
    .similar(description_space, sl.Param("query_text"))
    .similar(food_category_text_space, sl.Param("query_text"))
    .select_all()
)

# Category lookup: `query_categories` is passed as a list of category labels.
categorical_query = (
    sl.Query(index)
    .find(food_item)
    .similar(food_category_categorical_space.category, sl.Param("query_categories"))
    .select_all()
)

# Energy-only search. The parameter name is a valid Python identifier so it
# can be passed as a normal keyword argument to `app.query(...)`, and it
# matches the name used by `numeric_query` later in the notebook.
number_query = (
    sl.Query(index)
    .find(food_item)
    .similar(energy_space, sl.Param("energy_intake_per_100g"))
    .select_all()
)

# In-memory source for records that follow the `food_item` schema.
source = sl.InMemorySource(food_item)

# In-memory executor connecting that source to the index.
executor = sl.InMemoryExecutor(
    sources=[source],
    indices=[index],
)

# Start the executor; `app` is the handle the query cells call `.query` on.
app = executor.run()






In [135]:
# Ingest the sampled foods: one dict per row, limited to the columns below.
ingest_columns = ["fdc_id", "description", "food_category", "Energy"]
source.put(df[ingest_columns].to_dict(orient="records"))


Categorical query

In [136]:

# Category to look up. It is held in one variable that is actually passed to
# the query, so the name and the executed query cannot drift apart.
category_query = 'Fruits and Fruit Juices'
result = app.query(
    categorical_query,
    query_categories=[category_query]
)

result = sl.PandasConverter.to_pandas(result)
# Keep only the full matches. Scores are floats, so compare against 1 with a
# small tolerance instead of exact equality.
food_category_result = result[result.similarity_score >= 1 - 1e-6]
food_category_result.head(10)

Unnamed: 0,description,food_category,Energy,id,similarity_score
0,"Guavas, strawberry, raw",Fruits and Fruit Juices,69.0,173045,1.0
1,"Pineapple, canned, juice pack, drained",Fruits and Fruit Juices,60.0,167767,1.0
2,"Plums, canned, purple, light syrup pack, solid...",Fruits and Fruit Juices,63.0,169952,1.0
3,"Pomegranates, raw",Fruits and Fruit Juices,83.0,169134,1.0
4,"Nance, frozen, unsweetened",Fruits and Fruit Juices,73.0,167789,1.0
5,"Carissa, (natal-plum), raw",Fruits and Fruit Juices,62.0,173952,1.0
6,"Strawberries, canned, heavy syrup pack, solids...",Fruits and Fruit Juices,92.0,168172,1.0
7,"Plums, canned, heavy syrup, drained",Fruits and Fruit Juices,89.0,168183,1.0
8,"Fruit juice smoothie, ODWALLA, strawberry banana",Fruits and Fruit Juices,48.0,168213,1.0
9,"Plums, dried (prunes), stewed, with added sugar",Fruits and Fruit Juices,124.0,167752,1.0


Weighted queries

In [91]:

# "apple pie" with the description space weighted 2 and the category-text
# space weighted 0.
weighted_params = {
    "query_text": "apple pie",
    "desc_weight": 2,
    "cat_weight": 0,
}
result = app.query(weighted_query, **weighted_params)

sl.PandasConverter.to_pandas(result)


Unnamed: 0,description,food_category,id,similarity_score
0,"Pie, apple, prepared from recipe",Baked Products,175012,0.796199
1,"Pie, peach",Baked Products,175020,0.773308
2,"Pie, fried pies, fruit",Baked Products,172784,0.759287
3,"Pie, Dutch Apple, Commercially Prepared",Baked Products,167522,0.683891
4,"Pie, apple, commercially prepared, enriched flour",Baked Products,175011,0.669173
...,...,...,...,...
3995,"Infant formula, MEAD JOHNSON, Enfamil for Supp...",Baby Foods,172291,0.021923
3996,"Toddler formula, MEAD JOHNSON, Nutramigen Todd...",Baby Foods,173526,0.005264
3997,"Chrysanthemum leaves, raw",Vegetables and Vegetable Products,168491,-0.000790
3998,"Infant formula, MEAD JOHNSON, Enfamil Enspire ...",Baby Foods,173523,-0.009503


In [None]:
# Contrast with the previous cell: same query text, but the weights are
# flipped so the category-text space is weighted 2 and the description space 0.
# NOTE(review): this cell was an unexecuted, byte-identical copy of the
# previous one; the flipped weights are the presumed intent — confirm.
result = app.query(
    weighted_query,
    query_text="apple pie",
    desc_weight=0,
    cat_weight=2
)

# Show only the top rows instead of rendering the whole result frame.
sl.PandasConverter.to_pandas(result).head(10)

Numeric querying

In [148]:
# Text + number query: description similarity to `query_text` combined with
# Energy similarity to `energy_intake_per_100g`; both space weights are
# supplied at query time.
numeric_query = (
    sl.Query(
        index,
        weights={
            description_space: sl.Param("desc_weight"),
            energy_space: sl.Param("energy_weight"),
        },
    )
    .find(food_item)
    .similar(description_space, sl.Param("query_text"))
    .similar(energy_space, sl.Param("energy_intake_per_100g"))
    .select_all()
)

# Run the combined query. The description weight is set four orders of
# magnitude above the energy weight.
numeric_params = {
    "query_text": "apple",
    "energy_intake_per_100g": 50,
    "desc_weight": 10000,
    "energy_weight": 1,
}
result = app.query(numeric_query, **numeric_params)

sl.PandasConverter.to_pandas(result).head(30)




Unnamed: 0,description,food_category,Energy,id,similarity_score
0,"Apples, raw, without skin",Fruits and Fruit Juices,48.0,171689,0.337793
1,"Rose-apples, raw",Fruits and Fruit Juices,25.0,168171,0.333606
2,"Babyfood, juice, orange and apple",Baby Foods,43.0,171355,0.333096
3,"APPLEBEE'S, coleslaw",Restaurant Foods,120.0,169020,0.330702
4,"Babyfood, fruit, apple and blueberry, junior",Baby Foods,62.0,170958,0.326676
5,"Babyfood, juice, apple and peach",Baby Foods,43.0,171352,0.326523
6,"Custard-apple, (bullock's-heart), raw",Fruits and Fruit Juices,101.0,171725,0.322348
7,"Apples, raw, fuji, with skin (Includes foods f...",Fruits and Fruit Juices,63.0,167793,0.315588
8,"Apples, raw, without skin, cooked, boiled",Fruits and Fruit Juices,53.0,173928,0.315452
9,"McDONALD'S, BIG MAC (without Big Mac Sauce)",Fast Foods,234.0,172067,0.312221
