In this notebook we are going to get a better understanding of simple queries using Superlinked

In [44]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [86]:
import os
data_path = os.path.abspath('..')+'/data/'
pokedex_df = pd.read_csv(data_path+"pokedex.csv").drop(columns=["Unnamed: 0"])
pokedex_df['ability_1'].fillna('None', inplace=True)
pokedex_df['ability_2'].fillna('None', inplace=True)
pd.set_option("display.max_colwidth", 100)

In [46]:
from superlinked.framework.common.schema.schema import schema
from superlinked.framework.common.schema.schema_object import (
    String,Float
)
from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.dsl.space.categorical_similarity_space import (
    CategoricalSimilaritySpace,
)
from superlinked.framework.dsl.space.text_similarity_space import TextSimilaritySpace
from superlinked.framework.dsl.space.number_space import NumberSpace, Mode
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.common.parser.dataframe_parser import DataFrameParser
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.query.param import Param
from superlinked.framework.dsl.query.query import Query
from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
    InMemoryApp,
)

In [84]:
pokedex_df

Unnamed: 0,name,color,habitat,shape,poke_type,capture_chance,height,weight,ability_0,ability_1,ability_2,sprite,description
0,bulbasaur,green,grassland,quadruped,grass,0.18,7,69,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1.png,green quadruped bulbasaur who is of grass type and lives in grassland.
1,ivysaur,green,grassland,quadruped,grass,0.18,10,130,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/2.png,green quadruped ivysaur who is of grass type and lives in grassland.
2,venusaur,green,grassland,quadruped,grass,0.18,20,1000,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/3.png,green quadruped venusaur who is of grass type and lives in grassland.
3,charmander,red,mountain,upright,fire,0.18,6,85,blaze,solar-power,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/4.png,red upright charmander who is of fire type and lives in mountain.
4,charmeleon,red,mountain,upright,fire,0.18,11,190,blaze,solar-power,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/5.png,red upright charmeleon who is of fire type and lives in mountain.
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018,gouging-fire,brown,mountain,quadruped,fire,0.04,35,5900,protosynthesis,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1020.png,brown quadruped gouging-fire who is of fire type and lives in mountain.
1019,raging-bolt,yellow,grassland,humanoid,electric,0.04,52,4800,protosynthesis,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1021.png,yellow humanoid raging-bolt who is of electric type and lives in grassland.
1020,iron-boulder,gray,mountain,humanoid,rock,0.04,15,1625,quark-drive,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1022.png,gray humanoid iron-boulder who is of rock type and lives in mountain.
1021,iron-crown,blue,rough-terrain,quadruped,steel,0.04,16,1560,quark-drive,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1023.png,blue quadruped iron-crown who is of steel type and lives in rough-terrain.


Let us start with basic schema. We want to find a pokemon based on color, habitat, and type.

In [48]:
@schema
class PokeSchema:
    color: String
    habitat: String
    poke_type: String
    id: IdField

pokemon = PokeSchema()

In [49]:
categories = pokedex_df["color"].unique()
print(categories)
habitats = pokedex_df["habitat"].unique()
print(habitats)
poke_types = pokedex_df["poke_type"].unique()
print(poke_types)

['green' 'red' 'blue' 'white' 'brown' 'yellow' 'purple' 'pink' 'gray'
 'black']
['grassland' 'mountain' 'waters-edge' 'forest' 'rough-terrain' 'cave'
 'urban' 'sea' 'rare']
['grass' 'fire' 'water' 'bug' 'normal' 'poison' 'electric' 'ground'
 'fairy' 'fighting' 'psychic' 'rock' 'ghost' 'ice' 'dragon' 'dark' 'steel'
 'flying']


All fields of our schema are categorical, so we will use categorical similarity space. It is based on

 lists we have created above.

In [67]:
"grassland" in habitats

True

In [50]:
color_space = CategoricalSimilaritySpace(
    category_input=pokemon.color, categories=categories
)
habitat_space = CategoricalSimilaritySpace(
    category_input=pokemon.habitat, categories=habitats
)
poke_type_space = CategoricalSimilaritySpace(
    category_input=pokemon.poke_type, categories=poke_types
)


In [51]:
poke_index = Index(
    spaces=[
        color_space,
        habitat_space,
        poke_type_space,
    ]
)

Then we need to create an in-memory source and executor to try out your configuration.

In [52]:
df_parser = DataFrameParser(schema=pokemon)
source: InMemorySource = InMemorySource(pokemon, parser=df_parser)
executor: InMemoryExecutor = InMemoryExecutor(
    sources=[source],
    indices=[poke_index],
)
app: InMemoryApp = executor.run()
source.put([pokedex_df])

get_results will get us slice of dataset based on our query.

In [70]:
pokedex_df.habitat.value_counts()

habitat
forest           222
grassland        191
mountain         157
urban            115
sea               82
waters-edge       81
cave              76
rough-terrain     65
rare              34
Name: count, dtype: int64

In [53]:
def get_results(result):
    ids = [int(entry.stored_object['id']) for entry in result.entries]
    res = pokedex_df[pokedex_df["id"].isin(ids)]
    return res

With this simple query, we are searching for pokemons in all the fields we have.

In [54]:
query = (
    Query(
        poke_index,
        weights={
            color_space: Param("color_weight"),
            habitat_space: Param("habitat_weight"),
            poke_type_space: Param("poke_type_weight"),
        },
    )
    .find(pokemon)
    .similar(color_space.category, Param("color"))  
    .similar(habitat_space.category, Param("habitat")) 
    .similar(poke_type_space.category, Param("poke_type")) 
    .limit(Param("limit"))
)

Say, we want to catch Pikachu. Let look for Pikachu in our pokedex. (His id is 25)

In [55]:
pokedex_df.iloc[24]

id                                                                                             25
name                                                                                      pikachu
color                                                                                      yellow
habitat                                                                                    forest
shape                                                                                   quadruped
poke_type                                                                                electric
capture_chance                                                                               0.75
height                                                                                          4
weight                                                                                         60
ability_0                                                                                  static
ability_1           

So he is an electric living in a forest yellow pokemon. So we put the parameters in our query to get it.

In [68]:
query_params = {
    "color_weight": 0,
    "habitat_weight": 1000000,
    "poke_type_weight": 0,
    "color": "yellow",
    "habitat": "grassland",
    "poke_type": 'electric',
}

result = app.query(query, limit=10, **query_params)
result.to_pandas()

Unnamed: 0,color,habitat,poke_type,ability_0,id,similarity_score
0,yellow,forest,electric,static,25,1.0
1,yellow,forest,electric,static,26,1.0
2,yellow,forest,electric,static,172,1.0
3,yellow,grassland,electric,static,125,0.75
4,yellow,grassland,electric,static,181,0.75


Ok. We want to have all *chu pokemons. I assume they are all electric, live in a forest and all have at least static ability. So we know that they are from one "family" let us try to get them with similarity one to the other.

In [57]:
@schema
class PokeSchema:
    color: String
    habitat: String
    poke_type: String
    ability_0: String
    id: IdField

pokemon = PokeSchema()

In [58]:
abilities = pokedex_df['ability_0'].unique()

In [59]:
color_space = CategoricalSimilaritySpace(
    category_input=pokemon.color, categories=categories
)
habitat_space = CategoricalSimilaritySpace(
    category_input=pokemon.habitat, categories=habitats
)
type_space = CategoricalSimilaritySpace(
    category_input=pokemon.poke_type, categories=poke_types
)
ability_space = CategoricalSimilaritySpace(
    category_input=pokemon.ability_0, categories=abilities
)

In [60]:
poke_index = Index(
    spaces=[
        color_space,
        habitat_space,
        type_space,
        ability_space,
    ]
)

In [61]:
df_parser = DataFrameParser(schema=pokemon)
source: InMemorySource = InMemorySource(pokemon, parser=df_parser)
executor: InMemoryExecutor = InMemoryExecutor(
    sources=[source],
    indices=[poke_index],
)
app: InMemoryApp = executor.run()
source.put([pokedex_df])

In [62]:
query = Query(poke_index).find(pokemon).with_vector(pokemon, id_param="25").limit(5)
result = app.query(query, limit=5)
# get_results(result)
result.to_pandas()

Unnamed: 0,color,habitat,poke_type,ability_0,id,similarity_score
0,yellow,forest,electric,static,25,1.0
1,yellow,forest,electric,static,26,1.0
2,yellow,forest,electric,static,172,1.0
3,yellow,grassland,electric,static,125,0.75
4,yellow,grassland,electric,static,181,0.75


In [63]:
get_results(result)

Unnamed: 0,id,name,color,habitat,shape,poke_type,capture_chance,height,weight,ability_0,ability_1,ability_2,sprite,description
24,25,pikachu,yellow,forest,quadruped,electric,0.75,4,60,static,lightning-rod,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/25.png,yellow quadruped pikachu who is of electric type and lives in forest.
25,26,raichu,yellow,forest,upright,electric,0.29,8,300,static,lightning-rod,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/26.png,yellow upright raichu who is of electric type and lives in forest.
124,125,electabuzz,yellow,grassland,upright,electric,0.18,11,300,static,vital-spirit,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/125.png,yellow upright electabuzz who is of electric type and lives in grassland.
171,172,pichu,yellow,forest,quadruped,electric,0.75,3,20,static,lightning-rod,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/172.png,yellow quadruped pichu who is of electric type and lives in forest.
180,181,ampharos,yellow,grassland,upright,electric,0.18,14,615,static,plus,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/181.png,yellow upright ampharos who is of electric type and lives in grassland.
