In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

  from pandas.core import (


In [2]:
import os
data_path = os.path.abspath('..')+'/data/'
pokedex_df = pd.read_csv(data_path+"pokedex_updated.csv").drop(columns=["Unnamed: 0"])
pokedex_df['ability_1'].fillna('None', inplace=True)
pokedex_df['ability_2'].fillna('None', inplace=True)
pd.set_option("display.max_colwidth", 100)

In [3]:
from superlinked.framework.common.schema.schema import schema
from superlinked.framework.common.schema.schema_object import (
    String,
    Float,
)
from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.dsl.space.number_space import NumberSpace
from superlinked.framework.dsl.space.categorical_similarity_space import (
    CategoricalSimilaritySpace,
)
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.common.parser.dataframe_parser import DataFrameParser
from superlinked.framework.common.embedding.number_embedding import Mode
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.query.param import Param
from superlinked.framework.dsl.query.query import Query
from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
    InMemoryApp,
)

Let us start with basic schema. We want to find a pokemon based on color, habitat, and type.

In [None]:
@schema
class PokeSchema:
    color: String
    habitat: String
    poke_type: String
    id: IdField

pokemon = PokeSchema()

In [4]:
categories = pokedex_df["color"].unique()
print(categories)
habitats = pokedex_df["habitat"].unique()
print(habitats)
poke_types = pokedex_df["poke_type"].unique()
print(poke_types)

['green' 'red' 'blue' 'white' 'brown' 'yellow' 'purple' 'pink' 'gray'
 'black']
['grassland' 'mountain' 'waters-edge' 'forest' 'rough-terrain' 'cave'
 'urban' 'sea' 'rare']
['grass' 'fire' 'water' 'bug' 'normal' 'poison' 'electric' 'ground'
 'fairy' 'fighting' 'psychic' 'rock' 'ghost' 'ice' 'dragon' 'dark' 'steel'
 'flying']


All fields of our schema are categorical, so we will use categorical similarity space. It is based on lists we have created above.

In [None]:
color_space = CategoricalSimilaritySpace(
    category_input=pokemon.color, categories=categories
)
habitat_space = CategoricalSimilaritySpace(
    category_input=pokemon.habitat, categories=habitats
)
type_space = CategoricalSimilaritySpace(
    category_input=pokemon.poke_type, categories=poke_types
)


In [None]:
poke_index = Index(
    spaces=[
        color_space,
        habitat_space,
        type_space,
    ]
)

Then we need to create an in-memory source and executor to try out your configuration.

In [None]:
df_parser = DataFrameParser(schema=pokemon)
source: InMemorySource = InMemorySource(pokemon, parser=df_parser)
executor: InMemoryExecutor = InMemoryExecutor(
    sources=[source],
    indices=[poke_index],
)
app: InMemoryApp = executor.run()
source.put([pokedex_df])

get_results will get us slice of dataset based on our query.

In [35]:
def get_results(result):
    ids = [int(entry.stored_object['id']) for entry in result.entries]
    res = pokedex_df[pokedex_df["id"].isin(ids)]
    return res

With this simple query, we are searching for pokemons in all the fields we have.

In [None]:
query = (
    Query(
        poke_index,
        weights={
            color_space: Param("color_weight"),
            habitat_space: Param("habitat_weight"),
            type_space: Param("type_weight"),
        },
    )
    .find(pokemon)
    .similar(color_space.category, Param("color"))  
    .similar(habitat_space.category, Param("habitat")) 
    .similar(type_space.category, Param("type")) 
    .limit(Param("limit"))
)

Say, we want to catch Pikachu. Let look for Pikachu in our pokedex. (His id is 25)

In [None]:
pokedex_df.iloc[24]

So he is an electric living in a forest yellow pokemon. So we put the parameters in our query to get it

In [None]:
query_params = {
    "color_weight": 1,
    "habitat_weight": 0,
    "type_weight": 1,
    "color": "yellow",
    "habitat": "grassland",
    "type": 'electric',
}

result = app.query(query, limit=5, **query_params)
get_results(result)

Ok. We want to have all *chu pokemons. I assume they are all electric, live in a forest and all have at least static ability. So we know that they are from one "family" let us try to get them with similarity one to the other.

In [29]:
@schema
class PokeSchema:
    color: String
    habitat: String
    poke_type: String
    ability_0: String
    id: IdField

pokemon = PokeSchema()

In [30]:
abilities = pokedex_df['ability_0'].unique()

In [31]:
color_space = CategoricalSimilaritySpace(
    category_input=pokemon.color, categories=categories
)
habitat_space = CategoricalSimilaritySpace(
    category_input=pokemon.habitat, categories=habitats
)
type_space = CategoricalSimilaritySpace(
    category_input=pokemon.poke_type, categories=poke_types
)
ability_space = CategoricalSimilaritySpace(
    category_input=pokemon.ability_0, categories=abilities
)

In [32]:
poke_index = Index(
    spaces=[
        color_space,
        habitat_space,
        type_space,
        ability_space,
    ]
)

In [46]:
df_parser = DataFrameParser(schema=pokemon)
source: InMemorySource = InMemorySource(pokemon, parser=df_parser)
executor: InMemoryExecutor = InMemoryExecutor(
    sources=[source],
    indices=[poke_index],
)
app: InMemoryApp = executor.run()
source.put([pokedex_df])

In [44]:
query = Query(poke_index).find(pokemon).with_vector(pokemon, id_param="25")
result = app.query(query)
get_results(result)

Unnamed: 0,id,name,color,habitat,shape,poke_type,capture_chance,height,weight,ability_0,ability_1,ability_2,sprite
0,1,bulbasaur,green,grassland,quadruped,grass,0.18,7,69,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1.png
1,2,ivysaur,green,grassland,quadruped,grass,0.18,10,130,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/2.png
2,3,venusaur,green,grassland,quadruped,grass,0.18,20,1000,overgrow,chlorophyll,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/3.png
3,4,charmander,red,mountain,upright,fire,0.18,6,85,blaze,solar-power,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/4.png
4,5,charmeleon,red,mountain,upright,fire,0.18,11,190,blaze,solar-power,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/5.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,1021,raging-bolt,yellow,grassland,humanoid,electric,0.04,52,4800,protosynthesis,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1021.png
1021,1022,iron-boulder,gray,mountain,humanoid,rock,0.04,15,1625,quark-drive,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1022.png
1022,1023,iron-crown,blue,rough-terrain,quadruped,steel,0.04,16,1560,quark-drive,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1023.png
1023,1024,terapagos,blue,grassland,quadruped,normal,1.00,2,65,tera-shift,,,https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1024.png
