In [1]:
import os

import pandas as pd
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 100

from neo4j import GraphDatabase

# Connect to Neo4j.
# Connection settings can be overridden through the environment (NEO4J_URI,
# NEO4J_USER, NEO4J_PASSWORD). When a variable is unset, the value this notebook
# has always used is the fallback, so existing local setups keep working.
# NOTE(review): the fallback password is still stored in the notebook -- rotate it
# and drop the default once NEO4J_PASSWORD is set wherever this notebook runs.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "h4ck3r20o8"),
    ),
)

In [2]:
# Load the Pokemon dataset from the local parquet file
dataset_path = "../data/pokemonDB_dataset.parquet"
pokemon = pd.read_parquet(dataset_path)

In [3]:
def clear_neo4j_database(driver):
    """Delete every node, together with its relationships, from the database.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            exposing ``run(query)``.
    """
    wipe_everything = "MATCH (n) DETACH DELETE n"
    with driver.session() as db_session:
        db_session.run(wipe_everything)

# Clear the Neo4j database
# WARNING: this deletes every node and relationship in the connected database,
# not only Pokemon data, so the cells below always start from an empty graph.
clear_neo4j_database(driver)

In [4]:
# Preview the first rows to see the column names the insert cells below read from
pokemon.head()

Unnamed: 0,Pokemon,Type,Species,Height,Weight,Abilities,EV Yield,Catch Rate,Base Friendship,Base Exp,Growth Rate,Egg Groups,Gender,Egg Cycles,HP Base,HP Min,HP Max,Attack Base,Attack Min,Attack Max,Defense Base,Defense Min,Defense Max,Special Attack Base,Special Attack Min,Special Attack Max,Special Defense Base,Special Defense Min,Special Defense Max,Speed Base,Speed Min,Speed Max,Description,vectorProperty
0,Abomasnow,"Grass, Ice",Frost Tree Pokémon,2.2 m (7′03″),135.5 kg (298.7 lbs),"1. Snow Warning, Soundproof (hidden ability)","1 Attack, 1 Sp. Atk","60 (7.8% with PokéBall, full HP)",50 (normal),173,Slow,"Grass, Monster","50% male, 50% female","20 (4,884–5,140 steps)",90,290,384,92,170,311,75,139,273,92,170,311,85,157,295,60,112,240,Abomasnow!,"[0.038857594, -0.058271974, -0.007995857, 0.0019075231, -0.025210328, -0.0021116694, -0.02446174..."
1,Mega Abomasnow,"Grass, Ice",Frost Tree Pokémon,2.7 m (8′10″),185.0 kg (407.9 lbs),1. Snow Warning,"1 Attack, 1 Sp. Atk","60 (7.8% with PokéBall, full HP)",50 (normal),208,Slow,"Grass, Monster","50% male, 50% female","20 (4,884–5,140 steps)",90,290,384,132,242,399,105,193,339,132,242,399,105,193,339,30,58,174,"Mega Abomasnow is a powerful, Snow/Ice-type Pokémon that was initially unavailable in the mainli...","[-0.07787469, -0.015716149, -0.059320956, 0.01225309, 0.024122389, 0.019718071, -0.0056506824, -..."
2,Abra,Psychic,Psi Pokémon,0.9 m (2′11″),19.5 kg (43.0 lbs),"1. Synchronize, 2. Inner Focus, Magic Guard (hidden ability)",1 Sp. Atk,"200 (26.1% with PokéBall, full HP)",50 (normal),62,Medium Slow,Human-Like,"75% male, 25% female","20 (4,884–5,140 steps)",25,160,254,20,40,152,15,31,141,105,193,339,55,103,229,90,166,306,"Abra is the eighth-generation Pokémon introduced in the 2020 Nintendo Switch game, Pokémon Legen...","[-0.0466162, 0.00047304656, -0.038456887, 0.021807672, 0.01696757, 0.028532835, -0.035014164, -0..."
3,Absol,Dark,Disaster Pokémon,1.2 m (3′11″),47.0 kg (103.6 lbs),"1. Pressure, 2. Super Luck, Justified (hidden ability)",2 Attack,"30 (3.9% with PokéBall, full HP)",35 (lower than normal),163,Medium Slow,Field,"50% male, 50% female","25 (6,169–6,425 steps)",65,240,334,130,238,394,60,112,240,75,139,273,60,112,240,75,139,273,Here is a brief summary about Absol:\n\nAbsol is a Pokémon species that first appears in the thi...,"[-0.025951244, -0.017945759, 0.006587942, 0.023525393, 0.010941006, 0.004109759, -0.012078777, -..."
4,Mega Absol,Dark,Disaster Pokémon,1.2 m (3′11″),49.0 kg (108.0 lbs),1. Magic Bounce,2 Attack,"30 (3.9% with PokéBall, full HP)",35 (lower than normal),198,Medium Slow,Field,"50% male, 50% female","25 (6,169–6,425 steps)",65,240,334,150,274,438,60,112,240,115,211,361,60,112,240,115,211,361,Mega Absol is a fictional Pokémon species that was introduced in 2013 as part of the Pokémon fra...,"[-0.062107593, -0.008471994, -0.009498222, 0.022504155, -0.013371752, 0.03767074, 0.0037788497, ..."


### Pokemon

In [5]:
def insert_pokemon_data(tx, row):
    """Create or update the ``Pokemon`` node for one dataset row.

    The node is merged on ``name``, so re-running the insert updates the
    existing node instead of creating a duplicate. The scalar columns listed in
    ``property_columns`` are copied onto the node, including ``speed_base``
    (base/min/max are stored for each of the six stats).

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        row: Mapping (e.g. a pandas row) indexed by the dataset column names:
            ``'Pokemon'`` plus every column named in ``property_columns``.

    Raises:
        KeyError: If ``row`` lacks one of the expected columns.
    """
    # Node property (also used as the Cypher parameter name) -> dataset column.
    # The SET clause and the parameters are both derived from this one mapping
    # so that a property cannot be bound without being written, or vice versa.
    property_columns = {
        "height": "Height",
        "weight": "Weight",
        "ev_yield": "EV Yield",
        "catch_rate": "Catch Rate",
        "base_friendship": "Base Friendship",
        "base_exp": "Base Exp",
        "growth_rate": "Growth Rate",
        "egg_cycles": "Egg Cycles",
        "hp_base": "HP Base",
        "hp_min": "HP Min",
        "hp_max": "HP Max",
        "attack_base": "Attack Base",
        "attack_min": "Attack Min",
        "attack_max": "Attack Max",
        "defense_base": "Defense Base",
        "defense_min": "Defense Min",
        "defense_max": "Defense Max",
        "special_attack_base": "Special Attack Base",
        "special_attack_min": "Special Attack Min",
        "special_attack_max": "Special Attack Max",
        "special_defense_base": "Special Defense Base",
        "special_defense_min": "Special Defense Min",
        "special_defense_max": "Special Defense Max",
        "speed_base": "Speed Base",
        "speed_min": "Speed Min",
        "speed_max": "Speed Max",
        "description": "Description",
        "vector_property": "vectorProperty",
    }

    # Only the fixed property names above are interpolated into the query text;
    # all values travel as query parameters.
    set_clause = ",\n        ".join(
        f"p.{prop} = ${prop}" for prop in property_columns
    )
    query = f"""
    MERGE (p:Pokemon {{name: $pokemon}})
    SET {set_clause}
    """

    parameters = {prop: row[column] for prop, column in property_columns.items()}
    tx.run(query, pokemon=row['Pokemon'], **parameters)

# Insert data into Neo4j: one write transaction per dataset row,
# all issued through a single session
with driver.session() as session:
    for row_label, row in pokemon.iterrows():
        session.execute_write(insert_pokemon_data, row)


### Pokemon -> Egg Group


In [6]:
# One entry per (Pokemon, egg group): split the comma-separated column and
# explode the resulting lists, keeping the original row label as the index.
# explode()/dropna() states the intent directly and does not depend on
# DataFrame.stack() discarding the padding values that expand=True introduces.
pokemon_egg_groups = pokemon['Egg Groups'].str.split(', ').explode().dropna()
# Join on the row label so every Pokemon row is repeated once per egg group
pokemon_expanded_egg_groups = pokemon.drop('Egg Groups', axis=1).join(pokemon_egg_groups.rename('Egg Group'))

def insert_egg_group_data(tx, pokemon, egg_group):
    """Link an existing Pokemon node to an egg group node.

    The Pokemon is looked up by name (nothing happens if it does not exist);
    the ``EggGroup`` node and the ``HAS_GENETIC_TIE`` relationship are created
    only when missing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        pokemon: Name of the Pokemon node to match.
        egg_group: Name of the egg group node to merge.
    """
    cypher = """
    MATCH (p:Pokemon {name: $pokemon})
    MERGE (e:EggGroup {name: $egg_group})
    MERGE (p)-[:HAS_GENETIC_TIE]->(e)
    """
    bindings = {"pokemon": pokemon, "egg_group": egg_group}
    tx.run(cypher, **bindings)

# Insert egg group data into Neo4j: one write transaction per (Pokemon, egg group) row
with driver.session() as session:
    for row_label, row in pokemon_expanded_egg_groups.iterrows():
        session.execute_write(insert_egg_group_data, row['Pokemon'], row['Egg Group'])

### Pokemon -> Gender

In [7]:
# One entry per (Pokemon, gender share), e.g. "50% male" and "50% female":
# split the comma-separated column and explode the resulting lists.
# explode()/dropna() does not depend on DataFrame.stack() discarding the
# padding values that expand=True introduces.
pokemon_genders = pokemon['Gender'].str.split(', ').explode().dropna()
# The original 'Gender' column is dropped first so the exploded values can reuse the name
pokemon_expanded_genders = pokemon.drop('Gender', axis=1).join(pokemon_genders.rename('Gender'))

def insert_gender_data(tx, pokemon, gender):
    """Link an existing Pokemon node to a gender node.

    The Pokemon is looked up by name; the ``Gender`` node and the
    ``HAS_GENDER`` relationship are created only when missing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        pokemon: Name of the Pokemon node to match.
        gender: Name of the gender node to merge.
    """
    cypher = """
    MATCH (p:Pokemon {name: $pokemon})
    MERGE (g:Gender {name: $gender})
    MERGE (p)-[:HAS_GENDER]->(g)
    """
    bindings = {"pokemon": pokemon, "gender": gender}
    tx.run(cypher, **bindings)

# Insert gender data into Neo4j: one write transaction per (Pokemon, gender) row
with driver.session() as session:
    for row_label, row in pokemon_expanded_genders.iterrows():
        session.execute_write(insert_gender_data, row['Pokemon'], row['Gender'])


### Pokemon -> Abilities

In [8]:
# One entry per (Pokemon, ability): split the comma-separated column and explode
# the resulting lists. explode()/dropna() does not depend on DataFrame.stack()
# discarding the padding values that expand=True introduces.
pokemon_abilities = pokemon['Abilities'].str.split(', ').explode().dropna()
# Strip the leading slot numbering ("1. ", "2. ") from each ability name.
# NOTE(review): a trailing " (hidden ability)" marker is left in the name, so a
# hidden ability presumably becomes a different Ability node than the same
# ability listed as a regular one -- confirm this is intended.
pokemon_abilities = pokemon_abilities.str.replace(r'^\d+\.\s*', '', regex=True)
pokemon_expanded_abilities = pokemon.drop('Abilities', axis=1).join(pokemon_abilities.rename('Ability'))

def insert_ability_data(tx, pokemon, ability):
    """Link an existing Pokemon node to an ability node.

    The Pokemon is looked up by name; the ``Ability`` node and the
    ``HAS_ABILITY`` relationship are created only when missing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        pokemon: Name of the Pokemon node to match.
        ability: Name of the ability node to merge.
    """
    cypher = """
    MATCH (p:Pokemon {name: $pokemon})
    MERGE (a:Ability {name: $ability})
    MERGE (p)-[:HAS_ABILITY]->(a)
    """
    bindings = {"pokemon": pokemon, "ability": ability}
    tx.run(cypher, **bindings)

# Insert ability data into Neo4j: one write transaction per (Pokemon, ability) row
pokemon_ability_pairs = pokemon_expanded_abilities.filter(['Pokemon', 'Ability'])
with driver.session() as session:
    for row_label, row in pokemon_ability_pairs.iterrows():
        session.execute_write(insert_ability_data, row['Pokemon'], row['Ability'])

### Pokemon -> Species

In [9]:
# Remove the " Pokémon" label from the species text (literal match, not a regex),
# e.g. "Frost Tree Pokémon" -> "Frost Tree". This overwrites the column in place.
pokemon['Species'] = pokemon['Species'].str.replace(' Pokémon', '', regex=False)
# Distinct species names, one per row
species_df = pokemon[['Species']].drop_duplicates()

# Define a function to insert species data into Neo4j
def insert_species_data(tx, species):
    """Create the ``Species`` node with the given name if it does not exist yet.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        species: Name of the species node to merge.
    """
    cypher = """
    MERGE (s:Species {name: $species})
    """
    bindings = {"species": species}
    tx.run(cypher, **bindings)

# Insert species data into Neo4j: one write transaction per distinct species
with driver.session() as session:
    for row_label, row in species_df.iterrows():
        session.execute_write(insert_species_data, row['Species'])

### Pokemon -> Species, Pokemon -> Type

In [10]:
# One entry per (Pokemon, type): split the comma-separated column and explode
# the resulting lists. explode()/dropna() does not depend on DataFrame.stack()
# discarding the padding values that expand=True introduces.
pokemon_types = pokemon['Type'].str.split(', ').explode().dropna()
# The original 'Type' column is dropped first so the exploded values can reuse the name
pokemon_expanded = pokemon.drop('Type', axis=1).join(pokemon_types.rename('Type'))

def insert_species_type_data(tx, species, pokemon):
    """Attach a Pokemon to its species with a ``BELONGS_TO`` relationship.

    Despite the name, no type data is written here. The ``Species`` node must
    already exist (it is matched, not created); the ``Pokemon`` node and the
    relationship are merged, i.e. created only when missing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        species: Name of the existing species node.
        pokemon: Name of the Pokemon node to merge and link.
    """
    cypher = """
    MATCH (s:Species {name: $species})
    MERGE (p:Pokemon {name: $pokemon})
    MERGE (p)-[:BELONGS_TO]->(s)
    """
    bindings = {"species": species, "pokemon": pokemon}
    tx.run(cypher, **bindings)

def insert_pokemon_type_data(tx, pokemon, type_):
    """Link an existing Pokemon node to a type node.

    The Pokemon is looked up by name; the ``Type`` node and the ``HAS_TYPE``
    relationship are created only when missing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        pokemon: Name of the Pokemon node to match.
        type_: Name of the type node to merge.
    """
    cypher = """
    MATCH (p:Pokemon {name: $pokemon})
    MERGE (t:Type {name: $type_})
    MERGE (p)-[:HAS_TYPE]->(t)
    """
    bindings = {"pokemon": pokemon, "type_": type_}
    tx.run(cypher, **bindings)

# For every (Pokemon, type) row: link the Pokemon to its species, then to that type.
# A multi-type Pokemon appears once per type, so its species link is merged repeatedly.
pokemon_links = pokemon_expanded.filter(['Species', 'Type', 'Pokemon'])
with driver.session() as session:
    for row_label, row in pokemon_links.iterrows():
        session.execute_write(insert_species_type_data, row['Species'], row['Pokemon'])
        session.execute_write(insert_pokemon_type_data, row['Pokemon'], row['Type'])


In [11]:
# Query the type and species of Pikachu from Neo4j
def query_pikachu_type_and_species(tx):
    """Look up the type and species linked to the Pokemon named 'Pikachu'.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        Whatever ``single()`` yields for the query result; the record exposes
        the fields ``Type`` and ``Species``.
    """
    cypher = """
    MATCH (p:Pokemon {name: 'Pikachu'})-[:HAS_TYPE]->(t:Type),
          (p)-[:BELONGS_TO]->(s:Species)
    RETURN t.name AS Type, s.name AS Species
    """
    return tx.run(cypher).single()

# Run the lookup in a read transaction and report what came back
with driver.session() as session:
    pikachu_data = session.execute_read(query_pikachu_type_and_species)
    if not pikachu_data:
        print("Pikachu data not found.")
    else:
        print(f"Pikachu Type: {pikachu_data['Type']}, Species: {pikachu_data['Species']}")

Pikachu Type: Electric, Species: Mouse


In [12]:
# File-system friendly variant of the name (spaces -> underscores, literal match);
# it is passed to the image lookup further down.
pokemon['PokemonFile'] = pokemon['Pokemon'].str.replace(' ', '_', regex=False)
pokemon.head()

Unnamed: 0,Pokemon,Type,Species,Height,Weight,Abilities,EV Yield,Catch Rate,Base Friendship,Base Exp,Growth Rate,Egg Groups,Gender,Egg Cycles,HP Base,HP Min,HP Max,Attack Base,Attack Min,Attack Max,Defense Base,Defense Min,Defense Max,Special Attack Base,Special Attack Min,Special Attack Max,Special Defense Base,Special Defense Min,Special Defense Max,Speed Base,Speed Min,Speed Max,Description,vectorProperty,PokemonFile
0,Abomasnow,"Grass, Ice",Frost Tree,2.2 m (7′03″),135.5 kg (298.7 lbs),"1. Snow Warning, Soundproof (hidden ability)","1 Attack, 1 Sp. Atk","60 (7.8% with PokéBall, full HP)",50 (normal),173,Slow,"Grass, Monster","50% male, 50% female","20 (4,884–5,140 steps)",90,290,384,92,170,311,75,139,273,92,170,311,85,157,295,60,112,240,Abomasnow!,"[0.038857594, -0.058271974, -0.007995857, 0.0019075231, -0.025210328, -0.0021116694, -0.02446174...",Abomasnow
1,Mega Abomasnow,"Grass, Ice",Frost Tree,2.7 m (8′10″),185.0 kg (407.9 lbs),1. Snow Warning,"1 Attack, 1 Sp. Atk","60 (7.8% with PokéBall, full HP)",50 (normal),208,Slow,"Grass, Monster","50% male, 50% female","20 (4,884–5,140 steps)",90,290,384,132,242,399,105,193,339,132,242,399,105,193,339,30,58,174,"Mega Abomasnow is a powerful, Snow/Ice-type Pokémon that was initially unavailable in the mainli...","[-0.07787469, -0.015716149, -0.059320956, 0.01225309, 0.024122389, 0.019718071, -0.0056506824, -...",Mega_Abomasnow
2,Abra,Psychic,Psi,0.9 m (2′11″),19.5 kg (43.0 lbs),"1. Synchronize, 2. Inner Focus, Magic Guard (hidden ability)",1 Sp. Atk,"200 (26.1% with PokéBall, full HP)",50 (normal),62,Medium Slow,Human-Like,"75% male, 25% female","20 (4,884–5,140 steps)",25,160,254,20,40,152,15,31,141,105,193,339,55,103,229,90,166,306,"Abra is the eighth-generation Pokémon introduced in the 2020 Nintendo Switch game, Pokémon Legen...","[-0.0466162, 0.00047304656, -0.038456887, 0.021807672, 0.01696757, 0.028532835, -0.035014164, -0...",Abra
3,Absol,Dark,Disaster,1.2 m (3′11″),47.0 kg (103.6 lbs),"1. Pressure, 2. Super Luck, Justified (hidden ability)",2 Attack,"30 (3.9% with PokéBall, full HP)",35 (lower than normal),163,Medium Slow,Field,"50% male, 50% female","25 (6,169–6,425 steps)",65,240,334,130,238,394,60,112,240,75,139,273,60,112,240,75,139,273,Here is a brief summary about Absol:\n\nAbsol is a Pokémon species that first appears in the thi...,"[-0.025951244, -0.017945759, 0.006587942, 0.023525393, 0.010941006, 0.004109759, -0.012078777, -...",Absol
4,Mega Absol,Dark,Disaster,1.2 m (3′11″),49.0 kg (108.0 lbs),1. Magic Bounce,2 Attack,"30 (3.9% with PokéBall, full HP)",35 (lower than normal),198,Medium Slow,Field,"50% male, 50% female","25 (6,169–6,425 steps)",65,240,334,150,274,438,60,112,240,115,211,361,60,112,240,115,211,361,Mega Absol is a fictional Pokémon species that was introduced in 2013 as part of the Pokémon fra...,"[-0.062107593, -0.008471994, -0.009498222, 0.022504155, -0.013371752, 0.03767074, 0.0037788497, ...",Mega_Absol


### IMAGE EMBEDDINGS

In [13]:
import torch
import torchvision.models as models

# Load pretrained ResNet152 model.
# The weights are named explicitly: a boolean for `weights` is a deprecated
# legacy value. IMAGENET1K_V1 is the set the legacy flag resolves to, so the
# embeddings stay comparable with ones computed by the previous version of this
# cell (do not switch to DEFAULT without recomputing the stored embeddings).
model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)

# Remove the final fully-connected layer so the network returns features
# rather than class scores
model = torch.nn.Sequential(*(list(model.children())[:-1]))

# Set the model to evaluation mode
model.eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [14]:
from torchvision import transforms
from PIL import Image
from urllib.parse import quote

# Define preprocessing steps: resize, centre-crop to 224, convert to a tensor,
# then normalise each channel with the fixed mean/std below
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
preprocess = transforms.Compose(preprocessing_steps)

# Define a function to extract embeddings for each Pokemon
def extract_embeddings_for_pokemon(pokemon_name):
    """Return the model's feature vector for one Pokemon image, or None if it is missing.

    Args:
        pokemon_name: Name used verbatim for both the image folder and the file
            stem: ``../data/pokemon_db/images/<name>/<name>_new.png``.

    Returns:
        The model output for the image as a numpy array with all singleton
        dimensions squeezed out, or ``None`` (after printing a notice) when the
        image file does not exist.
    """
    # NOTE(review): the name goes into the path unescaped; names containing
    # characters such as apostrophes are reported missing in the logged output.
    # If those folders are stored URL-encoded on disk, the path needs to be
    # encoded here -- confirm against the image directory.
    image_path = f"../data/pokemon_db/images/{pokemon_name}/{pokemon_name}_new.png"

    try:
        # The context manager closes the file handle; convert() returns an
        # independent in-memory RGB image that remains valid afterwards.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
    except FileNotFoundError:
        print(f"Image not found for {pokemon_name}. Continuing to next.")
        return None

    # Add a leading batch dimension of size 1 for the model
    input_tensor = preprocess(image).unsqueeze(0)

    # Inference only: no gradients are needed
    with torch.no_grad():
        embeddings = model(input_tensor)

    return embeddings.squeeze().numpy()

In [15]:

def insert_embeddings(tx, pokemon_name, embeddings):
    """Store an image embedding on the Pokemon node with the given name.

    The node is merged on ``name`` (created if absent) and its
    ``resnet152_embeddings`` property is overwritten.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        pokemon_name: Name of the Pokemon node.
        embeddings: Object with a ``tolist()`` method (e.g. a numpy array);
            the resulting list is what gets stored.
    """
    cypher = """
    MERGE (p:Pokemon {name: $pokemon_name})
    SET p.resnet152_embeddings = $embeddings
    """
    # Convert to a plain list before handing the vector to the driver
    vector = embeddings.tolist()
    tx.run(cypher, pokemon_name=pokemon_name, embeddings=vector)

# Compute and store an image embedding for every Pokemon that has an image.
# One session is opened around the whole loop (as in the other insert cells)
# instead of a new session per Pokemon.
with driver.session() as session:
    for index, row in pokemon.iterrows():
        pokemon_name = row['Pokemon']
        pokemon_file = row['PokemonFile']

        embeddings = extract_embeddings_for_pokemon(pokemon_file)

        # extract_embeddings_for_pokemon returns None when the image file is missing
        if embeddings is not None:
            session.execute_write(insert_embeddings, pokemon_name, embeddings)

Image not found for Castform_Sunny_Form. Continuing to next.
Image not found for Castform_Rainy_Form. Continuing to next.
Image not found for Castform_Snowy_Form. Continuing to next.
Image not found for Dudunsparce_Three-Segment_Form. Continuing to next.
Image not found for Partner_Eevee. Continuing to next.
Image not found for Eternatus_Eternamax. Continuing to next.
Image not found for Farfetch'd. Continuing to next.
Image not found for Galarian_Farfetch'd. Continuing to next.
Image not found for Gouging_Fire. Continuing to next.
Image not found for Gourgeist_Average_Size. Continuing to next.
Image not found for Gourgeist_Small_Size. Continuing to next.
Image not found for Gourgeist_Large_Size. Continuing to next.
Image not found for Gourgeist_Super_Size. Continuing to next.
Image not found for Ash-Greninja. Continuing to next.
Image not found for Hoopa_Confined. Continuing to next.
Image not found for Hoopa_Unbound. Continuing to next.
Image not found for Hydrapple. Continuing to ne

In [16]:
def query_similar_pokemon(tx, target_embeddings, threshold=0.7):
    """Find Pokemon whose stored image embedding is close to a target vector.

    Only nodes with a non-empty ``resnet152_embeddings`` property are compared.
    Similarity is computed in the database with ``gds.similarity.cosine``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``; the
            result must be iterable and yield records indexable by field name.
        target_embeddings: Object with a ``tolist()`` method (e.g. a numpy array).
        threshold: Only matches with a cosine similarity strictly greater than
            this value are returned.

    Returns:
        A list of ``(name, cosineSimilarity)`` tuples, in the order delivered
        by the query (descending similarity).
    """
    cypher = """
    MATCH (p:Pokemon)
    WHERE p.resnet152_embeddings IS NOT NULL AND size(p.resnet152_embeddings) > 0
    WITH p, gds.similarity.cosine(p.resnet152_embeddings, $target_embeddings) as cosineSimilarity
    WHERE cosineSimilarity > $threshold
    RETURN p.name AS name, cosineSimilarity
    ORDER BY cosineSimilarity DESC
    """
    records = tx.run(cypher, target_embeddings=target_embeddings.tolist(), threshold=threshold)

    matches = []
    for record in records:
        matches.append((record["name"], record["cosineSimilarity"]))
    return matches

In [17]:
# Compute the query vector from Pikachu's image.
# extract_embeddings_for_pokemon returns None when the image file is missing,
# in which case the similarity query below cannot run.
embeddings = extract_embeddings_for_pokemon("Pikachu")
embeddings

array([0.3280209 , 0.33413607, 0.30726063, ..., 0.5557928 , 0.20502698,
       1.0621434 ], dtype=float32)

In [18]:
# Use the driver to open a session and query for similar Pokemon.
# The query only reads from the graph, so it runs in a read transaction
# (like the Pikachu lookup earlier in the notebook).
with driver.session() as session:
    similar_pokemon = session.execute_read(query_similar_pokemon, embeddings, 0.8)

# Print the similar Pokemon
for name, cosineSimilarity in similar_pokemon:
    print(f"Pokemon: {name}, Similarity: {cosineSimilarity}")

Pokemon: Pikachu, Similarity: 1.0
Pokemon: Yamper, Similarity: 0.8555292161064459
Pokemon: Morpeko Full Belly Mode, Similarity: 0.8518354312264668
Pokemon: Hypno, Similarity: 0.8507935540153664
Pokemon: Victini, Similarity: 0.84760011679035
Pokemon: Togetic, Similarity: 0.8448431821699647
Pokemon: Pikipek, Similarity: 0.8432506596441814
Pokemon: Shinx, Similarity: 0.8431750285398371
Pokemon: Dewott, Similarity: 0.8407603617987368
Pokemon: Sneasel, Similarity: 0.8386212021440854
Pokemon: Mimikyu, Similarity: 0.8380398921286724
Pokemon: Dolliv, Similarity: 0.8376376152546866
Pokemon: Ribombee, Similarity: 0.8371844480595237
Pokemon: Pawmo, Similarity: 0.8367705307977559
Pokemon: Ducklett, Similarity: 0.836618694411502
Pokemon: Pachirisu, Similarity: 0.83391949853133
Pokemon: Bunnelby, Similarity: 0.833163415985421
Pokemon: Scream Tail, Similarity: 0.8327483128216615
Pokemon: Basculegion Female, Similarity: 0.8326311729767073
Pokemon: Sylveon, Similarity: 0.8322797373731796
Pokemon: Shelm