In [1]:
import os
import warnings

# move to the root of this repo: two directory levels above the current
# working directory. os.path is used instead of splitting on '\\' so the
# cell also works on platforms whose path separator is not a backslash
# (the string-splitting version ends up calling os.chdir('') there).
os.chdir(os.path.dirname(os.path.dirname(os.getcwd())))
# silence every warning category for the rest of the session
warnings.simplefilter("ignore")

import pandas as pd
import numpy as np
from simtag.filter import simtag_filter
from sklearn.neighbors import NearestNeighbors




In [2]:
# Load the games table, discard rows with any missing value, turn the
# comma-separated 'Tags' / 'Genres' strings into lists, and drop the
# columns this notebook does not use.
df = (
	pd.read_parquet('notebooks/steam-games/games.parquet')
	.dropna()
	.assign(
		Tags=lambda frame: frame['Tags'].map(lambda tags: tags.split(',')),
		Genres=lambda frame: frame['Genres'].map(lambda genres: genres.split(',')),
	)
	.drop(columns=['game_vector', 'game_indices', 'Score', 'Recommendations'])
)

# one list of tag strings per remaining row
sample_list = df['Tags'].tolist()

In [3]:
# Create the simtag engine from the per-row tag lists.
# NOTE(review): model_name looks like a sentence-transformers model id; confirm
# against simtag_filter's documentation.
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
engine = simtag_filter(sample_list=sample_list, model_name=MODEL_NAME)

In [4]:
# Ask the engine for M and its companion structures, then register M on the
# engine with the 'dot_product' covariate transformation.
# valid_tags is returned but not used by any cell visible here.
(M, valid_tags, tag_pointers) = engine.compute_optimal_M()
engine.load_M(
	M,
	tag_pointers,
	covariate_transformation='dot_product',
)

100%|██████████| 446/446 [00:08<00:00, 52.48it/s]


# dot_product

In [5]:
# Prepare search: pass every sample's tag list through the engine's encoder,
# then build the search index from the resulting vectors.
sample_vectors = engine.encode_samples(sample_list)
index_covariate = engine.compute_search_indexes(
	sample_vectors,
	k=5,
)

processing samples: 100%|██████████| 41895/41895 [00:26<00:00, 1565.39it/s]


### covariate search

In [6]:
# Search with a plain list of tags. 'Dark Fantasy1' is not an existing tag;
# with allow_new_tags / print_new_tags enabled the engine reports what it
# mapped it to before the results are printed.
query_tag_list = ['Shooter', 'Dark Fantasy1', 'Sci-fi']
indices, search_results = engine.covariate_search(
	index_covariate,
	sample_list,
	query_tag_list=query_tag_list,
	allow_new_tags=True,
	print_new_tags=True,
	k=3,
)

# show the tag list of each returned sample, one per line
for result_tags in search_results:
	print(result_tags)

Dark Fantasy1 -> ['Dark Fantasy']
['RPG', 'Modern', 'Shooter', 'Top-Down Shooter', 'CRPG', 'Hero Shooter', '3D', '3D Vision', 'Third Person', 'Mechs', 'Action RPG', 'Sci-fi', 'Cyberpunk', 'Surreal', 'Dark', 'Story Rich', 'Linear', 'Female Protagonist', 'Combat', 'PvE']
['Action', 'RPG', 'Action RPG', 'Shooter', 'Dungeon Crawler', 'Exploration', 'Looter Shooter', 'Third Person', 'Aliens', 'Cyberpunk', 'Dark', 'Futuristic', 'Horror', 'Post-apocalyptic', 'Robots', 'Sci-fi', 'Combat', 'Singleplayer', 'Early Access']
['RPG', 'Action', 'Adventure', 'Indie', 'Dark Fantasy', 'Isometric', 'Fantasy', 'Hack and Slash', 'Action RPG', 'Dungeon Crawler', 'Visual Novel', 'Gore', 'Dystopian', 'Narration', "Shoot 'Em Up", 'Arena Shooter', 'Shooter', 'Third-Person Shooter']
['Early Access', 'Top-Down Shooter', 'Shooter', 'Realistic', 'Sci-fi', 'Rogue-like', 'Action', 'Post-apocalyptic', 'Resource Management', 'Action RPG', 'Third-Person Shooter', 'Bullet Hell', 'Third Person', 'Aliens', 'Futuristic', 'M

In [7]:
# Search with a tag -> number mapping instead of a plain list.
# NOTE(review): the numbers are presumably per-tag weights; confirm against
# covariate_search's documentation.
query_tag_dict = {'Voxel': 0.8, 'Shooter': 0.2, 'Open World': 0.6}
indices, search_results = engine.covariate_search(
	index_covariate,
	sample_list,
	query_tag_dict=query_tag_dict,
	allow_new_tags=True,
	print_new_tags=True,
	k=3,
)

# show the tag list of each returned sample, one per line
for result_tags in search_results:
	print(result_tags)

['Adventure', 'Action', 'Simulation', 'Open World', 'Survival', 'Voxel', 'Sci-fi', 'Early Access']
['Open World Survival Craft', 'Adventure', 'Indie', 'Building', 'Open World', 'Sandbox', 'Survival', 'Singleplayer', 'Crafting', 'Exploration', 'Level Editor', 'Atmospheric', 'Pixel Graphics', 'Multiplayer', 'Voxel']
['Action', 'Simulation', 'Sandbox', 'Strategy', 'Indie', 'Open World', 'Voxel', 'Multiplayer', 'City Builder', 'Building']
['Action', 'Indie', 'Simulation', 'Adventure', 'Space', 'Sandbox', 'Building', 'Sci-fi', 'Early Access', 'Open World', 'Voxel', 'Futuristic', 'Multiplayer', 'Space Sim', 'Physics']
['Puzzle', 'Physics', '3D', 'Exploration', 'Point & Click', 'Retro', 'Open World', 'Indie', 'Building', 'Colorful', 'Singleplayer', 'Puzzle-Platformer', 'Cartoony', 'Voxel', 'Old School', 'Pixel Graphics']
