In [1]:
import os
import warnings

# Move the working directory to the root of this repo, two levels above the
# directory the kernel was started in.
# The path is built with os.path so it works on every platform: splitting on
# '\\' only worked on Windows, while on POSIX it produced an empty path and
# os.chdir('') raised FileNotFoundError.
# NOTEBOOK_DIR is captured only once per kernel session so that re-running this
# cell does not climb two more levels each time.
if 'NOTEBOOK_DIR' not in globals():
	NOTEBOOK_DIR = os.getcwd()
REPO_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, os.pardir, os.pardir))
os.chdir(REPO_ROOT)

# Suppress every warning for the remainder of the session.
warnings.simplefilter("ignore")

import pandas as pd
import numpy as np
from simtag.filter import simtag_filter
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer




In [2]:
# Load the raw games table, discard rows with any missing value, turn the
# comma-separated 'Tags' and 'Genres' strings into lists, and drop the columns
# this notebook does not use.
df = (
	pd.read_parquet('notebooks/steam-games/games.parquet')
	.dropna()
	.assign(
		Tags=lambda frame: frame['Tags'].apply(lambda raw: raw.split(',')),
		Genres=lambda frame: frame['Genres'].apply(lambda raw: raw.split(',')),
	)
	.drop(columns=['game_vector', 'game_indices', 'Score', 'Recommendations'])
)

# One list of tags per game, as a plain Python list of lists.
sample_list = df['Tags'].tolist()

In [3]:
# Instantiate the sentence-embedding model first, on CPU, so the same instance
# can be assigned to multiple copies of the engine.
model = SentenceTransformer(
	'sentence-transformers/all-MiniLM-L6-v2',
	device='cpu',
)

# Create the engine around that shared model.
engine = simtag_filter(model=model)

In [4]:
# Compute M, the valid tags and the pointers from the raw tag lists.
# NOTE(review): the exact meaning of these three values is defined inside simtag
# and is not visible from this notebook. `valid_tags` is not used by the cells shown here.
M, valid_tags, pointers = engine.compute_optimal_M(
	sample_list,
)

# Hand M and the pointers to the engine, selecting the 'dot_product' covariate transformation.
engine.load_M(
	M,
	pointers,
	covariate_transformation='dot_product',
)

100%|██████████| 446/446 [00:07<00:00, 57.26it/s]


# dot_product

In [5]:
# Prepare the search in two steps.
# 1) Encode every tag list, with sample quantization enabled and a left percentile of 1.
sample_vectors = engine.encode_samples(
	sample_list,
	quantize_samples=True,
	quantization_left_percentile=1,
)

# 2) Build the search index over the encoded samples (k=10).
index_covariate = engine.compute_search_indexes(
	sample_vectors,
	k=10,
)

processing samples: 100%|██████████| 41895/41895 [00:24<00:00, 1690.97it/s]


In [6]:
# Search by a plain list of tags. New tags are allowed and reported: in the
# recorded output, 'Scifi' is printed as mapped to ['Sci-fi'].
# NOTE(review): k=3 is passed here, yet the recorded output lists more than
# three results - confirm what `k` controls in covariate_search.
query_tag_list = ['Horror', 'Scifi']
indices, search_results = engine.covariate_search(
	index_covariate,
	sample_list,
	query_tag_list=query_tag_list,
	allow_new_tags=True,
	print_new_tags=True,
	k=3,
)

# Print the tag list of each returned sample, one per line.
for result_tags in search_results:
	print(result_tags)

Scifi -> ['Sci-fi']
['Indie', 'Horror', 'Dystopian', 'Psychological Horror', 'Sci-fi', 'Survival Horror']
['Adventure', 'VR', 'Horror', 'Sci-fi', 'Mystery']
['Indie', 'Horror', 'Sci-fi', 'Survival Horror', 'Stealth', 'Atmospheric']
['Indie', 'Strategy', 'Horror', 'First-Person', 'Survival Horror', 'Thriller', 'Singleplayer', 'Psychological Horror', 'Sci-fi', 'Futuristic']
['Action', 'Indie', 'Adventure', 'Retro', 'Horror', 'Open World', 'Psychological Horror', 'Sci-fi', 'Survival Horror', 'Colorful', 'Pixel Graphics', 'Space Sim']
['Horror', 'Nudity', 'Gore', 'Action', 'Adventure', 'Violent', 'Indie', 'Survival Horror', 'Sci-fi', 'FPS']
['Indie', 'Action', 'Adventure', 'RPG', 'Horror', 'Survival Horror', 'Story Rich', 'Singleplayer', 'Sci-fi', 'Psychological', 'Zombies']
['Indie', 'Adventure', 'Horror', 'Survival Horror', 'Psychological Horror', 'First-Person']
['Indie', 'Horror', 'Action', 'Adventure', 'Survival', 'Sci-fi', 'VR', 'FPS', 'Space', 'Survival Horror', 'Rogue-like']
['Acti

In [7]:
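# Optional: keep the encoded samples on disk so they can be reloaded later.
# Uncomment to store the encoded samples:
# engine.npy_save(sample_vectors, 'notebooks/steam-games/samples_encoded')

# Uncomment to retrieve previously stored encoded samples:
# sample_vectors = engine.npy_load('notebooks/steam-games/samples_encoded')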

### covariate search

In [8]:
# Search with a per-tag numeric value instead of a plain tag list.
# NOTE(review): the values look like relative weights - confirm against simtag.
# As with the list query, k=3 is passed but the recorded output shows more than three results.
query_tag_dict = {'Voxel': 0.8, 'Shooter': 0.2, 'Open World': 0.6}
indices, search_results = engine.covariate_search(
	index_covariate,
	sample_list,
	query_tag_dict=query_tag_dict,
	allow_new_tags=True,
	print_new_tags=True,
	k=3,
)

# Print the tag list of each returned sample, one per line.
for result_tags in search_results:
	print(result_tags)

['Indie', 'Adventure', 'Space', 'Sci-fi', 'Open World', 'Procedural Generation']
['Space', 'Simulation', 'Indie', 'Action', 'Sandbox', 'Sci-fi', 'Singleplayer', 'Building', 'Space Sim', 'Adventure', 'Early Access', 'Open World']
['Adventure', '2D', 'Indie', 'Singleplayer', 'Pixel Graphics', 'Simulation', 'Runner', 'Hidden Object', 'Voxel', 'Magic', 'Atmospheric', 'Exploration']
['Action', 'Indie', 'Multiplayer', 'Voxel']
['Action', 'Simulation', 'Open World', '3D', 'First-Person', 'Singleplayer']
['Action', 'FPS', 'Singleplayer', 'First-Person', 'Shooter', 'Fast-Paced', 'Open World', 'Old School', '3D', 'Aliens', 'Mars']
['Strategy', 'Indie', 'Voxel', 'City Builder', 'God Game']
['Visual Novel', 'Parkour', 'Multiple Endings', 'Voxel', 'Adventure', 'Singleplayer', 'Indie']
['Action', 'Adventure', 'Platformer', 'Pixel Graphics', 'Voxel']
['Simulation', 'Indie', 'Open World', 'Sandbox', 'Flight']
