In [1]:
import os

# Move the working directory one level up, to the root of this repo, so that the
# relative 'notebooks/files/...' paths used by the cells below resolve.
# os.path.dirname is used instead of splitting the path on '\\' by hand: the
# manual split only understands Windows separators and produces '' on POSIX,
# which makes os.chdir raise.
# NOTE: this is not idempotent - every re-run of the cell climbs one more level.
os.chdir(os.path.dirname(os.getcwd()))

import pandas as pd
import numpy as np
from simtag.filter import simtag_filter

# set up the library

### import data

In [18]:
# Load the games table and prepare it in a single chain: discard rows with
# missing values, turn the comma-separated 'Tags' / 'Genres' strings into lists,
# and remove the four listed columns.
df = (
    pd.read_parquet('notebooks/files/games.parquet')
    .dropna()
    .assign(
        Tags=lambda frame: frame['Tags'].map(lambda text: text.split(',')),
        Genres=lambda frame: frame['Genres'].map(lambda text: text.split(',')),
    )
    .drop(columns=['game_vector', 'game_indices', 'Score', 'Recommendations'])
)

# one tag list per game, in the same row order as df
sample_list = df['Tags'].tolist()

### process data

In [3]:
# initiate recommender: build the simtag_filter engine from the per-game tag
# lists held in sample_list (one list of tags per row of df)
engine = simtag_filter(sample_list)

In [3]:
# Compute M only when no cached copy exists. The computation is expensive (the
# saved run of this cell took well over an hour), so a cached parquet file is
# loaded instead of being recomputed and overwritten on every run.
m_path = 'notebooks/files/M.parquet'
if os.path.exists(m_path):
    # reuse the matrix written by a previous run
    engine.M = pd.read_parquet(m_path)
else:
    # first run: compute M and persist it for later sessions
    engine.compute_M()
    engine.M.to_parquet(m_path)
engine.M

Processing tags: 100%|██████████| 446/446 [1:18:52<00:00, 10.61s/it]


Unnamed: 0,Local Co-Op,Twin Stick Shooter,Silent Protagonist,Fantasy,Quick-Time Events,Foreign,Software Training,Competitive,Destruction,Trading,...,Music,Animation & Modeling,Runner,Mahjong,Logic,Artificial Intelligence,Pirates,Hero Shooter,Turn-Based,Underground
Local Co-Op,1.000000,0.067914,0.000646,0.033097,0.003688,0.001928,0.000000,0.042175,0.016940,0.002354,...,0.010276,0.000000,0.006455,0.0,0.008354,0.005068,0.006517,0.004714,0.012040,0.003621
Twin Stick Shooter,0.067914,1.000000,0.001572,0.006761,0.002786,0.000000,0.000000,0.010657,0.018112,0.000000,...,0.012966,0.000000,0.001799,0.0,0.000504,0.004695,0.001274,0.014085,0.000000,0.002674
Silent Protagonist,0.000646,0.001572,1.000000,0.002857,0.000000,0.005051,0.000000,0.001449,0.001613,0.000000,...,0.008794,0.000000,0.003008,0.0,0.006549,0.000000,0.000000,0.000000,0.002535,0.003311
Fantasy,0.033097,0.006761,0.002857,1.000000,0.004826,0.005515,0.000000,0.010073,0.006402,0.004958,...,0.009498,0.001587,0.010876,0.0,0.017552,0.006589,0.008365,0.004160,0.098415,0.007415
Quick-Time Events,0.003688,0.002786,0.000000,0.004826,1.000000,0.007143,0.000000,0.000000,0.004280,0.000000,...,0.004535,0.002457,0.006711,0.0,0.009975,0.004267,0.004762,0.011820,0.001203,0.005208
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Artificial Intelligence,0.005068,0.004695,0.000000,0.006589,0.004267,0.004769,0.006329,0.012613,0.016393,0.010417,...,0.003247,0.002646,0.007326,0.0,0.019669,1.000000,0.001297,0.010390,0.005489,0.005464
Pirates,0.006517,0.001274,0.000000,0.008365,0.004762,0.000000,0.000000,0.002387,0.006536,0.027140,...,0.000000,0.000000,0.002457,0.0,0.002974,0.001297,1.000000,0.002028,0.006395,0.004444
Hero Shooter,0.004714,0.014085,0.000000,0.004160,0.011820,0.000000,0.000000,0.014388,0.014379,0.002012,...,0.004193,0.002088,0.009828,0.0,0.001183,0.010390,0.002028,1.000000,0.000576,0.008811
Turn-Based,0.012040,0.000000,0.002535,0.098415,0.001203,0.003785,0.000000,0.007253,0.002490,0.005797,...,0.000912,0.000583,0.000486,0.0,0.008609,0.005489,0.006395,0.000576,1.000000,0.002959


In [4]:
# if already existing, load M
# Reads the matrix from the same parquet file that the compute cell writes with
# engine.M.to_parquet(...) and assigns it to engine.M, so a session can start
# from the saved file.
engine.M = pd.read_parquet('notebooks/files/M.parquet')

In [5]:
# prepare search
# Encode every game's tag list, then build `nbrs` from those encodings; `nbrs`
# is what the soft_tag_filtering calls below receive as their first argument.
# NOTE(review): k=5 is presumably the number of neighbours returned per query
# (the semantic lookup table further down shows 5 rows) - confirm against simtag.
sample_vectors = engine.encode_samples(sample_list)
nbrs = engine.compute_nbrs(sample_vectors, k=5)

processing samples: 100%|██████████| 41895/41895 [00:10<00:00, 3987.25it/s]


# semantic tag search

### naive

In [9]:
# plain list of query tags (no weights attached)
query_tag_list = ['Horror', 'Combat', 'Open World']

# encode the query, run the soft filter and display the tags of the first result
query_vector = engine.encode_query(
    query_tag_list=query_tag_list,
    j=5,
)
indices, search_results = engine.soft_tag_filtering(
    nbrs,
    sample_list,
    query_vector,
)
search_results[0]

['Open World',
 'First-Person',
 'Zombies',
 'Psychological Horror',
 'Survival',
 'Horror',
 'Survival Horror',
 'Dark',
 'Story Rich',
 'Combat',
 'Emotional',
 'Drama',
 'Thriller',
 'Puzzle',
 'Mystery',
 'Singleplayer',
 'Exploration',
 'Investigation',
 'Linear',
 '3D']

### weighted

In [8]:
# query tags mapped to the weight given to each of them
query_tag_dict = {'Shooter': 0.3, 'Open World': 0.7}

# encode the weighted query, run the soft filter and display the tags of the first result
query_vector = engine.encode_query(
    query_tag_dict=query_tag_dict,
    j=5,
)
indices, search_results = engine.soft_tag_filtering(
    nbrs,
    sample_list,
    query_vector,
)
search_results[0]

['Adventure', 'Indie', 'Action', 'RPG', 'Survival', 'Open World', 'Shooter']

# validation

In [31]:
# tags used to compare the two filtering strategies
query_tag_list = ['Simulation', 'Exploration', 'Open World']
# position, within each result list, of the entry that is scored here and plotted below
result_index = 1

# semantic search: encode the query, soft-filter, then score the selected result
query_vector = engine.encode_query(
    query_tag_list=query_tag_list,
    negative_score=False,
    j=5,
)
soft_indices, soft_filter_results = engine.soft_tag_filtering(nbrs, sample_list, query_vector)
soft_raw_scores, soft_mean_scores = engine.compute_neighbor_scores(
    soft_filter_results[result_index],
    query_tag_list,
    remove_max=False,
)

# traditional search: hard-filter on the same tags, then score the selected result
hard_indices, hard_filter_results = engine.hard_tag_filtering(sample_list, query_tag_list)
hard_raw_scores, hard_mean_scores = engine.compute_neighbor_scores(
    hard_filter_results[result_index],
    query_tag_list,
    remove_max=False,
)

### lookup data

In [32]:
# semantic tag filtering
# Show the games selected by the soft filter. Positional lookup (.iloc) is used
# because sample_list was built from df['Tags'] in row order.
# NOTE(review): assumes soft_indices are positions into sample_list - confirm against simtag.
df.iloc[soft_indices]

Unnamed: 0,index,Name,Release date,Estimated owners,Peak CCU,Price,About the game,Total_reviews,Genres,Tags
21494,35033,Copoka,"Feb 15, 2017",20000 - 50000,0,4.99,Copoka is an open-world exploratory indie game...,231,"[Casual, Indie]","[Casual, Indie, Exploration, Atmospheric, Flig..."
3220,5247,Escape: Sierra Leone,"Dec 5, 2016",0 - 20000,0,9.99,This game is currently a Beta in Early Access ...,46,"[Adventure, Simulation, Early Access]","[Adventure, Early Access, Simulation, Survival..."
28459,46557,Homeless Simulator,"Apr 24, 2019",20000 - 50000,0,0.49,Homeless Simulator is a homeless person simula...,215,"[Adventure, Indie, Simulation]","[Simulation, Indie, Adventure, Survival, Explo..."
12465,20230,Chinese Train Trip,"Mar 31, 2022",0 - 20000,1,3.99,First-person exploration. Genre: adventure. Tr...,8,"[Adventure, Indie, Simulation]","[Adventure, Simulation, Sandbox, Walking Simul..."
37968,63058,Call of the Wild: The Angler™,"Aug 31, 2022",50000 - 100000,4344,29.99,From the creators of theHunter: Call of the Wi...,2411,"[Adventure, Casual, Simulation, Sports]","[Simulation, Fishing, Casual, Sports, Open Wor..."


In [34]:
# traditional tag filtering
# Show the games selected by the hard filter. Positional lookup (.iloc) is used
# because sample_list was built from df['Tags'] in row order.
# NOTE(review): assumes hard_indices are positions into sample_list - confirm against simtag.
df.iloc[hard_indices]

Unnamed: 0,index,Name,Release date,Estimated owners,Peak CCU,Price,About the game,Total_reviews,Genres,Tags
13,22,Aerofly FS 2 Flight Simulator,"Nov 20, 2017",100000 - 200000,19,37.49,Aerofly FS 2 lets you explore the world of fly...,1898,"[Action, Indie, Racing, Simulation]","[Flight, Simulation, VR, Racing, Physics, Open..."
41,57,Forza Horizon 4,"Mar 9, 2021",2000000 - 5000000,7571,59.99,Dynamic seasons change everything at the world...,137634,[Racing],"[Racing, Open World, Driving, Multiplayer, Onl..."
60,96,Oxygen Not Included,"Jul 30, 2019",2000000 - 5000000,7507,24.99,In the space-colony simulation game Oxygen Not...,85916,"[Indie, Simulation]","[Colony Sim, Base-Building, Survival, Resource..."
145,223,Ships 2017,"Oct 19, 2016",20000 - 50000,0,14.99,Take control over 3 special purpose naval vess...,177,"[Indie, Simulation]","[Simulation, Indie, Free to Play, Adventure, S..."
382,617,JCB Pioneer: Mars,"Sep 1, 2017",0 - 20000,0,24.99,WHAT IS JCB PIONEER: MARS? JCB Pioneer: Mars d...,143,"[Action, Adventure, Indie, Simulation, Early A...","[Early Access, Simulation, Action, Adventure, ..."
...,...,...,...,...,...,...,...,...,...,...
41747,83359,廃集落探索,"Nov 17, 2023",0 - 20000,1,4.99,Abandoned Village Exploration This is a first-...,7,[Indie],"[Horror, Psychological, Dark, Walking Simulato..."
41752,83397,Under A New Sun,"Nov 13, 2023",0 - 20000,31,16.99,The plan was simple - the ship would wake you ...,41,"[Adventure, Indie, Simulation, Early Access]","[Open World Survival Craft, Survival, Open Wor..."
41815,83843,Sledders,"Dec 5, 2023",0 - 20000,306,24.99,Shred your sled in open-world backcountries. S...,293,"[Racing, Simulation, Sports, Early Access]","[Simulation, Snow, Offroad, Racing, Sports, Mo..."
41817,83846,Soulash 2,"Dec 4, 2023",0 - 20000,108,16.19,"In a realm where gods have perished, a new era...",58,"[Adventure, Indie, RPG, Strategy, Early Access]","[Early Access, RPG, Sandbox, Procedural Genera..."


### visualize flattened results

In [35]:
# display options shared by both plots of this cell
mean_view_kwargs = dict(
    visualization_type='mean',
    power=0.4,
    title=f'{query_tag_list}',
    visualize=True,
    return_html=False,
)

# soft (semantic) result first, hard (traditional) result second
engine.show_results(query_tag_list, soft_raw_scores, soft_filter_results[result_index], **mean_view_kwargs)
engine.show_results(query_tag_list, hard_raw_scores, hard_filter_results[result_index], **mean_view_kwargs)

### visualize granular results

In [36]:
# display options shared by both plots of this cell
raw_view_kwargs = dict(
    visualization_type='raw',
    power=0.4,
    title=f'{query_tag_list}',
    visualize=True,
    return_html=False,
)

# soft (semantic) result first, a blank line, then the hard (traditional) result
engine.show_results(query_tag_list, soft_raw_scores, soft_filter_results[result_index], **raw_view_kwargs)
print()
engine.show_results(query_tag_list, hard_raw_scores, hard_filter_results[result_index], **raw_view_kwargs)


