In [1]:
import os

# Move the working directory one level up, to the root of this repo, so that
# the relative 'notebooks/files/...' paths used below resolve.
# os.path.dirname is used instead of splitting/joining on '\\' because the
# manual split only works with Windows separators: with POSIX paths it yields
# an empty string and os.chdir('') raises.
# NOTE: not idempotent - re-running this cell moves up one more level.
os.chdir(os.path.dirname(os.getcwd()))

import pandas as pd
import numpy as np
from simtag.filter import simtag_filter

# set up the library

### import data

In [2]:
# read the raw games table and discard every row containing a missing value
df = pd.read_parquet('notebooks/files/games.parquet')
df = df.dropna()

# split the comma-separated 'Tags' and 'Genres' strings into lists
df['Tags'] = df['Tags'].map(lambda tags: tags.split(','))
df['Genres'] = df['Genres'].map(lambda genres: genres.split(','))

# one list of tags per remaining row, as a plain Python list of lists
sample_list = df['Tags'].tolist()

### process data

In [3]:
# initiate recommender: construct the simtag_filter from sample_list,
# the per-row tag lists extracted from df['Tags']
recommender = simtag_filter(sample_list)

In [3]:
# Compute M only if it has not been cached yet; otherwise reuse the cached
# parquet file. The computation is expensive (the logged run below took well
# over an hour), so an unconditional recompute makes "Run All" impractical and
# silently overwrites the existing cache.
# NOTE: the cache is not invalidated automatically - delete the parquet file
# to force a recompute after the input data changes.
M_PATH = 'notebooks/files/M.parquet'
if os.path.exists(M_PATH):
    recommender.M = pd.read_parquet(M_PATH)
else:
    recommender.compute_M()
    recommender.M.to_parquet(M_PATH)

# display the matrix as the cell output
recommender.M

Processing tags: 100%|██████████| 446/446 [1:18:52<00:00, 10.61s/it]


Unnamed: 0,Local Co-Op,Twin Stick Shooter,Silent Protagonist,Fantasy,Quick-Time Events,Foreign,Software Training,Competitive,Destruction,Trading,...,Music,Animation & Modeling,Runner,Mahjong,Logic,Artificial Intelligence,Pirates,Hero Shooter,Turn-Based,Underground
Local Co-Op,1.000000,0.067914,0.000646,0.033097,0.003688,0.001928,0.000000,0.042175,0.016940,0.002354,...,0.010276,0.000000,0.006455,0.0,0.008354,0.005068,0.006517,0.004714,0.012040,0.003621
Twin Stick Shooter,0.067914,1.000000,0.001572,0.006761,0.002786,0.000000,0.000000,0.010657,0.018112,0.000000,...,0.012966,0.000000,0.001799,0.0,0.000504,0.004695,0.001274,0.014085,0.000000,0.002674
Silent Protagonist,0.000646,0.001572,1.000000,0.002857,0.000000,0.005051,0.000000,0.001449,0.001613,0.000000,...,0.008794,0.000000,0.003008,0.0,0.006549,0.000000,0.000000,0.000000,0.002535,0.003311
Fantasy,0.033097,0.006761,0.002857,1.000000,0.004826,0.005515,0.000000,0.010073,0.006402,0.004958,...,0.009498,0.001587,0.010876,0.0,0.017552,0.006589,0.008365,0.004160,0.098415,0.007415
Quick-Time Events,0.003688,0.002786,0.000000,0.004826,1.000000,0.007143,0.000000,0.000000,0.004280,0.000000,...,0.004535,0.002457,0.006711,0.0,0.009975,0.004267,0.004762,0.011820,0.001203,0.005208
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Artificial Intelligence,0.005068,0.004695,0.000000,0.006589,0.004267,0.004769,0.006329,0.012613,0.016393,0.010417,...,0.003247,0.002646,0.007326,0.0,0.019669,1.000000,0.001297,0.010390,0.005489,0.005464
Pirates,0.006517,0.001274,0.000000,0.008365,0.004762,0.000000,0.000000,0.002387,0.006536,0.027140,...,0.000000,0.000000,0.002457,0.0,0.002974,0.001297,1.000000,0.002028,0.006395,0.004444
Hero Shooter,0.004714,0.014085,0.000000,0.004160,0.011820,0.000000,0.000000,0.014388,0.014379,0.002012,...,0.004193,0.002088,0.009828,0.0,0.001183,0.010390,0.002028,1.000000,0.000576,0.008811
Turn-Based,0.012040,0.000000,0.002535,0.098415,0.001203,0.003785,0.000000,0.007253,0.002490,0.005797,...,0.000912,0.000583,0.000486,0.0,0.008609,0.005489,0.006395,0.000576,1.000000,0.002959


In [4]:
# if already existing, load M from its parquet file and attach it to the
# recommender (this replaces whatever recommender.M held before)
recommender.M = pd.read_parquet('notebooks/files/M.parquet')

In [5]:
# prepare search: encode every sample's tag list, then build the `nbrs`
# object that is passed to soft_tag_filtering in the search cells
# (k=5 is presumably the number of neighbours to retrieve - TODO confirm
# against simtag_filter.compute_nbrs)
sample_vectors = recommender.encode_samples(sample_list)
nbrs = recommender.compute_nbrs(sample_vectors, k=5)

processing samples: 100%|██████████| 41895/41895 [00:11<00:00, 3780.21it/s]


# soft tag search

### combined covariate encoding

In [6]:
# tags to search for, given as a plain (unweighted) list
query_tag_list = ['Horror', 'Combat', 'Open World']

# perform search: encode the query, then soft-filter the samples with it
query_vector = recommender.encode_query(
    query_tag_list=query_tag_list,
    j=5,
)
search_results = recommender.soft_tag_filtering(
    nbrs,
    sample_list,
    query_vector,
)

# display the entry at index 1 of the results
search_results[1]

['Survival Horror',
 'Open World',
 'Hunting',
 'FPS',
 'Exploration',
 'Horror',
 'Atmospheric',
 'Shooter',
 'Nonlinear',
 'Action',
 'Survival',
 'Supernatural',
 'Singleplayer',
 'First-Person',
 'Combat',
 'Early Access',
 'Psychological Horror',
 'Loot',
 'Dark',
 'Inventory Management']

### weighted combined covariate encoding

In [7]:
# tags to search for, each mapped to its weight
query_tag_dict = {'Shooter': 0.3, 'Open World': 0.7}

# perform search: encode the weighted query, then soft-filter the samples
query_vector = recommender.encode_query(
    query_tag_dict=query_tag_dict,
    j=5,
)
search_results = recommender.soft_tag_filtering(
    nbrs,
    sample_list,
    query_vector,
)

# display the entry at index 0 of the results
search_results[0]

['Adventure', 'Indie', 'Action', 'RPG', 'Survival', 'Open World', 'Shooter']

# validation

In [8]:
import statistics

# query shared by the soft and the hard search, and the index of the
# result entry that gets scored for each of them
query_tag_list = ['Simulation', 'Exploration', 'Open World']
result_index = 0

# soft search: encode the query, filter, then score the selected entry
query_vector = recommender.encode_query(
    query_tag_list=query_tag_list,
    negative_score=False,
    j=5,
)
soft_filter_results = recommender.soft_tag_filtering(
    nbrs,
    sample_list,
    query_vector,
)
soft_raw_scores, soft_mean_scores = recommender.compute_neighbor_scores(
    soft_filter_results[result_index],
    query_tag_list,
    remove_max=False,
)

# hard search: filter directly on the tag list, then score the same way
hard_filter_results = recommender.hard_tag_filtering(
    sample_list,
    query_tag_list,
)
hard_raw_scores, hard_mean_scores = recommender.compute_neighbor_scores(
    hard_filter_results[result_index],
    query_tag_list,
    remove_max=False,
)

### visualize flattened results

In [9]:
# 'mean' visualization of the soft-search result
recommender.show_results(
    query_tag_list,
    soft_raw_scores,
    soft_filter_results[result_index],
    visualization_type='mean',
    power=0.4,
)
# 'mean' visualization of the hard-search result, for comparison
recommender.show_results(
    query_tag_list,
    hard_raw_scores,
    hard_filter_results[result_index],
    visualization_type='mean',
    power=0.4,
)

### visualize granular results

In [10]:
# 'raw' visualization of the soft-search result
recommender.show_results(
    query_tag_list,
    soft_raw_scores,
    soft_filter_results[result_index],
    visualization_type='raw',
    power=0.4,
)
# blank line between the two outputs
print()
# 'raw' visualization of the hard-search result, for comparison
recommender.show_results(
    query_tag_list,
    hard_raw_scores,
    hard_filter_results[result_index],
    visualization_type='raw',
    power=0.4,
)


