In [13]:
import json
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
def get_similarity_matrix(vectors: dict):
    vectors = list(zip(*vectors.items()))
    
    X = np.array(vectors[1])
    Y = cosine_similarity(X)

    # Creating a mask so argmax does not cause product to return itself.
    n = len(Y)
    Y_mask = [list([False] * n) for y in Y]
    for i in range(n):
        Y_mask[i][i] = True
    Y = np.ma.MaskedArray(Y, Y_mask)
    
    return Y

In [15]:
filename = 'data/word_vectors.json'
with open(filename, 'r') as file:
    word_vectors = json.load(file)
    
word_similarity = get_similarity_matrix(word_vectors)

In [16]:
filename = 'data/tag_vectors.json'
with open(filename, 'r') as file:
    tag_vectors = json.load(file)
    
tag_similarity = get_similarity_matrix(tag_vectors)

In [17]:
index, options = zip(*enumerate(tag_vectors))
@interact(x=zip(options, index))
def f(x):
    similarity_index = x
    best_match_index = np.argmax(tag_similarity[x])
    similarity_score = np.max(tag_similarity[x])
    return (similarity_index, best_match_index, similarity_score)

interactive(children=(Dropdown(description='x', options=(('282010', 0), ('70', 1), ('1640', 2), ('1630', 3), (…

In [18]:
filename = 'data/processed_games.json'
with open(filename, 'r') as file:
    games = json.load(file)

In [19]:
lookup = list(tag_vectors.keys())[2]
for game in games:
    if game['id'] == lookup:
        print(game)

{'publisher': 'Strategy First', 'genres': ['Strategy'], 'app_name': "Disciples II: Gallean's Return", 'sentiment': 'Mostly Positive', 'title': "Disciples II: Gallean's Return", 'url': 'http://store.steampowered.com/app/1640/Disciples_II_Galleans_Return/', 'release_date': '2006-07-06', 'tags': ['Turn-Based Strategy', 'Strategy', 'Classic', 'Atmospheric', 'Dark Fantasy', 'Turn-Based', 'Fantasy', 'Tactical', 'RPG', 'Gothic', 'Great Soundtrack', 'Multiplayer', 'Isometric'], 'reviews_url': 'http://steamcommunity.com/app/1640/reviews/?browsefilter=mostrecent&p=1', 'specs': ['Single-player', 'Multi-player', 'Co-op'], 'id': '1640', 'developer': 'Strategy First'}


In [21]:
ind = np.argpartition(tag_similarity[70], -4)[-4:]
tag_similarity[70][ind]

masked_array(data=[0.7142857142857142, 0.8451542547285166,
                   0.857142857142857, --],
             mask=[False, False, False,  True],
       fill_value=1e+20)