In [1]:
from sqlalchemy import create_engine
import numpy as np
import pandas as pd
import pickle
import json

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.neighbors import NearestNeighbors

# Load data
eng = create_engine('sqlite:///data/boardgames.db', echo=False)
nn = pickle.load(open('0827test.p', 'rb'))
raw = pd.read_sql_query("SELECT * FROM boardgames", eng).drop(['index', 'designer', 'publisher'], axis=1)



In [125]:
processed.avgrating.describe()

count    17575.000000
mean         0.652484
std          0.092524
min          0.000000
25%          0.640695
50%          0.645855
75%          0.662628
max          1.000000
Name: avgrating, dtype: float64

In [126]:
processed.columns[17:]

Index(['Take That', 'Card Drafting', 'Simultaneous Action Selection',
       'Trick-taking', 'Pick-up and Deliver', 'Press Your Luck',
       'Player Elimination', 'Pattern Recognition', 'Modular Board',
       'Roll / Spin and Move', 'Tile Placement', 'Simulation',
       'Hand Management', 'Partnerships', 'Trading', 'Role Playing',
       'Set Collection', 'Route/Network Building', 'Area Movement', 'Memory',
       'Secret Unit Deployment', 'Action / Movement Programming',
       'Variable Phase Order', 'Action Point Allowance System',
       'Point to Point Movement', 'Campaign / Battle Card Driven',
       'Betting/Wagering', 'Stock Holding', 'Area Control / Area Influence',
       'Commodity Speculation', 'Cooperative Play', 'Dice Rolling',
       'Auction/Bidding', 'Area Enclosure', 'Grid Movement', 'Hex-and-Counter',
       'Rock-Paper-Scissors', 'Crayon Rail System', 'Variable Player Powers',
       'Pattern Building', 'Voting', 'Territory Building',
       'American Civil War'

In [118]:
def preprocess(df):
    # Perform scaling and grab relevant features
    relevant = ['id', 'name', 'ratingscount', 'avgrating', 'published',
       'minplayers', 'maxplayers', 'best', 'recommended', 'not_recommended',
       'playingtime', 'minplaytime', 'maxplaytime', 'minage', 'suggestedage',
       'language_dependence']

    outliers = ['published','best','maxplayers','maxplaytime','minplaytime','not_recommended','playingtime','ratingscount','recommended']
    normal = ['language_dependence','minage','minplayers','suggestedage','avgrating']

    df[relevant] = df[relevant].apply(lambda x: x.fillna(x.median()) if x.dtype != np.dtype('O') else x,axis=0)

    robust = RobustScaler()
    df[outliers] = robust.fit_transform(df[outliers])
    minmax = MinMaxScaler()
    df[normal] = minmax.fit_transform(df[normal])

    return df

In [150]:
processed.avgrating.describe()

count    17575.000000
mean         0.652484
std          0.092524
min          0.000000
25%          0.640695
50%          0.645855
75%          0.662628
max          1.000000
Name: avgrating, dtype: float64

In [4]:
def dropcols(df):
    # Return only relevant features for KNN
    to_drop = ['id', 'name', 'description', 'avgrating']
    return df.drop(to_drop, axis=1)

In [5]:
def get_test_array(names):
    # Aggregate data for list of names to seed recommendation
    inputs = dropcols(processed[processed['name'].isin(names)])
    return inputs.mean().values.reshape(1, -1)

In [305]:
def get_nearest(names, mechanics, n=20):
    # Grab info for given games
    if names:
        input_array = get_test_array(names)
        # Find the nearest neighbors
        dists, neighbors = nn.kneighbors(input_array, n+len(names))
        dists = dists.tolist()[0]
        neighbors = neighbors.tolist()[0]
        neighborhood = pd.DataFrame(np.array([dists,neighbors]).T,columns=['distance','id'])
        # Scale distances by inverse of avgrating
        weights = processed.query('id == @neighbors')[['id','avgrating']]
        if mechanics:
            # Prefer games with matching mechanics
            mech_games = filter_mechanics(mechanics)
            weights.apply(lambda x: x['avgrating']*10 if x['id'] in mech_games else x['avgrating'],axis=1)
        # Sort results by new scaled distance
        neighborhood['distance']= pd.merge(neighborhood,weights,on='id').apply(lambda x: x['distance']/(x['avgrating']+.01),axis=1)
        neighborhood.sort_values('distance',inplace=True)
        # Return results not in the given names
        return list(filter(lambda g: g['name'] not in names, [game_json[int(game_id)] for game_id in list(neighborhood['id'])]))[:5]
    elif mechanics:
        # Filters games based on given mechanics
        mech_games = filter_mechanics(mechanics)
        # Finds top 3 rated games with those mechanics
        best_mech = processed.query('id == @mech_games').sort_values('avgrating', ascending=False)['id'].head(3).values
        return list(filter(lambda g: g['id'] in best_mech, game_json))

In [None]:
game_json

In [306]:
get_nearest([],['Dice'])

[ 84876 171623  73439]


[{'id': 73439,
  'language_dependence': 1,
  'best': 3,
  'name': 'Troyes',
  'designer': None,
  'recommended': 2,
  'description': "In Troyes, recreate four centuries of history of this famous city of the Champagne region of France.  Each player manages their segment of the population (represented by a horde of dice) and their hand of cards, which represent the three primary domains of the city:  religious, military, and civil.  Players can also offer cash to their opponents' populace in order to get a little moonlighting out of them&mdash;anything for more fame!&#10;&#10;Make your underlings:&#10;&#10;     work on the cathedral&#10;     combat misfortune&#10;     bustle about the city&#10;     and other such tasks that are below your family's stature&#10;&#10;&#10;&#10;&#10;Many editions of Troyes released in 2016 or later include bonus cards originally released on their own, so they have a separate listing in the BGG database.&#10;&#10;",
  'publisher': None,
  'not_recommended': 1

In [273]:
get_nearest(['Catan','Codenames'],[])

      distance       id
0    40.789907   8226.0
4    62.214918   2356.0
1    62.756017   9523.0
3    80.718074   7777.0
2    80.889597   4797.0
5    93.465330   8689.0
6    97.861143   7726.0
8   106.996586     12.0
7   119.403417    709.0
9   124.370326  14026.0
10  135.864952   2110.0
11         NaN   9577.0
12         NaN   5894.0
13         NaN  11325.0
14         NaN  12283.0
15         NaN    415.0
16         NaN  13707.0
17         NaN   8601.0
18         NaN  13464.0
19         NaN  13365.0
20         NaN   7528.0
21         NaN  10658.0


[{'id': 36218,
  'language_dependence': 3,
  'best': 3,
  'name': 'Dominion',
  'designer': None,
  'recommended': 2,
  'description': "(from the back of the box:)&#10;&#10;&quot;You are a monarch, like your parents before you, a ruler of a small pleasant kingdom of rivers and evergreens. Unlike your parents, however, you have hopes and dreams! You want a bigger and more pleasant kingdom, with more rivers and a wider variety of trees. You want a Dominion! In all directions lie fiefs, freeholds, and feodums. All are small bits of land, controlled by petty lords and verging on anarchy. You will bring civilization to these people, uniting them under your banner.&#10;&#10;But wait! It must be something in the air; several other monarchs have had the exact same idea. You must race to get as much of the unclaimed land as possible, fending them off along the way. To do this you will hire minions, construct buildings, spruce up your castle, and fill the coffers of your treasury. Your parents w

In [92]:
weights = processed.query('id == @neighbors')[['id','avgrating']]

In [113]:
weights

Unnamed: 0,id,avgrating
11,12,9.398441
612,709,-0.117067


In [131]:
neighbors_test = [results[1] for results in sorted([(resident[0]/weight, resident[1]) for resident, weight in list(zip(list(zip(dists,neighbors)),weights.avgrating))],key=lambda x: x[0])]

In [109]:
dists_test

(-35.193948948297006, 709)

In [132]:
neighbors_test

[709, 12]

In [56]:
processed.query('id == @neighbors')[['id','avgrating']]

Unnamed: 0,id,avgrating
11,12,9.398441
612,709,-0.117067


In [72]:
processed.query('id == @neighbors')[['id','avgrating']].apply(lambda x: x['avgrating']/10 if x['id']==12 else x['avgrating'],axis=1)


11     0.939844
612   -0.117067
dtype: float64

In [55]:
for row in processed.query('id == @neighbors')[['id','avgrating']]:
    print (row)

id
avgrating


In [76]:
def filter_mechanics(mechanics, df = processed):
    for mechanic in mechanics:
        df = df.query(mechanic+'==1')
    return df['id'].values.tolist()

In [8]:
with open('data.json') as f:
    game_json = json.load(f)

In [123]:
processed = preprocess(raw)

In [124]:
processed.head()

Unnamed: 0,id,name,description,ratingscount,avgrating,published,minplayers,maxplayers,best,recommended,...,Adventure,Puzzle,Novel-based,Humor,Real-time,Racing,Fantasy,Print & Play,Exploration,Dice
0,1,Die Macher,Die Macher is a game about seven sequential po...,14.832808,0.834453,-1.352941,0.3,0.5,1.0,2.0,...,0,0,0,0,0,0,0,0,0,0
1,2,Dragonmaster,Dragonmaster is a trick-taking card game based...,1.290221,0.676818,-1.647059,0.3,0.0,0.5,1.0,...,0,0,0,0,0,0,1,0,0,0
2,3,Samurai,"Part of the Knizia tile-laying trilogy, Samura...",42.643533,0.843664,-0.647059,0.2,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,4,Tal der Könige,When you see the triangular box and the luxuri...,0.659306,0.664694,-1.0,0.2,0.0,0.0,2.0,...,0,0,0,0,0,0,0,0,0,0
4,5,Acquire,"In Acquire, each player strategically invests ...",53.690852,0.835071,-2.647059,0.2,1.0,0.5,1.0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
mechanics=['Adventure','Puzzle']

In [52]:
filter_mechanics(processed,mechanics)

[10765,
 11531,
 31133,
 33959,
 38430,
 42361,
 47170,
 83330,
 127312,
 127398,
 146508,
 161297,
 169984,
 174805,
 185196,
 198287,
 205059,
 207243,
 207991,
 214484,
 218421,
 229965,
 231618,
 234378,
 234439,
 235465,
 242317,
 244769,
 258451]