# Board Game Recommender

### Import libraries and Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

### Import Full dataframe (5M Reviews)

In [2]:
games_df_full = pd.read_csv('./data/clean/games_df.csv', index_col=0)

  mask |= (ar1 == a)


In [3]:
games_df_full.head(1)

Unnamed: 0,user,rating,comment,ID,name,minplayers,maxplayers,playingtime,usersrated,average
8631330,86018,7.0,,112686,Epic Spell Wars of the Battle Wizards: Duel at...,2,6,30,5312,6.5401


In [4]:
games_df_full.shape

(5000000, 10)

### Create a subset of data for recomender due to memory issues/file sizes

In [5]:
# subset of full dataset for recommender
games_df = games_df_full[['name', 'ID', 'user', 'rating']]

In [6]:
#Due to memory issues when creating pivot tables below, reduce the size of the dataframe
#Keep only games with more than 500 reviews
#modified from: https://stackoverflow.com/questions/51079543/pandas-groupby-apply-vs-transform-with-specific-functions
games_df_subset = games_df[games_df.groupby(['name'])['rating'].transform('count') >= 500]
games_df_subset.shape

(3754734, 4)

In [7]:
#Sample 100,000 reviews
games_df_subset = games_df_subset.sample(n=100000, replace=False, random_state=123)
games_df_subset.shape

(100000, 4)

In [8]:
games_df_subset.head(2)

Unnamed: 0,name,ID,user,rating
452249,Ticket to Ride,9209,327272,9.0
7815624,Timeline: Inventions,85256,192313,5.5


In [9]:
games_df_subset.user.nunique()

62815

In [10]:
games_df_subset.name.nunique()

1743

### Prepare recommender using cosine distances

In [11]:
#pivot table from the books_df dataframe
pivot_df = pd.pivot_table(games_df_subset, index='name', columns='user', values='rating')
#create sparse matrix
sparse_df = sparse.csr_matrix(pivot_df.fillna(0))
#calculate cosine distances for similarities
recommender = pairwise_distances(sparse_df, metric='cosine')
#export as a dataframe
rec_df = pd.DataFrame(recommender, columns=pivot_df.index, index=pivot_df.index)

In [12]:
# get average rating, number of players, playing time, etc to filter by in app, group by game ID
games_info = games_df_full.groupby('name').mean()
games_info.drop(columns=['user'], inplace=True)
games_info.rename(columns={'rating': 'average_rating'}, inplace=True)
games_info.head(2)

rec_df = pd.merge(left = rec_df, right= games_info, right_index = True, left_index = True)

### Define book Recommender function

In [13]:
def game_recommender(game_title, average_rating = 7.5, minplayers = -np.inf, playingtime = np.inf):
    '''Users can input a game title, specify what the average rating should be above, specify minplayers and playingtime'''
    rec_df2 = rec_df[rec_df['average_rating'] >= average_rating]
    rec_df3 = rec_df2[rec_df2['minplayers'] >= minplayers]
    rec_df4 = rec_df3[rec_df3['playingtime'] <= playingtime]
    return 1- rec_df4[game_title].sort_values()[1:6]

### Example Recommendations

In [16]:
game_recommender('Monopoly')

name
Watergate                            0.014971
Paths of Glory                       0.013943
Heroes of Normandie                  0.012718
Freedom: The Underground Railroad    0.012394
Roll Player                          0.012015
Name: Monopoly, dtype: float64

- No similar titles in "Fans also like" section of boardgamegeek.com

In [21]:
game_recommender('Terraforming Mars')

name
Nusfjord                          0.027515
Batman: Gotham City Chronicles    0.013334
Sword & Sorcery                   0.013018
Wingspan                          0.012968
Lords of Waterdeep                0.012334
Name: Terraforming Mars, dtype: float64

Similar titles in "Fans also like" section of boardgamegeek.com:
- Wingspan

In [23]:
game_recommender('7 Wonders')

name
Millennium Blades                               0.014159
Summoner Wars: Guild Dwarves vs Cave Goblins    0.013410
Dominion                                        0.012938
Star Wars: Armada                               0.012329
Feudum                                          0.012152
Name: 7 Wonders, dtype: float64

Similar titles in "Fans also like" section of boardgamegeek.com:
- Dominion

In [14]:
game_recommender('Cosmic Encounter')

name
Smash Up: It's Your Fault!                      0.014927
Pipeline                                        0.014022
Clank! In! Space!: A Deck-Building Adventure    0.013601
Cyclades                                        0.013400
Tichu                                           0.012918
Name: Cosmic Encounter, dtype: float64

- No similar titles in "Fans also like" section of boardgamegeek.com

Accounting for limited similarities in boardgamegeek's webiste for "fans also like":
- Dataset had to be scaled down considerably due to memory issues and file size.
- Recommender does not take into account type of board game - future work may include adding a board game "style" filter.

### Export for streamlit app

In [15]:
#add game title, re-arrange for streamlit app
rec_df.insert(0, "game_title", rec_df.index)

# save rec_df for use in streamlit app
rec_df.to_pickle('./streamlit_app/data/games.pk1')