In [109]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import (cosine_similarity, 
                                     euclidean_distances,
                                     cosine_distances)

In [110]:
#loading the lookup table of game names, keyed by BGGId
#(the leftover 'Unnamed: 0' column from the csv is discarded)
names = (
    pd.read_csv('./data/recommender_data/names.csv')
    .set_index('BGGId')
    .drop(columns=['Unnamed: 0'])
)

In [111]:
#columns that should not be modeled on, or that are all 0's
unmodeled_columns = [
    'AvgRating',
    'BGGId.1',
    'Once-Per-Game Abilities',
    'Pattern Recognition',
    'Lose a Turn',
    'Programmed Movement',
    'Deduction',
    'Sudden Death Ending',
    'Highest-Lowest Scoring',
    'Increase Value of Unchosen Resources',
    'Ratio / Combat Results Table',
    'Player Judge',
    'Chit-Pull System',
    'Three Dimensional Movement',
    'Stat Check Resolution',
    'Action Timer',
    'Multiple Maps',
    'Hidden Roles',
    'Tug of War',
    'Chaining',
    'Ladder Climbing',
    'Predictive Bid',
    'Critical Hits and Failures',
    'Interrupts',
    'Zone of Control',
    'Bribery',
    'Area-Impulse',
    'Measurement Movement',
    'Map Reduction',
    'Resource to Move',
    'Mancala',
    'Hidden Movement',
    'Deck Construction',
    "Prisoner's Dilemma",
    'Movement Template',
    'Slide/Push',
    'Targeted Clues',
    'Command Cards',
    'Grid Coverage',
    'Relative Movement',
    'Action/Event',
    'I Cut, You Choose',
    'Die Icon Resolution',
    'Elapsed Real Time Ending',
    'Advantage Token',
    'Roles with Asymmetric Information',
    'Traitor Game',
    'Moving Multiple Units',
    'Semi-Cooperative Game',
    'Communication Limits',
    'Time Track',
    'Speed Matching',
    'Cube Tower',
    'Impulse Movement',
    'Delayed Purchase',
    'Single Loser Game',
    'Matching',
    'Induction',
    'Physical Removal',
    'Narrative Choice / Paragraph',
    'Pieces as Map',
    'Follow',
    'Finale Ending',
    'Order Counters',
    'Passed Action Token',
    'King of the Hill',
    'Action Retrieval',
    'Force Commitment',
    'Rondel',
    'Automatic Resource Growth',
    'Legacy Game',
]

#columns that apply to fewer than 5 games (per the original note in this cell)
rare_columns = [
    'Alliances',
    'Trick-taking',
    'Action Points',
    'Investment',
    'Market',
    'Square Grid',
    'Stock Holding',
    'Victory Points as a Resource',
    'Enclosure',
    'Pattern Building',
    'Melding and Splaying',
    'Income',
    'Race',
    'Random Production',
    'Variable Set-up',
    'Action Queue',
    'Bias',
    'Scenario / Mission / Campaign Game',
    'Voting',
    'Events',
    'Paper-and-Pencil',
    'Player Elimination',
    'Role Playing',
    'Movement Points',
    'Simulation',
    'Variable Phase Order',
    'Commodity Speculation',
    'Cooperative Game',
    'Connections',
    'Memory',
    'Score-and-Reset Game',
    'Layering',
    'Map Addition',
    'Secret Unit Deployment',
    'Campaign / Battle Card Driven',
    'Tech Trees / Tech Tracks',
    'Action Drafting',
    'Minimap Resolution',
    'Map Deformation',
    'Bingo',
    'Crayon Rail System',
    'Line Drawing',
    'Pattern Movement',
    'Static Capture',
    'Different Dice Movement',
    'Solo / Solitaire Game',
    'Line of Sight',
    'End Game Bonuses',
    'Worker Placement',
    'Real-Time',
    'Ownership',
    'Kill Steal',
    'Track Movement',
    'TableauBuilding',
    'Hidden Victory Points',
    'Card Play Conflict Resolution',
    'Storytelling',
    'Catch the Leader',
    'Re-rolling and Locking',
    'Loans',
    'Deck, Bag, and Pool Building',
    'Move Through Deck',
    'Contracts',
    'Dexterity',
    'Physical',
]

#reading the product data, indexing it by BGGId, pruning the unwanted columns
#and replacing the remaining nulls with 0s
games = (
    pd.read_csv('./data/recommender_data/product_data.csv')
    .set_index('BGGId')
    .drop(columns=unmodeled_columns)
    .fillna(0)
    .drop(columns=rare_columns)
)

In [112]:
#building the game-by-game cosine similarity matrix; rows and columns are both
#labelled with BGGId (with a single argument, cosine_similarity compares the
#rows of `games` against each other)
cos_df = pd.DataFrame(
    cosine_similarity(games),
    index=games.index,
    columns=games.index,
)

In [113]:
#renaming the index and columns of the recommender df for ease of use

#creating a list of all the game names, using the names dataframe.
#This is a single vectorized lookup instead of filtering `names` once per
#column. If a BGGId appears more than once in `names`, its first row is used
#(the same choice the previous `[0]` lookup made).
name_lookup = names.loc[~names.index.duplicated(keep='first'), 'Name']

#failing with an explicit message when a game has no entry in `names`
#(this also triggers if the cell is re-run after the columns were renamed)
unmatched_ids = cos_df.columns[~cos_df.columns.isin(name_lookup.index)]
if len(unmatched_ids) > 0:
    raise KeyError(
        f'No name found in names.csv for BGGId(s): {list(unmatched_ids[:10])}'
    )

game_names = name_lookup.reindex(cos_df.columns).tolist()

#resetting the column names
cos_df.columns = game_names

#setting the index as game name instead of game ID.
#The index is assigned directly rather than through a temporary 'Name' column,
#which would overwrite the column of any game whose title is literally "Name".
cos_df.index = pd.Index(game_names, name='Name')

Our data file is still too large to upload to GitHub. We do not need every single similarity score for every board game, so let's limit our dataframe to only the top 100 most similar games for each game (the code below keeps the 100 highest-ranked rows per game and then drops the first one).

In [114]:
#previewing the first rows of the name-labelled similarity matrix
cos_df.head()

Unnamed: 0_level_0,Die Macher (1986),Samurai (1998),Acquire (1964),Cathedral (1978),El Caballero (1998),Elfenland (1998),Bohnanza (1997),Ra (1999),Catan (1995),Basari (1998),...,Furnace (2021),Imperium: Classics (2021),Rajas of the Ganges: The Dice Charmers (2020),MicroMacro: Crime City (2020),Faiyum (2020),The King Is Dead: Second Edition (2020),Descent: Legends of the Dark (2021),The Crew: Mission Deep Sea (2021),Terraforming Mars: Ares Expedition (2021),Red Rising (2021)
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Die Macher (1986),1.0,0.974303,0.968171,0.982801,0.821004,0.992246,0.952997,0.967183,0.9431,0.864775,...,0.950044,0.873862,0.792977,0.99924,0.743616,0.940526,0.901663,0.923541,0.985115,0.984753
Samurai (1998),0.974303,1.0,0.999655,0.918647,0.67297,0.994549,0.996706,0.999551,0.99367,0.73213,...,0.857244,0.743095,0.637811,0.978824,0.574995,0.842018,0.781625,0.816091,0.923047,0.922226
Acquire (1964),0.968171,0.999655,1.0,0.907956,0.653364,0.991471,0.998488,0.999992,0.996274,0.713996,...,0.843444,0.725336,0.617383,0.973114,0.553418,0.827573,0.765106,0.800635,0.912639,0.911767
Cathedral (1978),0.982801,0.918647,0.907956,1.0,0.910186,0.95482,0.883599,0.9064,0.868459,0.941678,...,0.990869,0.946562,0.890164,0.980061,0.850701,0.986613,0.963386,0.978024,0.999887,0.999908
El Caballero (1998),0.821004,0.67297,0.653364,0.910186,1.0,0.746415,0.610776,0.650507,0.585636,0.996203,...,0.957707,0.994988,0.998735,0.810068,0.991919,0.965533,0.986771,0.976329,0.905683,0.906583


In [115]:
#creating an empty dataframe
#NOTE(review): cos_df_2 is not referenced by any other cell visible here;
#it looks like a leftover - confirm before removing
cos_df_2 = pd.DataFrame()

In [116]:
#reordering each column, and populating the column with names instead of scores
#
#For every game (one column of cos_df) the other games are ranked from most to
#least similar, and the scores are replaced by the ranked game names.
#
#The ranking works on the underlying numpy array by position, so that:
# - cos_df is not modified while it is being iterated over, and the whole
#   frame is not re-sorted once per column;
# - the game itself is removed explicitly (row i of column i), instead of
#   assuming it always sorts into the first row - another game with an equal
#   score could otherwise take that slot and leave a game recommending itself;
# - ties in score are broken by original row order (stable sort);
# - the result does not depend on the game names being unique.
#
#This cell expects cos_df to still hold numeric scores, i.e. it must run once
#on the freshly built similarity matrix.

#limiting the dataframe to only the top 100 recommendations
top_n = 100

scores = cos_df.to_numpy()
row_names = cos_df.index.to_numpy()

#dropping the first row because it is redundant (it is the game itself),
#so one row fewer than top_n is kept
n_recs = max(min(top_n, len(row_names)) - 1, 0)

ranked_names = np.empty((n_recs, scores.shape[1]), dtype=object)
for pos in range(scores.shape[1]):
    #descending order of similarity for this game
    order = np.argsort(-scores[:, pos], kind='stable')
    #excluding the game's own row, then keeping the best n_recs
    order = order[order != pos][:n_recs]
    ranked_names[:, pos] = row_names[order]

#The index labels are kept exactly as before (the labels of original rows
#1..n_recs), so the csv written from this frame keeps the same layout;
#they are leftovers of the original row order and carry no ranking meaning.
cos_df = pd.DataFrame(ranked_names,
                      index=cos_df.index[1:1 + n_recs],
                      columns=cos_df.columns)

In [117]:
#saving the recommender dataframe to disk as a csv file
recommender_csv_path = './data/recommender_data/product_based_recommender_df.csv'
cos_df.to_csv(recommender_csv_path)