In [31]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import (cosine_similarity, 
                                        euclidean_distances, 
                                        cosine_distances, 
                                        pairwise_distances)

from scipy import sparse

In [32]:
#loading the game-name lookup table and keying it by game ID (BGGId)
names = (
    pd.read_csv('./data/recommender_data/names.csv')
    .set_index('BGGId')
    #'Unnamed: 0' is presumably a row counter written out when the csv was
    #saved; it is not needed for the lookup
    .drop(columns=['Unnamed: 0'])
)

In [33]:
#loading the ratings dataframe and indexing it by game ID in one step
ratings = pd.read_csv('./data/recommender_data/rating_data.csv').set_index('BGGId')

In [34]:
#creating a sparse matrix from the dataframe
sparse=sparse.csr_matrix(ratings.fillna(0))

In [35]:
#getting the pairwise distances (cos similarity) for every game based on reviews
dists = pairwise_distances(sparse, metric='cosine')

In [36]:
#building the recommender df: a square table of the pairwise distances,
#labelled by game ID along both the rows and the columns
recommender_df = pd.DataFrame(
    data=dists,
    index=ratings.index,
    columns=ratings.index,
)

In [37]:
#renaming the index and columns of the recommender df for ease of use

#creating a list of all the game names, using the names dataframe
#the names are fetched with a single label lookup instead of filtering the
#whole names dataframe once per column; if an ID appears more than once in
#names its first entry is used, and an ID with no entry raises a KeyError
name_by_id = names['Name']
name_by_id = name_by_id[~name_by_id.index.duplicated(keep='first')]
game_names = name_by_id.loc[recommender_df.columns].tolist()

#resetting the column names
recommender_df.columns = game_names

#setting the index as game name instead of game ID
#the index is assigned directly rather than through a temporary 'Name' column,
#which would have overwritten the column of a game that is itself titled 'Name'
recommender_df.index = pd.Index(game_names, name='Name')

In [38]:
#reordering each column, and populating the column with names instead of scores

#the columns are addressed by label in the exported file, so they must be unique
if not recommender_df.columns.is_unique:
    raise ValueError('game names must be unique to be used as column labels')

#all columns are ranked in a single numpy pass instead of re-sorting (and
#copying) the entire dataframe once per column
#working on a copy so the stored distances are left untouched
distances = recommender_df.to_numpy(dtype=float, copy=True)

#rows and columns list the games in the same order, so the diagonal holds each
#game's distance to itself; that distance is 0, but another game can tie at 0,
#so the diagonal is pushed to -inf to guarantee the game itself is ranked first
#and the row dropped below is never a real recommendation
np.fill_diagonal(distances, -np.inf)

#row positions of the closest games for every column, closest first
closest = np.argsort(distances, axis=0)[:100]
candidate_names = recommender_df.index.to_numpy()

#limiting the dataframe to only the top 100 entries per game
#the row labels are simply those of the first 100 rows, as before; they do not
#describe the contents of the row
recommender_df = pd.DataFrame(candidate_names[closest],
                              index=recommender_df.index[:100],
                              columns=recommender_df.columns)

#dropping the first row because it is redundant (it is the game itself),
#which leaves 99 recommendations per game
recommender_df = recommender_df.tail(-1)

In [39]:
#writing the finished recommender table to disk as a csv file
recommender_df.to_csv(path_or_buf='./data/recommender_data/review_based_recommender_df.csv')