# Recommender System for Board Games from [BoardGameGeek.com](https://boardgamegeek.com/)



1. Iatrou Manos
2. Papageorgiou Vasileios
3. Sykianakis Xaralambos




# Dataset Description



*   Games File
*   User Ratings File
*   Mechanics File
*   Themes File



In [1]:
import pandas as pd
import numpy as np
import os
import glob
from pathlib import Path
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt

#from google.colab import drive
#drive.mount('/content/drive')

In [48]:
def get_description(df, start_col=15):
    """
    Build a comma-separated label string for every row from its flag columns.

    Args:
    df (DataFrame): Frame whose columns from position ``start_col`` onward hold
        numeric flags; a value greater than 0 marks that column as active.
    start_col (int): Position of the first flag column. Defaults to 15, the
        offset this function has always used.

    Returns:
    Series: One string per row, indexed like ``df``: the names of the active
        flag columns joined with ',' in column order, or 'Other' when no flag
        column is active for that row.
    """
    flags = df.iloc[:, start_col:]
    flag_names = list(flags.columns)
    # One vectorised comparison for the whole frame instead of a Python-level
    # Series lookup per cell; NaN compares as False, so missing values count
    # as inactive.
    active = flags.gt(0).to_numpy(dtype=bool)

    details = []
    for row in active:
        hits = [name for name, hit in zip(flag_names, row) if hit]
        details.append(','.join(hits) if hits else 'Other')
    return pd.Series(details, index=df.index)

In [53]:
def downcasting_types(df: pd.DataFrame):
    """
    Shrink 64-bit numeric columns to the smallest dtype pandas can fit them in.

    The frame is modified in place and also returned. Columns whose dtype is
    neither 'float64' nor 'int64' are left as they are.

    Args:
    df (DataFrame): The DataFrame whose columns are downcast.

    Returns:
    DataFrame: The same ``df`` object, with its columns downcast.
    """
    # (dtype to look for, `downcast=` argument handed to pd.to_numeric)
    wide_to_narrow = (('float64', 'float'), ('int64', 'integer'))
    for name in df.columns:
        current = df[name].dtype
        for wide, narrow in wide_to_narrow:
            if current == wide:
                df[name] = pd.to_numeric(df[name], downcast=narrow)
                break
    return df

In [38]:
def _without_prefix(columns, prefix):
    """Map each column starting with ``prefix`` to its name with ``prefix`` removed."""
    return {col: col.replace(prefix, '') for col in columns if col.startswith(prefix)}


def preprocess(df, file_name, *, like_threshold=6, min_user_ratings=300, min_game_ratings=100):
    """
    Apply the file-specific cleaning step to one loaded CSV.

    The input frame is never modified; use the returned frame.

    Args:
    df (DataFrame): The DataFrame to be preprocessed.
    file_name (str): The name of the file corresponding to the DataFrame.
        "games": keep a fixed set of columns and strip the 'Cat:' prefix.
        "themes": strip the 'Theme_' prefix from column names.
        "mechanics": returned unchanged.
        "user_ratings": binarise 'Rating', replace 'Username' with an integer
        'uid', then keep only sufficiently active users and sufficiently
        rated games.
        Any other name: returned unchanged.
    like_threshold (number): Ratings strictly above this become 1, everything
        else (including missing ratings) becomes -1. Defaults to 6.
    min_user_ratings (int): Minimum number of ratings a user needs to be kept.
        Defaults to 300.
    min_game_ratings (int): Minimum number of ratings (from the kept users) a
        game needs to be kept. Defaults to 100.

    Returns:
    DataFrame: The preprocessed DataFrame.
    """
    if file_name == "games":
        columns_to_keep = ['BGGId','GameWeight','MfgPlaytime','NumAlternates','NumExpansions','NumImplementations','Kickstarted',
                   'Cat:Thematic','Cat:Strategy','Cat:War','Cat:Family','Cat:CGS','Cat:Abstract','Cat:Party','Cat:Childrens']
        df = df[columns_to_keep]
        df = df.rename(columns=_without_prefix(df.columns, 'Cat:'))
    elif file_name == "themes":
        # rename() without inplace returns a new frame, leaving the caller's intact.
        df = df.rename(columns=_without_prefix(df.columns, 'Theme_'))
    elif file_name == "mechanics":
        pass
    elif file_name == "user_ratings":
        # Compute the derived columns from the untouched input first.
        liked = df['Rating'] > like_threshold
        uid = LabelEncoder().fit_transform(df['Username'])

        df = df.drop(columns='Username')
        df['Rating'] = liked.map({True: 1, False: -1})
        df['uid'] = uid

        # Group sizes broadcast back to the rows give a boolean mask in one
        # vectorised pass, instead of calling a Python lambda once per group.
        ratings_per_user = df.groupby('uid')['uid'].transform('size')
        df = df[ratings_per_user >= min_user_ratings]
        # Game counts are taken after the user filter, as before.
        ratings_per_game = df.groupby('BGGId')['BGGId'].transform('size')
        # copy() hands back an independent frame so that later column
        # assignments on the result do not raise SettingWithCopyWarning.
        df = df[ratings_per_game >= min_game_ratings].copy()

    return df

In [60]:
def load_train(path: str):
    """
    Load all CSV files in the path folder, preprocess the known ones, and build
    the ratings table and the per-game feature table.

    Files named games, themes, mechanics and user_ratings are preprocessed;
    any other CSV is loaded as-is with a warning and otherwise ignored.

    Args:
    path (str): The path to the folder containing CSV files.

    Returns:
    tuple: ``(user_ratings_df, games_df)``. ``user_ratings_df`` is the
        preprocessed, downcast ratings table, or None when the folder has no
        user_ratings.csv. ``games_df`` is games merged with mechanics and
        themes, with the mechanic/theme flag columns collapsed into a single
        'Details' text column.

    Raises:
    FileNotFoundError: If ``path`` is not an existing directory.
    KeyError: If the games, mechanics or themes CSV is missing.
    """
    folder = Path(path)
    if not folder.is_dir():
        raise FileNotFoundError(f"{path} directory was not found")

    dataframes = {}
    for file in sorted(folder.glob('*.csv')):
        file_name = file.stem
        df = pd.read_csv(file)

        if file_name in ['games', 'themes', 'mechanics', 'user_ratings']:
            df = preprocess(df, file_name)
        else:
            print(f"Warning: No preprocessing defined for file '{file_name}'. Loaded without preprocessing.")

        dataframes[file_name] = df

    # Inner joins on the game id: only games present in all three files survive.
    merged_df = pd.merge(dataframes['games'], dataframes['mechanics'], on='BGGId', how='inner')
    games_df = pd.merge(merged_df, dataframes['themes'], on='BGGId', how='inner')
    games_df['Details'] = get_description(games_df)

    # The individual flag columns are now summarised in 'Details'.
    # NOTE(review): both column lists contain 'BGGId', so the key column is
    # dropped here as well -- confirm that downstream code does not need it.
    columns_to_drop = dataframes['themes'].columns.tolist() + dataframes['mechanics'].columns.tolist()
    games_df = games_df.drop(columns=columns_to_drop)
    games_df = downcasting_types(games_df)

    # The ratings file is optional: without it, return None for the ratings
    # instead of failing inside downcasting_types.
    user_ratings_df = dataframes.get('user_ratings')
    if user_ratings_df is None:
        print(f"Warning: No 'user_ratings' file found in '{path}'. Returning None for the ratings.")
    else:
        user_ratings_df = downcasting_types(user_ratings_df)
    return user_ratings_df, games_df

In [61]:
# Load and preprocess every CSV in the data folder.
# NOTE: this absolute path is specific to one machine; change it before running elsewhere.
user_ratings_df,games_df = load_train(r'C:\Users\maniat\OneDrive - Lyse AS\Desktop\personal\MSc\Customer Analytics\Recommender Classification\Recommender_Files')

In [62]:
# Bare expression: as the last statement of a notebook cell it displays the games table.
games_df

Unnamed: 0,GameWeight,MfgPlaytime,NumAlternates,NumExpansions,NumImplementations,Kickstarted,Thematic,Strategy,War,Family,CGS,Abstract,Party,Childrens,Details
0,4.3206,240,2,0,0,0,0,1,0,0,0,0,0,0,"Alliances,Area Majority / Influence,Auction/Bi..."
1,1.9630,30,0,0,2,0,0,1,0,0,0,0,0,0,"Trick-taking,Fantasy"
2,2.4859,60,6,0,1,0,0,1,0,0,0,0,0,0,"Area Majority / Influence,Hand Management,Hexa..."
3,2.6667,60,0,0,0,0,0,0,0,0,0,0,0,0,"Area Majority / Influence,Auction/Bidding,Set ..."
4,2.5031,90,6,2,0,0,0,1,0,0,0,0,0,0,"Hand Management,Tile Placement,Investment,Mark..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21920,0.0000,40,0,0,0,0,0,0,0,0,0,0,0,0,"Auction/Bidding,Trick-taking"
21921,1.6667,45,0,0,1,0,0,0,0,0,0,0,0,0,"Area Majority / Influence,Hand Management,Race..."
21922,4.5625,1,0,0,0,1,0,0,0,0,0,0,0,0,"Dice Rolling,Solo / Solitaire Game,Die Icon Re..."
21923,1.0000,15,0,0,0,0,0,0,0,0,0,0,0,0,"Dice Rolling,Pattern Building"


In [58]:
# List every column name of games_df, one per line.
for column_name in games_df.columns.tolist():
    print(column_name)

BGGId
GameWeight
MfgPlaytime
NumAlternates
NumExpansions
NumImplementations
Kickstarted
Thematic
Strategy
War
Family
CGS
Abstract
Party
Childrens
Alliances
Area Majority / Influence
Auction/Bidding
Dice Rolling
Hand Management
Simultaneous Action Selection
Trick-taking
Hexagon Grid
Once-Per-Game Abilities
Set Collection
Tile Placement
Action Points
Investment
Market
Square Grid
Stock Holding
Victory Points as a Resource
Enclosure
Pattern Building
Pattern Recognition
Modular Board
Network and Route Building
Point to Point Movement
Melding and Splaying
Negotiation
Trading
Push Your Luck
Income
Race
Random Production
Variable Set-up
Roll / Spin and Move
Variable Player Powers
Action Queue
Bias
Grid Movement
Lose a Turn
Programmed Movement
Scenario / Mission / Campaign Game
Voting
Events
Paper-and-Pencil
Player Elimination
Role Playing
Movement Points
Simulation
Variable Phase Order
Area Movement
Commodity Speculation
Cooperative Game
Deduction
Sudden Death Ending
Connections
Highest-Lowes