In [245]:
import os
import random
from dotenv import load_dotenv
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import pickle
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [4]:
load_dotenv()

True

In [5]:
boardgames = pd.read_csv('../data/boardgames_extend.csv', index_col='id')

In [303]:
MAX_ID = pd.read_sql('SELECT MAX(boardgameid) FROM boardgames', engine).loc[0].values[0]
MAX_ID

340909

In [6]:
users = pd.read_csv('../data/users.csv')

In [7]:
ratings = pd.read_csv('../data/ratings_cleaned.csv')

In [8]:
with open('../models/knn_model_cosine.pickle', 'rb') as file:
    model = pickle.load(file)

with open('../models/nmf_50.pickle', 'rb') as file:
    nmf = pickle.load(file)

with open('../models/P_50', 'rb') as file:
    P = np.load(file)    

In [9]:
uri = os.getenv('LOCAL_POSTGRES_BOARDGAMEGEEKS_URI')

engine = create_engine(uri, echo=False)

In [10]:
def lookup_boardgame(ids):    
    '''
    converts boardgame ids into boardgame names
    '''
    return boardgames.loc[ids, 'name'].tolist()

def list_to_query(ids):
    ids_str = [str(i) for i in ids]
    ids_str = ','.join(ids_str)
    return ids_str

def lookup_boardgame_sql(ids):
    ids_str = list_to_query(ids)
    query = f'SELECT boardgamename FROM boardgames WHERE boardgameid IN ({ids_str});'
    request = pd.read_sql(query, engine)
    return request['boardgamename'].tolist()

In [11]:
def lookup_user_id(user_name):
    '''
    returns the user id of a user
    '''
    user_id = users[users['user_name']==user_name]['user_id'].tolist()[0]
    return user_id

def lookup_user_id_sql(user_name):
    '''
    returns the user id of a user
    '''
    query = f'''SELECT userid FROM users WHERE username = '{user_name}';'''
    user_id = pd.read_sql(query, engine )['userid'][0]
    return user_id

In [12]:
def create_user_ratings(user_name):
    '''
    returns a dataframe with rated boardgames for a specified user
    '''
    user_id = lookup_user_id(user_name)
    user = ratings[ratings['user_id']==user_id]
    return user

def create_user_ratings_sql(user_name):
    '''
    returns a dataframe with rated boardgames for a specified user
    '''
    user_id = lookup_user_id_sql(user_name)
    query = f'''SELECT boardgameid,rating, userid FROM ratings WHERE userid = {user_id};'''
    user = pd.read_sql(query, engine)
    return user

In [13]:
def create_user_vector(user_name):
    '''
    returns a 1d array of the ratings of one user
    unrated boardgames = 0
    '''
    user = create_user_ratings(user_name)
    vector_length = ratings['boardgame_id'].max()
    vector = np.repeat(0, vector_length+1)
    vector[user['boardgame_id']] = user['ratings']
    return vector

def create_user_vector_sql(user_name):
    '''
    returns a 1d array of the ratings of one user
    unrated boardgames = 0
    '''
    user = create_user_ratings_sql(user_name)
    query = '''SELECT MAX(boardgameid) FROM boardgames;'''
    vector_length = pd.read_sql(query, engine)['max'][0]
    vector = np.repeat(0, vector_length+1)
    vector[user['boardgameid']] = user['rating']
    return vector

In [14]:
def values_to_list(df, column_name):
    categories = []
    for i in df[df[column_name].notna()].iterrows():
        categories = categories + i[1][column_name].split(', ')
    categories = list(dict.fromkeys(categories))
    categories.sort()
    return categories

def values_to_list_sql(column_name):
    categories = []
    query = f'''SELECT {column_name} FROM boardgames;'''
    df = pd.read_sql(query, engine)
    for i in df[df[column_name].notna()].iterrows():
        categories = categories + i[1][column_name].split(', ')
    categories = list(dict.fromkeys(categories))
    categories.sort()
    return categories

In [15]:
def user_rated_boardgames(user_name):
    query = f'''
        SELECT boardgames.boardgameid, boardgames.boardgamename, ratings.rating FROM boardgames
        JOIN ratings ON ratings.boardgameid = boardgames.boardgameid
        JOIN users ON users.userid = ratings.userid
        WHERE users.username = '{user_name}'
        ORDER BY ratings.rating DESC;
        '''
    df = pd.read_sql(query, engine)
    return df['boardgameid'].tolist(), df['boardgamename'].tolist(), df['rating'].tolist()

In [16]:
def neighbor_recommender(user_name):
    '''
    returns a list of boardgame recommendations
    '''
    user_ratings = create_user_ratings(user_name)
    user_vector = create_user_vector(user_name)
    #create neigbors of user
    distances, neighbor_ids = model.kneighbors([user_vector], n_neighbors=10)
    print(neighbor_ids)
    print(distances)
    neighbor_filter = ratings['user_id'].isin(neighbor_ids[0][1:])
    #create mean retings of games, rated by the neighbors
    neighbor_ratings = ratings[neighbor_filter].groupby('boardgame_id').mean()
    #sort rated games by mean rating
    neighbor_top = neighbor_ratings['ratings'].sort_values(ascending=False)
    #remove games which user rated already
    played_filter = ~neighbor_top.index.isin(user_ratings['boardgame_id'])
    recommend_ids = neighbor_top[played_filter].index
    return boardgames.loc[recommend_ids]['name'].tolist()[:20]

def neighbor_recommender_sql(user_name):
    '''
    returns a list of boardgame recommendations
    '''
    user_ratings = create_user_ratings_sql(user_name)
    user_vector = create_user_vector_sql(user_name)
    distances, neighbor_ids = model.kneighbors([user_vector], n_neighbors=10)
    neighbor_ids = list_to_query(neighbor_ids[0])
    boardgame_ids, boardgame_names, ratings = user_rated_boardgames(user_name)
    boardgame_ids = list_to_query(boardgame_ids)
    query = f'''
        SELECT boardgames.boardgamename, AVG(ratings.rating) FROM ratings 
        JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
        WHERE ratings.userid IN({neighbor_ids}) AND ratings.boardgameid NOT IN ({boardgame_ids})
        GROUP BY boardgames.boardgamename
        ORDER BY avg DESC
        LIMIT 20;
        '''
    return pd.read_sql(query, engine)['boardgamename'].tolist()


In [57]:
def knn_nmf_recommender(user_name):
    user_vector = create_user_vector(user_name)
    knn = NearestNeighbors(metric='cosine')
    knn.fit(P)
    vector_transformed = nmf.transform([user_vector])
    vector_transformed = vector_transformed.T.reshape(50,)
    distances, neighbor_ids = knn.kneighbors([vector_transformed], n_neighbors=20)
    print(neighbor_ids)
    user_ratings = create_user_ratings(user_name)
    played_by_user = user_ratings['boardgame_id']
    neighbor_ratings = ratings[ratings['user_id'].isin(neighbor_ids[0][1:])]
    neighbor_ratings = neighbor_ratings[(~neighbor_ratings['boardgame_id'].isin(played_by_user))]
    value_counts = pd.DataFrame(neighbor_ratings['boardgame_id'].value_counts())
    value_counts.columns = ['count']
    frequently_played = value_counts[value_counts['count']>=5].index
    recommendations = neighbor_ratings[neighbor_ratings['boardgame_id'].isin(frequently_played)].groupby('boardgame_id').mean().sort_values('ratings', ascending=False).head(15).index
    return boardgames.loc[recommendations]['name']

def knn_nmf_recommender_sql(user_name):
        user_vector = create_user_vector_sql(user_name)
        knn = NearestNeighbors(metric='cosine')
        knn.fit(P)
        vector_transformed = nmf.transform([user_vector])
        vector_transformed = vector_transformed.T.reshape(50,)
        distances, neighbor_ids = knn.kneighbors([vector_transformed], n_neighbors=20)
        print(neighbor_ids)
        neighbor_ids = list_to_query(neighbor_ids[0])
        user_ratings = create_user_ratings_sql(user_name)
        boardgame_ids, boardgame_names, ratings = user_rated_boardgames(user_name)
        boardgame_ids = list_to_query(boardgame_ids)
        query = f'''
                SELECT COUNT(ratings.boardgameid), boardgames.boardgamename, AVG(ratings.rating) FROM ratings 
                JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
                WHERE ratings.userid IN({neighbor_ids}) AND ratings.boardgameid NOT IN ({boardgame_ids}) 
                GROUP BY boardgames.boardgamename
                HAVING count(*)>4
                ORDER BY avg DESC
                LIMIT 15
                '''
        return pd.read_sql(query, engine)

In [310]:
def nmf_recommender(user_name):
    user_vector = create_user_vector(user_name)
    user_ratings = create_user_ratings(user_name)
    P = nmf.transform([user_vector])
    Q = nmf.components_
    predictions = np.dot(P, Q)
    #prediction include ids from 0 to length of user_vector
    #pseudo ids for constructing a dataframe
    pseudo_ids = list(range(0,len(user_vector)))
    df = pd.DataFrame(predictions, columns=pseudo_ids)
    recommendations_all = df.T.sort_values(0, ascending=False)
    recommendations_all = recommendations_all.reset_index()
    recommendations_all.columns = ['pseudo_id', 'pred_rating']
    #merging with boardgames dataframe keeps only existing boardgameids
    boardgames_merge = boardgames[['name']].reset_index()
    prediction_df = pd.merge(boardgames_merge[['id','name']], recommendations_all, left_on='id', right_on='pseudo_id', how='left')
    prediction_df = prediction_df.set_index('id')
    played_by_user = user_ratings['boardgame_id']
    played_filter = ~prediction_df.index.isin(played_by_user)
    return prediction_df.loc[played_filter].sort_values('pred_rating', ascending=False).head(15)
    #return prediction_df

def nmf_recommender_sql(user_name):
    query = f'''
        SELECT ratings.boardgameid, ratings.rating, ratings.userid FROM ratings
        JOIN users ON users.userid = ratings.userid
        WHERE users.username = '{user_name}'
        '''
    user = pd.read_sql(query, engine)
    user_vector = np.repeat(0, MAX_ID+1)
    user_vector[user['boardgameid']] = user['rating']
    P = nmf.transform([user_vector])
    Q = nmf.components_
    predictions = np.dot(P, Q)    
    #prediction include ids from 0 to length of user_vector
    #pseudo ids for constructing a dataframe
    pseudo_ids = list(range(0,len(user_vector)))
    df = pd.DataFrame(predictions, columns=pseudo_ids)
    #sort values: existing ids will be first, non-existing ids last (due to 0 value)
    recommendations_all = df.T.sort_values(0, ascending=False)
    recommendations_all = recommendations_all.reset_index()
    recommendations_all.columns = ['pseudo_id', 'pred_rating']
    boardgame_ids = user['boardgameid']
    #filter already played ids from recommendations_all
    recommendations_all = recommendations_all[~recommendations_all['pseudo_id'].isin(boardgame_ids)]
    recommendation_ids = recommendations_all.head(15)['pseudo_id']
    recommendation_ids = list_to_query(recommendation_ids)
    print(recommendation_ids)
    query = f'''
            SELECT boardgameid, boardgamename FROM boardgames
            WHERE boardgameid IN({recommendation_ids})
            '''
    recommendations = pd.read_sql(query, engine)
    return recommendations

In [311]:
nmf_recommender_sql('Ser0')

40834,183394,284083,163412,236457,201808,286096,205637,290448,233867,230802,104557,193738,146508,244522


Unnamed: 0,boardgameid,boardgamename
0,230802,Azul
1,163412,Patchwork
2,183394,Viticulture Essential Edition
3,193738,Great Western Trail
4,40834,Dominion: Intrigue
5,205637,Arkham Horror: The Card Game
6,201808,Clank!: A Deck-Building Adventure
7,146508,T.I.M.E Stories
8,233867,Welcome To...
9,284083,The Crew: The Quest for Planet Nine


In [304]:
query = '''
        SELECT ratings.boardgameid, ratings.rating, ratings.userid FROM ratings
        JOIN users ON users.userid = ratings.userid
        WHERE users.username = 'Ser0'
        '''
user = pd.read_sql(query, engine)
user.head()

Unnamed: 0,boardgameid,rating,userid
0,30549,5.0,40701
1,68448,8.0,40701
2,36218,6.0,40701
3,178900,7.0,40701
4,167791,9.0,40701


In [305]:
vector = np.repeat(0, MAX_ID+1)
vector[user['boardgameid']] = user['rating']

In [309]:
user_rated_boardgames('Ser0')

([188920,
  199792,
  245654,
  167355,
  277700,
  306882,
  246228,
  224517,
  205059,
  63268,
  217372,
  200632,
  167791,
  284108,
  244521,
  242639,
  272533,
  232219,
  135213,
  73664,
  266192,
  120677,
  181304,
  311193,
  218603,
  68448,
  194594,
  173346,
  180974,
  247763,
  318084,
  250458,
  264220,
  291859,
  283155,
  318977,
  244608,
  230914,
  40692,
  178900,
  237251,
  91536,
  263918,
  45315,
  192661,
  162886,
  1465,
  2655,
  283294,
  251247,
  206915,
  204583,
  214293,
  264055,
  207830,
  299169,
  235375,
  155203,
  169786,
  48979,
  193214,
  216465,
  284435,
  299946,
  303733,
  172225,
  202477,
  21790,
  314530,
  258104,
  128,
  160477,
  295486,
  202977,
  51811,
  147020,
  287938,
  277927,
  36218,
  31260,
  320,
  237182,
  147151,
  239188,
  66690,
  4095,
  217085,
  227072,
  31920,
  121073,
  274533,
  10686,
  282171,
  262208,
  260757,
  6657,
  181161,
  39801,
  305985,
  305986,
  3119,
  232417,
  300129,
 

In [286]:
def values_to_list(df, column_name):
    categories = []
    for i in df[df[column_name].notna()].iterrows():
        categories = categories + i[1][column_name].split(', ')
    categories = list(dict.fromkeys(categories))
    categories.sort()
    return categories

def ohe_user_boardgames(user_name, column, weight=False):
    '''
    returns a one-hot-encoded matrix of parameters in column of games played by user
    if weight = True, the encoding gets weighted by the rating
    '''
    games_ohe={}
    user_id = users[users['user_name']==user_name]['user_id'].tolist()[0]
    user_ratings = ratings[ratings['user_id']==user_id].set_index('boardgame_id')
    user_boardgames = boardgames.loc[user_ratings.index]
    user_boardgames = user_boardgames[user_boardgames[column].notna()]
    user_categories = values_to_list(user_boardgames, column)    
    for i in user_boardgames.iterrows():
        game_vector = [0]*len(user_categories)
        for c in i[1][column].split(', '):
            index = user_categories.index(c)
            if weight == True:
                game_vector[index]=1 * user_ratings.loc[i[0]]['ratings']
            else: 
                game_vector[index]=1
        games_ohe[i[0]] = game_vector
    df = pd.DataFrame(games_ohe)
    df = df.transpose()
    df.columns = user_categories
    if 'Deck' in user_categories:
        df['Deck, Bag and Pool Building'] = df['Deck']
        df.drop(columns=['Deck', 'Bag', 'and Pool Building'], inplace=True)
    if 'I Cut' in user_categories:
        df['I Cut, You Choose'] = df['I Cut']
        df.drop(columns=['I Cut', 'You Choose'], inplace=True)
    return user_categories

def ohe_user_boardgames_sql(user_name, column, weight=False):
    '''
    returns a one-hot-encoded matrix of parameters in column of games played by user
    if weight = True, the encoding gets weighted by the rating
    '''
    games_ohe={}
    query = f'''
            SELECT boardgames.{column}, ratings.rating FROM boardgames
            JOIN ratings on ratings.boardgameid = boardgames.boardgameid
            JOIN users on users.userid = ratings.userid
            WHERE users.username = '{user_name}'
            '''
    df_column = pd.read_sql(query, engine)
    df_column = df_column[df_column['machanics'].notna()]
    user_categories = values_to_list(df_column, column)  
    for i in df_column.iterrows():
        game_vector = [0]*len(user_categories)
        for c in i[1][column].split(', '):
            index = user_categories.index(c)
            if weight == True:
                game_vector[index]=1 * i[1]['rating']
            else: 
                game_vector[index]=1
        games_ohe[i[0]] = game_vector
    df = pd.DataFrame(games_ohe)
    df = df.transpose()
    df.columns = user_categories
    if 'Deck' in user_categories:
        df['Deck, Bag and Pool Building'] = df['Deck']
        df.drop(columns=['Deck', 'Bag', 'and Pool Building'], inplace=True)
    if 'I Cut' in user_categories:
        df['I Cut, You Choose'] = df['I Cut']
        df.drop(columns=['I Cut', 'You Choose'], inplace=True)
    return df

In [287]:
ohe_user_boardgames_sql('Ser0', 'machanics', weight=False)

Unnamed: 0,Acting,Action Drafting,Action Points,Action Queue,Action Retrieval,Action Timer,Advantage Token,Area Majority / Influence,Area Movement,Auction/Bidding,...,Turn Order: Pass Order,Turn Order: Progressive,Turn Order: Stat-Based,Variable Phase Order,Variable Player Powers,Variable Set-up,Victory Points as a Resource,Voting,Worker Placement,"Deck, Bag and Pool Building"
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
122,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
123,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
125,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [289]:
ohe_user_boardgames_sql('Ser0', 'machanics')

Unnamed: 0,Acting,Action Drafting,Action Points,Action Queue,Action Retrieval,Action Timer,Advantage Token,Area Majority / Influence,Area Movement,Auction/Bidding,...,Turn Order: Pass Order,Turn Order: Progressive,Turn Order: Stat-Based,Variable Phase Order,Variable Player Powers,Variable Set-up,Victory Points as a Resource,Voting,Worker Placement,"Deck, Bag and Pool Building"
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
122,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
123,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
125,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [226]:
def lookup_boardgamename(search_query):
    """
    returns boardgamename after comparing user input with boardgamenames
    """
    query = 'SELECT boardgameid, boardgamename FROM boardgames'
    result = pd.read_sql(query, engine)
    match = process.extractOne(search_query, result['boardgamename'])
    boardgame_id = result[result['boardgamename']==match[0]]['boardgameid']
    # [(title, score, movieId), ...]
    return match, boardgame_id

In [236]:
def random_recommender(N):
    '''
    returns a list of N random boardgames
    '''
    rec_ids = []
    for n in range(0,N):
        rec_ids.append(random.choice(boardgames.index))
        recommendations = lookup_boardgame(rec_ids)
    return recommendations

In [314]:
query = f'''
SELECT boardgames.boardgameid, boardgames.categories, boardgames.machanics,
ratings.rating FROM ratings
JOIN users ON users.userid = ratings.userid
JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
WHERE users.username = 'Ser0'
'''
pd.read_sql(query, engine)

Unnamed: 0,boardgameid,categories,machanics,rating
0,30549,Medical,"Action Points, Cooperative Game, Hand Manageme...",5.0
1,68448,"Ancient, Card Game, City Building, Civilizatio...","Drafting, Hand Management, Set Collection, Sim...",8.0
2,36218,"Card Game, Medieval","Deck, Bag, and Pool Building, Delayed Purchase...",6.0
3,178900,"Card Game, Deduction, Party Game, Spies/Secret...","Communication Limits, Memory, Push Your Luck, ...",7.0
4,167791,"Economic, Environmental, Industry / Manufactur...","Drafting, End Game Bonuses, Hand Management, H...",9.0
...,...,...,...,...
122,303734,Card Game,"Card Drafting, Set Collection",5.0
123,42636,"Card Game, Educational, Party Game, Puzzle, Wo...",Memory,6.0
124,165984,"Bluffing, Card Game, Educational, Party Game, ...",,6.0
125,135213,"Action / Dexterity, Real-time",Pattern Building,9.0


In [283]:
df_column['machanics'].notna()

0       True
1       True
2       True
3       True
4       True
       ...  
122     True
123     True
124    False
125     True
126     True
Name: machanics, Length: 127, dtype: bool

In [239]:
boardgames.columns.boardgameid

AttributeError: 'Index' object has no attribute 'boardgameid'

In [184]:
boardgames.loc[119]

Unnamed: 0                                                    782
rank                                                         1376
name                                                     Kingdoms
links                                     /boardgame/119/kingdoms
num_voters                                                   4675
categories                   Abstract Strategy, Fantasy, Medieval
mechanics                                          Tile Placement
family                         Series: Fantasy Flight Silver Line
expansions                                   Kingdoms: The Wizard
integrations                                                  NaN
designers                                           Reiner Knizia
publishers      Hans im Glück, ADC Blackfire Entertainment, Ar...
Name: 119, dtype: object

## QUERIES
- SELECT * FROM users WHERE username = 'Ser0';
-     SELECT boardgames.boardgamename FROM boardgames JOIN ratings ON ratings.boardgameid = boardgames.boardgameid JOIN users ON users.userid = ratings.userid WHERE users.username = 'Ser0';

In [None]:
pd.read_sql('SELECT * FROM boardgames LIMIT 10', engine)

In [None]:
pd.read_sql('SELECT * FROM ratings LIMIT 10', engine)

In [89]:
boardgames.loc[183394]

Unnamed: 0                                                     53
rank                                                           24
name                                Viticulture Essential Edition
links             /boardgame/183394/viticulture-essential-edition
num_voters                                                  31378
categories                                      Economic, Farming
mechanics       Contracts, Hand Management, Solo / Solitaire G...
family          Components: Control Boards, Country: Italy, Di...
expansions      Tuscany: Special Worker Promo Cards, Viticultu...
integrations                                                  NaN
designers                             Jamey Stegmaier, Alan Stone
publishers      Stonemaier Games, Angry Lion Games, Arclight, ...
Name: 183394, dtype: object

In [None]:
        query = f'''
                SELECT boardgames.boardgamename, AVG(ratings.rating), COUNT(boardgames.boardgamename) FROM ratings 
                JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
                WHERE ratings.userid IN({neighbor_ids}) AND ratings.boardgameid NOT IN ({boardgame_ids}) 
                GROUP BY boardgames.boardgamename
                HAVING count(*)>1
                ORDER BY avg DESC
                LIMIT 15;
                '''

In [318]:
def get_user_boardgame_ratings(user_name):
    query = f'''
    SELECT boardgames.boardgameid, boardgames.categories, boardgames.machanics,
    ratings.rating, ratings.userid FROM ratings
    JOIN users ON users.userid = ratings.userid
    JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
    WHERE users.username = '{user_name}'
    '''
    user = pd.read_sql(query, engine)
    return user

def ohe_user_boardgames(user, column, weight=False):
    '''
    returns a one-hot-encoded matrix of parameters in column of games played by user
    if weight = True, the encoding gets weighted by the rating
    '''
    print(f'start ohe of {column}')
    games_ohe={}
    print('start task: create ohe dataframe')
    df_column = user[[column,'rating']]
    df_column = df_column[df_column[column].notna()]
    user_categories = values_to_list(df_column, column)  
    for i in df_column.iterrows():
        game_vector = [0]*len(user_categories)
        for c in i[1][column].split(', '):
            index = user_categories.index(c)
            if weight == True:
                game_vector[index]=1 * i[1]['rating']
            else: 
                game_vector[index]=1
        games_ohe[i[0]] = game_vector
    df = pd.DataFrame(games_ohe)
    df = df.transpose()
    df.columns = user_categories
    print('task finished')
    print('start task: clean column names if required')
    if 'Deck' in user_categories:
        df['Deck, Bag and Pool Building'] = df['Deck']
        df.drop(columns=['Deck', 'Bag', 'and Pool Building'], inplace=True)
    if 'I Cut' in user_categories:
        df['I Cut, You Choose'] = df['I Cut']
        df.drop(columns=['I Cut', 'You Choose'], inplace=True)
    print('task finfihed')
    print(f'one hot encoding of {column}')
    return df

def nmf_recommender(user):
    '''
    takes user dataframe
    return recommendations
    '''
    print('start nmf recommender')
    print('start task: create user vector')
    user_vector = np.repeat(0, MAX_ID+1)
    user_vector[user['boardgameid']] = user['rating']
    print('task finished')
    print('start task: transform user vector')
    P = nmf.transform([user_vector])
    Q = nmf.components_
    predictions = np.dot(P, Q)   
    print('task finished') 
    print('start task: get recommendation ids')
    #prediction include ids from 0 to length of user_vector
    #pseudo ids for constructing a dataframe
    pseudo_ids = list(range(0,len(user_vector)))
    df = pd.DataFrame(predictions, columns=pseudo_ids)
    #sort values: existing ids will be first, non-existing ids last (due to 0 value)
    recommendations_all = df.T.sort_values(0, ascending=False)
    recommendations_all = recommendations_all.reset_index()
    recommendations_all.columns = ['pseudo_id', 'pred_rating']
    boardgame_ids = user['boardgameid']
    #filter already played ids from recommendations_all
    recommendations_all = recommendations_all[~recommendations_all['pseudo_id'].isin(boardgame_ids)]
    recommendation_ids = recommendations_all.head(15)['pseudo_id']
    recommendation_ids = list_to_query(recommendation_ids)
    print('task finished')
    print('start task: get recommendation boardgames from aws')
    query = f'''
            SELECT boardgameid, boardgamename FROM boardgames
            WHERE boardgameid IN({recommendation_ids})
            '''
    recommendations = pd.read_sql(query, engine)
    print('task finished')
    print('nmf recommender finished')
    return recommendations['boardgamename'].tolist()

In [316]:
user = get_user_boardgame_ratings('Ser0')
user.head()

Unnamed: 0,boardgameid,categories,machanics,rating,userid
0,30549,Medical,"Action Points, Cooperative Game, Hand Manageme...",5.0,40701
1,68448,"Ancient, Card Game, City Building, Civilizatio...","Drafting, Hand Management, Set Collection, Sim...",8.0,40701
2,36218,"Card Game, Medieval","Deck, Bag, and Pool Building, Delayed Purchase...",6.0,40701
3,178900,"Card Game, Deduction, Party Game, Spies/Secret...","Communication Limits, Memory, Push Your Luck, ...",7.0,40701
4,167791,"Economic, Environmental, Industry / Manufactur...","Drafting, End Game Bonuses, Hand Management, H...",9.0,40701


In [317]:
ohe_user_boardgames(user, 'categories')

start ohe of categories
start task: create ohe dataframe
task finished
start task: clean column names if required
task finfihed
one hot encoding of categories


Unnamed: 0,Abstract Strategy,Action / Dexterity,Adventure,Age of Reason,Ancient,Animals,Aviation / Flight,Bluffing,Card Game,Children's Game,...,Sports,Territory Building,Trains,Transportation,Travel,Trivia,Video Game Theme,Wargame,Word Game,Zombies
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
123,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
124,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,1,0,0,0,0
125,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [319]:
nmf_recommender(user)

start nmf recommender
start task: create user vector
task finished
start task: transform user vector
task finished
start task: get recommendation ids
task finished
start task: get recommendation boardgames from aws
task finished
nmf recommender finished


['Azul',
 'Patchwork',
 'Viticulture Essential Edition',
 'Great Western Trail',
 'Dominion: Intrigue',
 'Arkham Horror: The Card Game',
 'Clank!: A Deck-Building Adventure',
 'T.I.M.E Stories',
 'Welcome To...',
 'The Crew: The Quest for Planet Nine',
 'Architects of the West Kingdom',
 "That's Pretty Clever!",
 'Tapestry',
 'Wingspan: European Expansion',
 'Dominion: Hinterlands']

In [331]:
new_user = {
    'boardgames':['Everdell', 'Ra', 'Cabo'],
    'ratings': [5, 4, 3]
}

In [350]:
test = ['Everdell', 'Ra', 'Cabo']
'\',\''.join(test)

"Everdell','Ra','Cabo"

In [353]:
def create_new_user(new_user):
    boardgamenames = '\',\''.join(new_user['boardgames'])
    query = f'''
    SELECT boardgameid, categories, machanics FROM boardgames
    WHERE boardgamename IN('{boardgamenames}');
    '''
    user = pd.read_sql(query, engine)
    user['rating'] = new_user['ratings']
    return user

In [354]:
create_new_user(new_user)

Unnamed: 0,boardgameid,categories,machanics,rating
0,199792,"Animals, Card Game, City Building, Fantasy","Card Drafting, End Game Bonuses, Hand Manageme...",5
1,12,"Ancient, Mythology","Auction/Bidding, Auction: Once Around, Closed ...",4
2,73664,Card Game,"Hand Management, Memory",3


In [362]:
def get_user_boardgame_ratings(user_name):
    '''
    returns a user dataframe with 'boardgameid', 'rating', 'categories', 'machanics'
    '''
    print('start task: get user data from aws')
    query = f'''
    SELECT boardgames.boardgameid, boardgames.categories, boardgames.machanics,
    ratings.rating FROM ratings
    JOIN users ON users.userid = ratings.userid
    JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
    WHERE users.username = '{user_name}'
    '''
    user = pd.read_sql(query, engine)
    print('task completed')
    return user

In [363]:
get_user_boardgame_ratings('Ser0')

start task: get user data from aws
task completed


Unnamed: 0,boardgameid,categories,machanics,rating
0,30549,Medical,"Action Points, Cooperative Game, Hand Manageme...",5.0
1,68448,"Ancient, Card Game, City Building, Civilizatio...","Drafting, Hand Management, Set Collection, Sim...",8.0
2,36218,"Card Game, Medieval","Deck, Bag, and Pool Building, Delayed Purchase...",6.0
3,178900,"Card Game, Deduction, Party Game, Spies/Secret...","Communication Limits, Memory, Push Your Luck, ...",7.0
4,167791,"Economic, Environmental, Industry / Manufactur...","Drafting, End Game Bonuses, Hand Management, H...",9.0
...,...,...,...,...
122,303734,Card Game,"Card Drafting, Set Collection",5.0
123,42636,"Card Game, Educational, Party Game, Puzzle, Wo...",Memory,6.0
124,165984,"Bluffing, Card Game, Educational, Party Game, ...",,6.0
125,135213,"Action / Dexterity, Real-time",Pattern Building,9.0


In [364]:
users.sample(10)

Unnamed: 0,user_name,num_ratings,user_id
383770,sang7851,1,383770
49308,GenSuperior,107,49308
273145,suparaddy,4,273145
143276,Mi4koda,28,143276
10456,greenbeehive,341,10456
135526,gustavogn,31,135526
298167,FF5_Conan,2,298167
326665,SleepNose,1,326665
210042,deadagain,11,210042
208482,RainbowRash,12,208482
