In [1]:
import os
import pickle

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [2]:
# Load key/value pairs from a local .env file into the process environment;
# the database URI fetched with os.getenv further down is expected to come from there.
load_dotenv()

True

In [3]:
# Board game metadata, indexed by the CSV's 'id' column; lookup_boardgame reads its 'name' column.
boardgames = pd.read_csv('../data/boardgames_extend.csv', index_col='id')

In [4]:
# User table; lookup_user_id expects 'user_name' and 'user_id' columns in it.
users = pd.read_csv('../data/users.csv')

In [5]:
# Rating records; the dataframe helpers below read its 'user_id', 'boardgame_id' and 'ratings' columns.
ratings = pd.read_csv('../data/ratings_cleaned.csv')

In [6]:
# Load the nearest-neighbour model the recommenders query via model.kneighbors
# (presumably already fitted, since nothing in this notebook fits it).
# NOTE(review): pickle.load can execute arbitrary code contained in the file --
# only load pickles that come from a trusted source.
with open('../models/knn_model_cosine.pickle', 'rb') as file:
    model = pickle.load(file)

In [7]:
# Connection string of the local Postgres database, read from the environment
# (which load_dotenv() populated from the .env file).
uri = os.getenv('LOCAL_POSTGRES_BOARDGAMEGEEKS_URI')
if not uri:
    # Fail early with an actionable message instead of an opaque SQLAlchemy error.
    raise RuntimeError(
        'LOCAL_POSTGRES_BOARDGAMEGEEKS_URI is not set; add it to your .env file '
        'or export it before running this notebook.'
    )

# Shared SQLAlchemy engine used by every *_sql helper; echo=False keeps SQL logging off.
engine = create_engine(uri, echo=False)

In [8]:
def lookup_boardgame(ids):
    '''
    Translate boardgame ids into boardgame names.

    ids: list-like of boardgame ids found in the index of `boardgames`.
    Returns the matching names as a list, in the order the ids were given.
    '''
    names = boardgames['name'].loc[ids]
    return names.tolist()

def list_to_query(ids):
    '''
    Render an iterable of ids as one comma-separated string,
    e.g. [1, 2, 3] -> '1,2,3', for use inside an SQL IN (...) clause.
    '''
    return ','.join(map(str, ids))

def lookup_boardgame_sql(ids):
    '''
    Look up boardgame names in the database for the given boardgame ids.

    ids: iterable of integer-like boardgame ids.

    Returns a list of names in whatever row order the database yields them
    (the query has no ORDER BY, so it need not follow the order of `ids`).
    An empty `ids` returns an empty list without touching the database.
    Raises ValueError/TypeError if an id cannot be converted to int.
    '''
    # Coerce to int so nothing but numbers can end up in the SQL text.
    clean_ids = [int(i) for i in ids]
    if not clean_ids:
        # "IN ()" is a syntax error in PostgreSQL, so short-circuit.
        return []
    ids_str = list_to_query(clean_ids)
    query = f'SELECT boardgamename FROM boardgames WHERE boardgameid IN ({ids_str});'
    request = pd.read_sql(query, engine)
    return request['boardgamename'].tolist()

In [17]:
def lookup_user_id(user_name):
    '''
    Resolve a user name to its user id using the `users` dataframe.

    Returns the id of the first matching row; raises IndexError when
    no row matches.
    '''
    is_match = users['user_name'] == user_name
    matching_ids = users.loc[is_match, 'user_id'].tolist()
    return matching_ids[0]

def lookup_user_id_sql(user_name):
    '''
    Resolve a user name to its user id by querying the `users` table.

    user_name: the name to look up; sent to the database as a bound parameter.

    Returns the `userid` value of the first matching row.
    Raises KeyError when no row matches.
    '''
    # Bound parameter instead of string interpolation: names containing quotes
    # no longer break the statement and cannot inject SQL.
    query = text('SELECT userid FROM users WHERE username = :user_name;')
    result = pd.read_sql(query, engine, params={'user_name': user_name})
    if len(result) == 0:
        raise KeyError(f'no user named {user_name!r}')
    return result['userid'][0]

In [10]:
def create_user_ratings(user_name):
    '''
    Select the rows of `ratings` that belong to the given user.

    Returns a dataframe holding only that user's rating rows.
    '''
    wanted_id = lookup_user_id(user_name)
    return ratings.loc[ratings['user_id'] == wanted_id]

def create_user_ratings_sql(user_name):
    '''
    Fetch every rating row of the given user from the database.

    Returns a dataframe with the columns boardgameid, rating and userid.
    '''
    uid = lookup_user_id_sql(user_name)
    return pd.read_sql(
        f'''SELECT boardgameid,rating, userid FROM ratings WHERE userid = {uid};''',
        engine,
    )

In [11]:
def create_user_vector(user_name):
    '''
    Build the dense rating vector of one user from the `ratings` dataframe.

    Position i holds the user's rating for boardgame id i; boardgames the
    user has not rated stay 0. The vector has max(boardgame_id) + 1 entries,
    where the maximum is taken over the whole `ratings` dataframe.

    Returns a 1d float array.
    '''
    user = create_user_ratings(user_name)
    vector_length = ratings['boardgame_id'].max()
    # Float zeros on purpose: an integer array would silently truncate
    # fractional ratings (e.g. 7.5 -> 7) when they are assigned below.
    vector = np.repeat(0.0, vector_length + 1)
    vector[user['boardgame_id']] = user['ratings']
    return vector

def create_user_vector_sql(user_name):
    '''
    Build the dense rating vector of one user from the database.

    Position i holds the user's rating for boardgame id i; boardgames the
    user has not rated stay 0. The vector has MAX(boardgameid) + 1 entries,
    where the maximum is taken over the `boardgames` table.

    Returns a 1d float array.
    '''
    user = create_user_ratings_sql(user_name)
    query = '''SELECT MAX(boardgameid) FROM boardgames;'''
    # The unnamed aggregate column comes back as 'max'.
    # NOTE(review): the length must equal the feature count the model expects -- confirm.
    vector_length = pd.read_sql(query, engine)['max'][0]
    # Float zeros on purpose: an integer array would silently truncate
    # fractional ratings (e.g. 7.5 -> 7) when they are assigned below.
    vector = np.repeat(0.0, vector_length + 1)
    vector[user['boardgameid']] = user['rating']
    return vector

In [12]:
def values_to_list(df, column_name):
    '''
    Collect the distinct values stored in a comma-separated text column.

    df: dataframe containing `column_name`.
    column_name: column whose non-null cells are strings of values
        separated by ', ' (comma followed by a space).

    Returns the distinct values as a sorted list.
    '''
    unique_values = set()
    # Only non-null cells are split; a set de-duplicates in linear time
    # instead of re-copying a growing list for every row.
    for cell in df.loc[df[column_name].notna(), column_name]:
        unique_values.update(cell.split(', '))
    return sorted(unique_values)

def values_to_list_sql(column_name):
    '''
    Collect the distinct values of a comma-separated text column of the
    `boardgames` table.

    column_name: name of the column to read. It is interpolated into the SQL
        text as an identifier (identifiers cannot be bound parameters), so
        only pass trusted, hard-coded column names.

    Returns the distinct values as a sorted list.
    '''
    query = f'''SELECT {column_name} FROM boardgames;'''
    df = pd.read_sql(query, engine)
    # Reuse the dataframe helper rather than duplicating its split/de-duplicate loop.
    return values_to_list(df, column_name)

In [19]:
def user_rated_boardgames(user_name):
    '''
    Fetch the boardgames a user has rated, best rating first.

    user_name: name of the user; sent to the database as a bound parameter.

    Returns three parallel lists: boardgame ids, boardgame names and ratings,
    ordered by rating in descending order.
    '''
    # :user_name is bound by the driver, so quotes in a name neither break
    # the statement nor allow SQL injection.
    query = text('''
        SELECT boardgames.boardgameid, boardgames.boardgamename, ratings.rating FROM boardgames
        JOIN ratings ON ratings.boardgameid = boardgames.boardgameid
        JOIN users ON users.userid = ratings.userid
        WHERE users.username = :user_name
        ORDER BY ratings.rating DESC;
        ''')
    df = pd.read_sql(query, engine, params={'user_name': user_name})
    return df['boardgameid'].tolist(), df['boardgamename'].tolist(), df['rating'].tolist()

In [21]:
def neighbor_recommender(user_name, n_neighbors=10, n_recommendations=20):
    '''
    Recommend boardgames to a user from the ratings of similar users,
    using the in-memory dataframes.

    user_name: name of the user to recommend for.
    n_neighbors: number of neighbors requested from the model (default 10);
        the first one returned is dropped, see below.
    n_recommendations: maximum number of names to return (default 20).

    Returns a list of boardgame names ordered by the neighbors' mean rating
    (highest first), excluding games the user has already rated.
    '''
    user_ratings = create_user_ratings(user_name)
    user_vector = create_user_vector(user_name)
    # find the nearest neighbors of the user
    _, neighbor_ids = model.kneighbors([user_vector], n_neighbors=n_neighbors)
    # Skip the first hit: presumably the user themself -- TODO confirm that the
    # model's row indices coincide with the values in ratings['user_id'].
    neighbor_filter = ratings['user_id'].isin(neighbor_ids[0][1:])
    # mean rating per game over the neighbors' rows; selecting the column first
    # avoids averaging unrelated (possibly non-numeric) columns
    neighbor_ratings = ratings.loc[neighbor_filter].groupby('boardgame_id')['ratings'].mean()
    # sort rated games by mean rating
    neighbor_top = neighbor_ratings.sort_values(ascending=False)
    # remove games which the user rated already
    played_filter = ~neighbor_top.index.isin(user_ratings['boardgame_id'])
    # truncate before the name lookup so only the returned ids are resolved
    recommend_ids = neighbor_top[played_filter].index[:n_recommendations]
    return boardgames.loc[recommend_ids, 'name'].tolist()

def neighbor_recommender_sql(user_name, n_neighbors=10, n_recommendations=20):
    '''
    Recommend boardgames to a user from the ratings of similar users,
    aggregating the neighbors' ratings in the database.

    user_name: name of the user to recommend for.
    n_neighbors: number of neighbors requested from the model (default 10).
    n_recommendations: maximum number of names to return (default 20).

    Returns a list of boardgame names ordered by the neighbors' average
    rating (highest first), excluding games the user has already rated.
    '''
    user_vector = create_user_vector_sql(user_name)
    _, neighbor_ids = model.kneighbors([user_vector], n_neighbors=n_neighbors)
    # int() coercion guarantees that only numbers are interpolated into the SQL text.
    # NOTE(review): assumes the model's row indices coincide with ratings.userid -- confirm.
    neighbor_ids = list_to_query([int(i) for i in neighbor_ids[0]])
    rated_ids, _, _ = user_rated_boardgames(user_name)
    if rated_ids:
        rated_ids = list_to_query([int(i) for i in rated_ids])
        exclude_rated = f'AND ratings.boardgameid NOT IN ({rated_ids})'
    else:
        # "NOT IN ()" is invalid SQL; with nothing rated there is nothing to exclude.
        exclude_rated = ''
    query = f'''
        SELECT boardgames.boardgamename, AVG(ratings.rating) AS avg FROM ratings
        JOIN boardgames ON boardgames.boardgameid = ratings.boardgameid
        WHERE ratings.userid IN({neighbor_ids}) {exclude_rated}
        GROUP BY boardgames.boardgamename
        ORDER BY avg DESC
        LIMIT {int(n_recommendations)};
        '''
    return pd.read_sql(query, engine)['boardgamename'].tolist()


In [22]:
# Smoke test: SQL-backed recommendations for the sample user 'Ser0'.
neighbor_recommender_sql('Ser0')

['The Lord of the Rings: Journeys in Middle-Earth',
 'Go',
 'Magic: The Gathering',
 'Scythe: The Rise of Fenris',
 'Puerto Rico',
 'Eclipse: Second Dawn for the Galaxy',
 'Clank!: A Deck-Building Adventure',
 'Smart10',
 'Teotihuacan: City of Gods',
 'The Isle of Cats',
 'Oceans',
 'Crokinole',
 'Architects of the West Kingdom',
 'On Mars',
 'Rebound',
 'Jenga',
 'Mage Knight Board Game',
 'Arkham Horror (Third Edition)',
 'Gloomhaven',
 "Tzolk'in: The Mayan Calendar"]

In [None]:
# Same request through the dataframe-backed implementation, to compare with the SQL version.
neighbor_recommender('Ser0')

In [None]:
#user_ratings = create_user_ratings_sql('Ser0')
#user_vector = create_user_vector_sql('Ser0')
#distances, neighbor_ids = model.kneighbors([user_vector], n_neighbors=10)
#neighbor_ids = list_to_query(neighbor_ids[0])
#boardgame_ids, boardgame_names, ratings = user_rated_boardgames('Ser0')
#boardgame_ids = list_to_query(boardgame_ids)
#query = f'''
#        SELECT boardgameid, AVG(rating) FROM ratings 
#        WHERE userid IN({neighbor_ids}) AND boardgameid NOT IN ({boardgame_ids})
#        GROUP BY boardgameid
#        ORDER BY avg DESC;
#        '''
#pd.read_sql(query, engine)

Unnamed: 0,boardgamename,avg
0,The Lord of the Rings: Journeys in Middle-Earth,10.0
1,Go,10.0
2,Magic: The Gathering,10.0
3,Scythe: The Rise of Fenris,10.0
4,Puerto Rico,10.0
5,Eclipse: Second Dawn for the Galaxy,10.0
6,Clank!: A Deck-Building Adventure,9.125
7,Smart10,9.0
8,Teotihuacan: City of Gods,9.0
9,The Isle of Cats,9.0


In [None]:
# NOTE(review): `recommendations` is not assigned anywhere in the visible cells; on a fresh
# kernel this raises NameError unless it is defined elsewhere -- confirm or remove.
recommendations

In [None]:
# NOTE(review): `neighbor_ids` is only assigned inside functions or commented-out code in the
# visible cells, so this relies on leftover kernel state -- confirm or remove.
list_to_query(neighbor_ids[0])

In [None]:
# Build the rating vector of the sample user from the database for inspection.
vector = create_user_vector_sql('Ser0')

In [None]:
# Spot-check one position of the vector; the index is a boardgame id (here 155203).
vector[155203]

In [None]:
# Show the rating rows of the sample user as returned by the database.
create_user_ratings_sql('Ser0')

In [None]:

    #return df

In [None]:
# Show the (ids, names, ratings) lists of the games the sample user has rated.
user_rated_boardgames('Ser0')

## QUERIES
- `SELECT * FROM users WHERE username = 'Ser0';`
- `SELECT boardgames.boardgamename FROM boardgames JOIN ratings ON ratings.boardgameid = boardgames.boardgameid JOIN users ON users.userid = ratings.userid WHERE users.username = 'Ser0';`

In [None]:
# Peek at the first 10 rows of the boardgames table to inspect its columns.
pd.read_sql('SELECT * FROM boardgames LIMIT 10', engine)

In [None]:
# Peek at the first 10 rows of the ratings table to inspect its columns.
pd.read_sql('SELECT * FROM ratings LIMIT 10', engine)