In [1]:
# TODO: save hottest game info
# TODO: calculate different score for each type of features and/or filter only some types

In [2]:
from bs4 import BeautifulSoup as bs
import requests
from time import sleep
import pandas as pd
import numpy as np
import cachetools
import pickle
from IPython import display

In [3]:
HOT_BOARDGAME_URL = "https://www.boardgamegeek.com/xmlapi2/hot?type=boardgame"
BOARDGAME_INFO_URL = "https://www.boardgamegeek.com/xmlapi2/thing?id={id}"
USER_COLLECTION_URL = "https://www.boardgamegeek.com/xmlapi2/collection?username={username}"
TOP_N = 5

In [4]:
username = input('Insert your BGG username: ')

Insert your BGG username:  informatica


In [5]:
# simple function for requests execution and BeautifulSoup content conversion
def get_bs_content_from_url(url):
    url_response = requests.get(url)
    url_bs_content = bs(url_response.content, "lxml")
    return url_bs_content

In [6]:
# simple function that, given an id, it returns its features (boardgamecategory, boardgamemechanic...)
def get_boardgame_features(id, additional_info=None):
    if additional_info is None:
        additional_info = []
    features = []
    boardgame_info_response_bs_content = get_bs_content_from_url(BOARDGAME_INFO_URL.format(id=id))
    links = boardgame_info_response_bs_content.find_all("link")
    for link in links:
        features.append(
            {
                "type": link.get("type"),
                "id": link.get("id"),
                "value": link.get("value")
            }
        )
    if len(additional_info) == 0:
        return features
    else:
        return features, *[boardgame_info_response_bs_content.find(a).text for a in additional_info]

In [7]:
# get hot boardgames
# since the hot boardgames section is pretty static during the day, add the result into a TTL canche
# of one element only and 12h of life 
try:
    with open('hot_boardgames.pickle', 'rb') as f:
        cache = pickle.load(f)
        hot_boardgames = cache.get('hot_boardgames', [])
except FileNotFoundError:
    cache = cachetools.TTLCache(ttl=60*60*12, maxsize=1)  # 12hours
    hot_boardgames = []
    
if len(hot_boardgames) == 0:
    print("updating hot_boardgames cache")
    hot_boardgames_bs_content = get_bs_content_from_url(HOT_BOARDGAME_URL)
    items = hot_boardgames_bs_content.find_all("item")
    for item in items:
        features, description, thumbnail = get_boardgame_features(item.get("id"), additional_info=['description', 'thumbnail'])
        hot_boardgames.append(
            {
                "id": item.get("id"),
                "rank": item.get("id"),
                "name": item.find("name").get("value"),
                "features": features,
                "description": description,
                "thumbnail": thumbnail
            }
        )
    
    cache['hot_boardgames'] = hot_boardgames
    with open('hot_boardgames.pickle', 'wb') as f:
        pickle.dump(cache, f)

hot_boardgames_df = pd.DataFrame(hot_boardgames)

updating hot_boardgames cache


In [8]:
hot_boardgames_df.head()

Unnamed: 0,id,rank,name,features,description,thumbnail
0,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...
1,351817,351817,Marvel Zombies: A Zombicide Game,"[{'type': 'boardgamecategory', 'id': '1116', '...","In Marvel Zombies: A Zombicide Game, the hunge...",https://cf.geekdo-images.com/1nxn5PtVk-cbS3tHD...
2,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...
3,316090,316090,Vivid Memories,"[{'type': 'boardgamecategory', 'id': '1009', '...",Every stick is a sword. Every bike is a steed....,https://cf.geekdo-images.com/-KMXyStWyla5jh_BR...
4,300217,300217,Merchants of the Dark Road,"[{'type': 'boardgamecategory', 'id': '1020', '...","After half a year of daylight, we must now pre...",https://cf.geekdo-images.com/m0097WIpBdC7tdA-m...


In [10]:
# TODO: add number of liked items found

In [28]:
# get user's collection. Please not that for the first request, you only get a "got it, retry later" response
# so if this is the case, retry again after 5s
# since the user collection is pretty static during the day, add the result into a TTL canche
# of five elements and 12h of life 
try:
    with open('users_collections.pickle', 'rb') as f:
        cache = pickle.load(f)
        liked_boardgames = cache.get(username, [])
except FileNotFoundError:
    cache = cachetools.TTLCache(ttl=60*60*12, maxsize=5)  # 12hours
    liked_boardgames = []

if len(liked_boardgames) == 0:
    print("updating users collections")
    collection_bs_content = get_bs_content_from_url(USER_COLLECTION_URL.format(username=username))
    if collection_bs_content.find("errors"):
        raise AttributeError(f"username '{username}' not found")
    liked_items = collection_bs_content.find_all("item")
    if len(liked_items) == 0:
        sleep(5)
        collection_bs_content = get_bs_content_from_url(USER_COLLECTION_URL.format(username=username))
        liked_items = collection_bs_content.find_all("item")

    # for each boardgame in collection, get the same features we got above for the hottest 
    print(f"found {len(liked_items)} liked boardgames, processing...")
    for liked_item in liked_items:
        liked_boardgames.append(
            {
                "id": liked_item.get("objectid"),
                "name": liked_item.find("name").text,
                "features": get_boardgame_features(liked_item.get("objectid")),
                "numplays": int(liked_item.find("numplays").text)
            }
        )

    cache[username] = liked_boardgames
    with open('users_collections.pickle', 'wb') as f:
        pickle.dump(cache, f)

# if the number of liked boardgames is empty makes no sense to continue but this can be caused by:
# - the user doesn't have any boardgame in their collection, try with another username you know it has a not-empty collection 
# - the user has a big collection, try again later because it may take a while to get it in this 2-steps process  
if len(liked_boardgames) == 0:
    raise ValueError(f"""no liked boardgame for for user '{username}'. This can be caused by: 
    - the user doesn't have any boardgame in their collection, try another
    - the user has a big collection, try later
    """)
    
liked_boardgames_df = pd.DataFrame(liked_boardgames)

updating users collections
found 33 liked boardgames, processing...


In [29]:
liked_boardgames_df.head()

Unnamed: 0,id,name,features,numplays
0,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '...",0
1,332321,ALIEN: Fate of the Nostromo,"[{'type': 'boardgamecategory', 'id': '1064', '...",0
2,256852,Aliens: Another Glorious Day in the Corps,"[{'type': 'boardgamecategory', 'id': '1047', '...",3
3,344105,Anunnaki: Dawn of the Gods,"[{'type': 'boardgamecategory', 'id': '1050', '...",0
4,181254,Attack on Titan: The Last Stand,"[{'type': 'boardgamecategory', 'id': '1046', '...",2


In [30]:
# merge the two DFs hot_boardgames_df and liked_boardgames_df in a cross join way => each hot bg with every liked bg
# in this way we are ready to calculate the affinity for each couple of boardgames
total_df = hot_boardgames_df.merge(liked_boardgames_df, how='cross', suffixes=('_hot', '_liked'))

In [31]:
total_df.head()

Unnamed: 0,id_hot,rank,name_hot,features_hot,description,thumbnail,id_liked,name_liked,features_liked,numplays
0,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '...",0
1,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,332321,ALIEN: Fate of the Nostromo,"[{'type': 'boardgamecategory', 'id': '1064', '...",0
2,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,256852,Aliens: Another Glorious Day in the Corps,"[{'type': 'boardgamecategory', 'id': '1047', '...",3
3,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,344105,Anunnaki: Dawn of the Gods,"[{'type': 'boardgamecategory', 'id': '1050', '...",0
4,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,181254,Attack on Titan: The Last Stand,"[{'type': 'boardgamecategory', 'id': '1046', '...",2


In [32]:
# CORE
# the affinity calculation is based on the following idea: calculate the percentage of the features
# of each hottest game compared against each game the user likes
def calculate_affinity(x, verbose=False):
    hot_boardgame, liked_boardgame = x['features_hot'], x['features_liked']
    n_features = 0
    common_features = []
    liked_boardgame_features_id = [f['value'] for f in liked_boardgame]
    for hot_boardgame_feature in hot_boardgame:
        n_features+=1
        if hot_boardgame_feature['value'] in liked_boardgame_features_id:
            common_features.append(hot_boardgame_feature['value'])
    if n_features>0:
        affinity = len(common_features)/n_features
    else:
        affinity = 0
    return affinity, common_features

In [33]:
# calculate now the affinity for each couple hot_boardgame - liked_boardgame and add to the total_df 
# the corresponding affinity and common features that contributed to obtain that affinity 
total_df[['affinity', 'common_features']] = total_df.apply(calculate_affinity, result_type='expand', axis=1, verbose=True)

In [34]:
total_df.head()

Unnamed: 0,id_hot,rank,name_hot,features_hot,description,thumbnail,id_liked,name_liked,features_liked,numplays,affinity,common_features
0,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '...",0,0.133333,"[Hand Management, Solo / Solitaire Game]"
1,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,332321,ALIEN: Fate of the Nostromo,"[{'type': 'boardgamecategory', 'id': '1064', '...",0,0.0,[]
2,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,256852,Aliens: Another Glorious Day in the Corps,"[{'type': 'boardgamecategory', 'id': '1047', '...",3,0.0,[]
3,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,344105,Anunnaki: Dawn of the Gods,"[{'type': 'boardgamecategory', 'id': '1050', '...",0,0.2,"[Ancient, Solo / Solitaire Game, Admin: Upcomi..."
4,354934,354934,Legacy of Yu,"[{'type': 'boardgamecategory', 'id': '1050', '...","During the reign of Emperor Yao, the people of...",https://cf.geekdo-images.com/cVE1FZQR3gqdlAmeh...,181254,Attack on Titan: The Last Stand,"[{'type': 'boardgamecategory', 'id': '1046', '...",2,0.0,[]


In [142]:
total_df['weighted_affinity'] = total_df['affinity']*(total_df['numplays']+0.5)
total_df['because_you_also_like'] = total_df.apply(lambda x: (x['name_liked'], x['common_features'], x['weighted_affinity']), axis=1)

ranked_df = total_df.query(f"name_hot not in {liked_boardgames_df.name.to_list()}")\
    .groupby(["id_hot", "name_hot", "thumbnail", "description"])\
    .agg(
        {
            'weighted_affinity': sum, 
            'because_you_also_like': lambda x: sorted(list(x), key=lambda x: x[2], reverse=True)[:3]
        }, result_type='expand')\
    .reset_index()\
    .rename({'weighted_affinity': 'total_affinity'}, axis=1)\
    .sort_values('total_affinity', ascending=False, ignore_index=True)

# # OLD STYLE
# total_df['because_you_also_like'] = total_df.apply(lambda x: (x['name_liked'], x['common_features'], x['affinity']), axis=1)
#
# ranked_df = total_df.query(f"name_hot not in {liked_boardgames_df.name.to_list()}")\
#     .groupby(["id_hot", "name_hot", "thumbnail", "description"])\
#     .agg(
#         {
#             'affinity': max, 
#             'because_you_also_like': lambda x: sorted(list(x), key=lambda x: x[2], reverse=True)[:1]
#         }, result_type='expand')\
#     .reset_index()\
#     .rename({'affinity': 'total_affinity'}, axis=1)\
#     .sort_values('total_affinity', ascending=False, ignore_index=True)

In [146]:
def dispaly_ranked(ranked_df, top_n):
    ranked_df = ranked_df.copy()
    ranked_df['thumbnail'] = ranked_df['thumbnail'].apply(lambda x: f'<img src="{x}" width="500"/>')
    ranked_df['name_hot'] = ranked_df.apply(lambda x: f"<a href='https://boardgamegeek.com/boardgame/{x['id_hot']}'>{x['name_hot']}</a>", axis=1)
    ranked_df['description'] = ranked_df['description'].str.slice(0, 350) + '...'
    ranked_df = ranked_df.drop(['id_hot'], axis=1)
    return display.HTML(ranked_df.head(top_n).to_html(index=True, escape=False, classes='table table-striped'))

In [147]:
dispaly_ranked(ranked_df, 5)

Unnamed: 0,name_hot,thumbnail,description,total_affinity,because_you_also_like
0,Bitoku,,"In Bitoku, the players take on the roles of Bitoku spirits of the forest in their path towards transcendence, with the goal of elevating themselves and becoming the next great spirit of the forest. To do so, they will have the help of the yÅkai, the kodamas and the different pilgrims that accompany them on their path. This is a hand-mana...",13.642857,"[(Clash of Deck, [Fantasy], 3.6071428571428568), (Takenoko, [Set Collection, Country: Japan, Theme: Trees and Forests], 3.321428571428571), (Unmatched: Cobble & Fog, [Fantasy, Hand Management, IELLO], 0.9642857142857142)]"
1,Ark Nova,,"In Ark Nova, you will plan and design a modern, scientifically managed zoo. With the ultimate goal of owning the most successful zoological establishment, you will build enclosures, accommodate animals, and support conservation projects all over the world. Specialists and unique buildings will help you in achieving this goal. Each player h...",13.604167,"[(Takenoko, [Animals, Environmental, End Game Bonuses, Hexagon Grid, Set Collection, Tile Placement], 3.875), (Clash of Deck, [Card Drafting], 2.1041666666666665), (Ticket to Ride: Europe, [Card Drafting, End Game Bonuses, Hand Management, Set Collection, Korea Boardgames Co., Ltd.], 0.9375)]"
2,Oath: Chronicles of Empire and Exile,,"In Oath, one to six players guide the course of history in an ancient land. Players might take the role of agents bolstering the old order or scheme to bring the kingdom to ruin. The consequences of one game will ripple through those that follow, changing what resources and actions future players may have at their disposal and even altering the gam...",12.288462,"[(Clash of Deck, [Fantasy, Card Drafting], 3.884615384615385), (Takenoko, [Action Points, Dice Rolling, Matagot], 1.7884615384615385), (Cards Against Humanity, [Hand Management, Crowdfunding: Kickstarter], 0.8076923076923077)]"
3,Merchants of the Dark Road,,"After half a year of daylight, we must now prepare for the dark season. The roads will be treacherous but they will still need to be braved by a select few in order to keep our cities thriving. In Merchants of the Dark Road, you are one of these brave few merchants that travel the dangerous paths between cities. While the job is perilous, fame and ...",10.8125,"[(Clash of Deck, [Fantasy, Medieval], 4.208333333333333), (Takenoko, [Contracts, Dice Rolling, End Game Bonuses, Set Collection], 2.583333333333333), (Cards Against Humanity, [Crowdfunding: Kickstarter], 0.4375)]"
4,Rise of the Necromancers,,"Are you evil? Maybe just a little? Then unleash your most wretched forces and seize power! To become the Necromancer King, you have to master dark magic, raise an army of undead minions, explore festering dungeons, and research ancient artifacts and spells. Rise of the Necromancers is an evil strategy game for 2-5 players, in whic...",10.2,"[(Clash of Deck, [Fantasy], 3.3666666666666667), (Takenoko, [Dice Rolling], 1.0333333333333332), (Unmatched: Cobble & Fog, [Fantasy, Horror, Variable Player Powers], 0.9)]"
