In [1]:
# TODO: save hottest game info
# TODO: calculate different score for each type of features and/or filter only some types

In [2]:
from bs4 import BeautifulSoup as bs
import requests
from time import sleep
import pandas as pd
import numpy as np
import cachetools
import pickle
from IPython import display

In [3]:
HOT_BOARDGAME_URL = "https://api.geekdo.com/xmlapi2/hot?type=boardgame"
BOARDGAME_INFO_URL = "https://api.geekdo.com/xmlapi2/thing?id={id}"
USER_COLLECTION_URL = "https://api.geekdo.com/xmlapi2/collection?username={username}"
TOP_N = 5

In [4]:
username = input('Insert your BGG username: ')

Insert your BGG username:  informatica


In [5]:
# simple function for requests execution and BeautifulSoup content conversion
def get_bs_content_from_url(url):
    url_response = requests.get(url)
    url_bs_content = bs(url_response.content, "lxml")
    return url_bs_content

In [6]:
# simple function that, given an id, it returns its features (boardgamecategory, boardgamemechanic...)
def get_boardgame_features(id, additional_info=None):
    if additional_info is None:
        additional_info = []
    features = []
    boardgame_info_response_bs_content = get_bs_content_from_url(BOARDGAME_INFO_URL.format(id=id))
    links = boardgame_info_response_bs_content.find_all("link")
    for link in links:
        features.append(
            {
                "type": link.get("type"),
                "id": link.get("id"),
                "value": link.get("value")
            }
        )
    if len(additional_info) == 0:
        return features
    else:
        return features, *[boardgame_info_response_bs_content.find(a).text for a in additional_info]

In [7]:
# get hot boardgames
# since the hot boardgames section is pretty static during the day, add the result into a TTL canche
# of one element only and 12h of life 
try:
    with open('hot_boardgames.pickle', 'rb') as f:
        cache = pickle.load(f)
        hot_boardgames = cache.get('hot_boardgames', [])
except FileNotFoundError:
    cache = cachetools.TTLCache(ttl=60*60*12, maxsize=1)  # 12hours
    hot_boardgames = []
    
if len(hot_boardgames) == 0:
    print("updating hot_boardgames cache")
    hot_boardgames_bs_content = get_bs_content_from_url(HOT_BOARDGAME_URL)
    items = hot_boardgames_bs_content.find_all("item")
    for item in items:
        features, description, thumbnail = get_boardgame_features(item.get("id"), additional_info=['description', 'thumbnail'])
        hot_boardgames.append(
            {
                "id": item.get("id"),
                "rank": item.get("id"),
                "name": item.find("name").get("value"),
                "features": features,
                "description": description,
                "thumbnail": thumbnail
            }
        )
    
    cache['hot_boardgames'] = hot_boardgames
    with open('hot_boardgames.pickle', 'wb') as f:
        pickle.dump(cache, f)

hot_boardgames_df = pd.DataFrame(hot_boardgames)

In [8]:
hot_boardgames_df.head()

Unnamed: 0,id,rank,name,features,description,thumbnail
0,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...
1,329082,329082,Radlands,"[{'type': 'boardgamemechanic', 'id': '2001', '...","Radlands is a competitive, dueling card game a...",https://cf.geekdo-images.com/YpQUtpxNtDHBF5eVc...
2,312484,312484,Lost Ruins of Arnak,"[{'type': 'boardgamecategory', 'id': '1022', '...","On an uninhabited island in uncharted seas, ex...",https://cf.geekdo-images.com/6GqH14TJJhza86BX5...
3,316554,316554,Dune: Imperium,"[{'type': 'boardgamecategory', 'id': '1064', '...","Some important links: The Official FAQ, the Un...",https://cf.geekdo-images.com/PhjygpWSo-0labGrP...
4,255984,255984,Sleeping Gods,"[{'type': 'boardgamecategory', 'id': '1022', '...",&quot;Are the stars unfamiliar here?&quot; she...,https://cf.geekdo-images.com/Zdt8l4oTBpFICsMyN...


In [9]:
# TODO: add number of liked items found

In [10]:
# get user's collection. Please not that for the first request, you only get a "got it, retry later" response
# so if this is the case, retry again after 5s
# since the user collection is pretty static during the day, add the result into a TTL canche
# of five elements and 12h of life 
try:
    with open('users_collections.pickle', 'rb') as f:
        cache = pickle.load(f)
        liked_boardgames = cache.get(username, [])
except FileNotFoundError:
    cache = cachetools.TTLCache(ttl=60*60*12, maxsize=5)  # 12hours
    liked_boardgames = []

if len(liked_boardgames) == 0:
    print("updating users collections")
    collection_bs_content = get_bs_content_from_url(USER_COLLECTION_URL.format(username=username))
    if collection_bs_content.find("errors"):
        raise AttributeError(f"username '{username}' not found")
    liked_items = collection_bs_content.find_all("item")
    if len(liked_items) == 0:
        sleep(5)
        collection_bs_content = get_bs_content_from_url(USER_COLLECTION_URL.format(username=username))
        liked_items = collection_bs_content.find_all("item")

    # for each boardgame in collection, get the same features we got above for the hottest 
    print(f"found {len(liked_items)} liked boardgames, processing...")
    for liked_item in liked_items:
        liked_boardgames.append(
            {
                "id": liked_item.get("objectid"),
                "name": liked_item.find("name").text,
                "features": get_boardgame_features(liked_item.get("objectid"))
            }
        )

    cache[username] = liked_boardgames
    with open('users_collections.pickle', 'wb') as f:
        pickle.dump(cache, f)

# if the number of liked boardgames is empty makes no sense to continue but this can be caused by:
# - the user doesn't have any boardgame in their collection, try with another username you know it has a not-empty collection 
# - the user has a big collection, try again later because it may take a while to get it in this 2-steps process  
if len(liked_boardgames) == 0:
    raise ValueError(f"""no liked boardgame for for user '{username}'. This can be caused by: 
    - the user doesn't have any boardgame in their collection, try another
    - the user has a big collection, try later
    """)
    
liked_boardgames_df = pd.DataFrame(liked_boardgames)

In [11]:
liked_boardgames_df.head()

Unnamed: 0,id,name,features
0,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '..."
1,308500,Clash of Deck,"[{'type': 'boardgamecategory', 'id': '1002', '..."
2,334307,Clash of Decks: Starter Kit,"[{'type': 'boardgamecategory', 'id': '1002', '..."
3,325489,Dante Alighieri: Comedia – Inferno,"[{'type': 'boardgamecategory', 'id': '1002', '..."
4,342372,The Elder Scrolls V: Skyrim – The Adventure Game,"[{'type': 'boardgamecategory', 'id': '1010', '..."


In [12]:
# merge the two DFs hot_boardgames_df and liked_boardgames_df in a cross join way => each hot bg with every liked bg
# in this way we are ready to calculate the affinity for each couple of boardgames
total_df = hot_boardgames_df.merge(liked_boardgames_df, how='cross', suffixes=('_hot', '_liked'))

In [13]:
total_df.head()

Unnamed: 0,id_hot,rank,name_hot,features_hot,description,thumbnail,id_liked,name_liked,features_liked
0,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '..."
1,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,308500,Clash of Deck,"[{'type': 'boardgamecategory', 'id': '1002', '..."
2,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,334307,Clash of Decks: Starter Kit,"[{'type': 'boardgamecategory', 'id': '1002', '..."
3,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,325489,Dante Alighieri: Comedia – Inferno,"[{'type': 'boardgamecategory', 'id': '1002', '..."
4,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,342372,The Elder Scrolls V: Skyrim – The Adventure Game,"[{'type': 'boardgamecategory', 'id': '1010', '..."


In [14]:
# CORE
# the affinity calculation is based on the following idea: calculate the percentage of the features
# of each hottest game compared against each game the user likes
def calculate_affinity(x, verbose=False):
    hot_boardgame, liked_boardgame = x['features_hot'], x['features_liked']
    n_features = 0
    common_features = []
    liked_boardgame_features_id = [f['value'] for f in liked_boardgame]
    for hot_boardgame_feature in hot_boardgame:
        n_features+=1
        if hot_boardgame_feature['value'] in liked_boardgame_features_id:
            common_features.append(hot_boardgame_feature['value'])
    if n_features>0:
        affinity = len(common_features)/n_features
    else:
        affinity = 0
    return affinity, common_features

In [15]:
# calculate now the affinity for each couple hot_boardgame - liked_boardgame and add to the total_df 
# the corresponding affinity and common features that contributed to obtain that affinity 
total_df[['affinity', 'common_features']] = total_df.apply(calculate_affinity, result_type='expand', axis=1, verbose=True)

In [16]:
total_df.head()

Unnamed: 0,id_hot,rank,name_hot,features_hot,description,thumbnail,id_liked,name_liked,features_liked,affinity,common_features
0,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '...",0.125,"[Hand Management, Solo / Solitaire Game, Varia..."
1,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,308500,Clash of Deck,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.041667,[Card Drafting]
2,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,334307,Clash of Decks: Starter Kit,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.125,"[Card Drafting, Hand Management, Solo / Solita..."
3,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,325489,Dante Alighieri: Comedia – Inferno,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.125,"[End Game Bonuses, Set Collection, Variable Pl..."
4,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,342372,The Elder Scrolls V: Skyrim – The Adventure Game,"[{'type': 'boardgamecategory', 'id': '1010', '...",0.0,[]


In [17]:
# now add for each row the value of the MAX affinity obtained for each hot_boardgame 
total_df['max_affinity'] = total_df.groupby('name_hot').affinity.transform(np.max)

In [18]:
total_df.head()

Unnamed: 0,id_hot,rank,name_hot,features_hot,description,thumbnail,id_liked,name_liked,features_liked,affinity,common_features,max_affinity
0,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,337324,A.D.E.L.E.,"[{'type': 'boardgamecategory', 'id': '1022', '...",0.125,"[Hand Management, Solo / Solitaire Game, Varia...",0.125
1,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,308500,Clash of Deck,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.041667,[Card Drafting],0.125
2,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,334307,Clash of Decks: Starter Kit,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.125,"[Card Drafting, Hand Management, Solo / Solita...",0.125
3,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,325489,Dante Alighieri: Comedia – Inferno,"[{'type': 'boardgamecategory', 'id': '1002', '...",0.125,"[End Game Bonuses, Set Collection, Variable Pl...",0.125
4,342942,342942,Ark Nova,"[{'type': 'boardgamecategory', 'id': '1089', '...","In Ark Nova, you will plan and design a modern...",https://cf.geekdo-images.com/SoU8p28Sk1s8MSvoM...,342372,The Elder Scrolls V: Skyrim – The Adventure Game,"[{'type': 'boardgamecategory', 'id': '1010', '...",0.0,[],0.125


In [19]:
# this is the final part where we calculate and shoe the suggestion in affinity descending order:
# - filter only the liked_boardgames that contributed to obtain the hot_boardgame's max affinity
# - filter out the hot_boardgames the user already liked (affintity'd be 1 but the suggestion'd be useless)
ranked_df = total_df.query(f"affinity == max_affinity and name_hot not in {liked_boardgames_df.name.to_list()}").reset_index(drop=True)
# - convert the image into a HTML tag
ranked_df['thumbnail'] = ranked_df['thumbnail'].apply(lambda x: f'<img src="{x}" width="500"/>')
# - truncate the description at max 350 characters
ranked_df['description'] = ranked_df['description'].str.slice(0, 350) + '...'
# - convert the name into a clickable link 
ranked_df['name_hot'] = ranked_df.apply(lambda x: f"<a href='https://boardgamegeek.com/boardgame/{x['id_hot']}'>{x['name_hot']}</a>", axis=1)
# - group by and order by affinity
ranked_df = ranked_df.groupby(['name_hot', 'thumbnail', 'affinity', 'description', 'name_liked'])[['common_features']].agg({'common_features': lambda x: x})
ranked_df = ranked_df.sort_values('affinity', ascending=False)

In [20]:
# since the TOP_N in this case would not match the actual number of rows to show, calculate the 
# actual head value finding the first value that returns TOP_N distinct hot_boardgame 
actual_head = len(ranked_df)
for i in range(len(ranked_df), 1, -1):
    if ranked_df.head(i).reset_index().name_hot.nunique() == TOP_N:
        actual_head = i
        break

In [21]:
# final show
display.HTML(ranked_df.head(actual_head).to_html(index=True, escape=False, classes='table table-striped'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,common_features
name_hot,thumbnail,affinity,description,name_liked,Unnamed: 5_level_1
Unfathomable,,0.36,"The year is 1913. The steamship SS Atlantica is two days out from port on its voyage across the Atlantic Ocean. Its unsuspecting passengers fully anticipated a calm journey to Boston, Massachusetts, with nothing out of the ordinary to look forward to. However, strange nightmares plague the minds of the people aboard the ship every night; rumors cir...",Unmatched: Cobble & Fog,"[Fantasy, Horror, Novel-based, Area Movement, Hand Management, Team-Based Game, Variable Player Powers, Components: Miniatures, Geekach Games]"
Radlands,,0.272727,"Radlands is a competitive, dueling card game about identifying fiercely powerful card synergies. Act as the leader of your post apocalyptic group of survivors in a tooth-and-nail fight to protect your three camps from a vicious rival tribe. If all of them are destroyed, you lose the game. The main resource in the game is water. Yo...",Unmatched: Cobble & Fog,"[Action Points, Hand Management, Take That]"
Radlands,,0.272727,"Radlands is a competitive, dueling card game about identifying fiercely powerful card synergies. Act as the leader of your post apocalyptic group of survivors in a tooth-and-nail fight to protect your three camps from a vicious rival tribe. If all of them are destroyed, you lose the game. The main resource in the game is water. Yo...",A.D.E.L.E.,"[Action Points, Hand Management, Crowdfunding: Kickstarter]"
War of the Ring: Second Edition,,0.229167,"In War of the Ring, one player takes control of the Free Peoples (FP), the other player controls Shadow Armies (SA). Initially, the Free People Nations are reluctant to take arms against Sauron, so they must be attacked by Sauron or persuaded by Gandalf or other Companions, before they start to fight properly: this is represented by the Political T...",Unmatched: Cobble & Fog,"[Fantasy, Fighting, Miniatures, Novel-based, Area Movement, Card Play Conflict Resolution, Hand Management, Team-Based Game, Components: Miniatures, GaGa Games, Geekach Games]"
Bitoku,,0.214286,"In Bitoku, the players take on the roles of Bitoku spirits of the forest in their path towards transcendence, with the goal of elevating themselves and becoming the next great spirit of the forest. To do so, they will have the help of the yÅkai, the kodamas and the different pilgrims that accompany them on their path. This is a hand-mana...",Unmatched: Cobble & Fog,"[Fantasy, Hand Management, IELLO]"
Bitoku,,0.214286,"In Bitoku, the players take on the roles of Bitoku spirits of the forest in their path towards transcendence, with the goal of elevating themselves and becoming the next great spirit of the forest. To do so, they will have the help of the yÅkai, the kodamas and the different pilgrims that accompany them on their path. This is a hand-mana...",Clash of Decks: Starter Kit,"[Fantasy, Hand Management, Solo / Solitaire Game]"
Euthia: Torment of Resurrection,,0.206897,"You will face the threat of three mighty dragons born from the darkness of magic Faer, and in doing so, will have to deal with monsters and dangerous creatures plaguing the land. Search for legendary artifacts. Experience nature’s raw elemental power as it influences your steps. Fulfill quests to help the struggling people of Euthia. ...",A.D.E.L.E.,"[Adventure, Dice Rolling, Hand Management, Solo / Solitaire Game, Variable Player Powers, Crowdfunding: Kickstarter]"
