In [126]:
import glob
import os
import re
import time
from datetime import datetime, timedelta
from urllib.parse import quote, urlencode

import dill
import lxml
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [137]:
def get_thing(id, **args):
    '''Fetch the raw XML for one or more BGG "things".

       A "thing" is BGG's designation for a physical item, 
       such as a board game, expansion, board game accessory, 
       etc.  The "id" supplied can have several numbers 
       separated by commas to retrieve more than one item 
       at a time.
    
       For more information see: https://boardgamegeek.com/wiki/page/BGG_XML_API2#
       
       Parameters:
         id      Thing id (or comma separated ids).  It is converted 
                 with str() and surrounding whitespace is stripped.
         **args  An arbitrary collection of options (in the form of 
                 parameters like key=value) that are appended to the 
                 query string as "&key=value" pairs.  Keys and values 
                 are percent-encoded, so characters such as '&', '=' 
                 or spaces inside a value can no longer break the URL.  
                 Commas are left as they are.
       
       Returns:  A string for the "thing", or None if BGG answers 
       with a 404.  The only processing done is to remove the 
       newline and tab characters from the string.  

       Note:  While BGG answers with status 202 the request is 
       repeated every 5 seconds; there is no upper bound on the 
       number of retries.  Every single request is given a timeout 
       so a stalled connection raises instead of hanging forever.
    '''
    
    timeout = 60   #  seconds allowed for each individual HTTP request

    #  Commas stay literal (safe=',') because they separate multiple 
    #  ids / multiple values in the BGG query syntax.
    url = ('https://www.boardgamegeek.com/xmlapi2/thing?id=' 
           + quote(str(id).strip(), safe=','))
    if args:
        url += '&' + urlencode(args, safe=',', quote_via=quote)
        
    r = requests.get(url, timeout=timeout)
    if r.status_code == 404:
        return None
    while r.status_code == 202:   #  request was queued:  wait and retry
        time.sleep(5)
        r = requests.get(url, timeout=timeout)
    return re.sub('[\n\t]', '', r.text)

def add_options(url, own=None, preordered=None, prevowned=None, 
                fortrade=None, want=None, wanttobuy=None, 
                wanttoplay=None, wishlist=None, comment=None):
    '''A "utility" type of function to add elements to the query 
       string.  We assume that the parameters are {0,1} values 
       (if they are not "None").  Note that we will quietly 
       skip over these parameters if they are not equal to 0 or 1, 
       treating them implicitly as "None" values.  

       Values that compare equal to 0 or 1 (for example the 
       booleans False/True, or 1.0) are written to the url as the 
       integers 0 or 1, never as "True", "False" or "1.0".
       
       Returns:  The url with the additional options added as 
       '&key=value' parameters to the url.  The url is returned 
       unchanged when no option is 0 or 1.
    '''
    #  The order below is the order in which the options are 
    #  appended to the url.
    flags = (('own', own), 
             ('prevowned', prevowned), 
             ('preordered', preordered), 
             ('fortrade', fortrade), 
             ('want', want), 
             ('wishlist', wishlist), 
             ('wanttobuy', wanttobuy), 
             ('wanttoplay', wanttoplay), 
             ('comment', comment))
    for name, value in flags:
        if value in (0, 1):
            url += f'&{name}={int(value)}'
    return url

def get_collection(bgg_user_id, own=None, preordered=None, 
                   prevowned=None, fortrade=None, want=None, 
                   wanttobuy=None, wanttoplay=None, 
                   wishlist=None, comment=None, save=True):
    '''Retrieve a BGG user's collection as a pandas DataFrame.

       For more information see:  https://boardgamegeek.com/wiki/page/BGG_XML_API2

       Get the board games, and then get the board game 
       expansions.  This is a quirk of the BGG xmlapi2 interface, 
       in that it will incorrectly return the expansions as 
       subtype="boardgame", so we make two calls to get the 
       boardgames, and then the expansions separately.

       Parameters:
         bgg_user_id  BGG username; surrounding whitespace is ignored.
         own, preordered, prevowned, fortrade, want, wanttobuy, 
         wanttoplay, wishlist, comment
                      Optional {0,1} filters that are handed to 
                      add_options() and end up in the query string.
         save         If true (the default), store the downloaded 
                      collection under 'collections/'.
       
       Returns:  A pandas DataFrame (indexed by 'objectid', sorted 
       by 'name') with the designated boardgames in the user's 
       collection, with columns containing information about the 
       games such as the user rating, number of plays, etc.  
       Missing integer values are stored as -1.

       Raises:  ValueError if BGG reports an error (such as an 
       invalid username) or answers with an unexpected HTTP status.
       
       Note:  In an effort to reduce traffic, we will check
       if we have previously retrieved the collection within
       the previous 24 hour period.  If so, we just load and
       return that information, otherwise we will download the
       collection.  The cache is kept per username AND per 
       combination of filters, so a filtered request is never 
       answered from (and never overwrites) the unfiltered cache.
    '''
    user = bgg_user_id.strip()
    filters = dict(own=own, preordered=preordered, prevowned=prevowned, 
                   fortrade=fortrade, want=want, wanttobuy=wanttobuy, 
                   wanttoplay=wanttoplay, wishlist=wishlist, 
                   comment=comment)
    cache_key = _collection_cache_key(user, filters)

    cached = _load_cached_collection(cache_key)
    if cached is not None:
        return cached

    glist = _collection_items_to_frame(_fetch_collection_items(user, filters))

    #  Do we save the collection information?  
    #  By default, we do, governed by the "save" parameter
    if save:
        _save_collection(glist, cache_key)

    return glist


#  Settings shared by the get_collection() helpers below.
_COLLECTION_CACHE_DIR = 'collections'
_COLLECTION_STAMP_FORMAT = '%Y%m%d-%H%M'
_COLLECTION_CACHE_MAX_AGE = timedelta(hours=24)
_COLLECTION_REQUEST_TIMEOUT = 60   #  seconds, per HTTP request
_COLLECTION_POLL_SECONDS = 7       #  pause between polls while BGG answers 202


def _collection_cache_key(user, filters):
    '''Build the cache file stem:  the username, plus a tag naming 
       every filter that is set to 0 or 1 (none -> just the username).'''
    tag = '_'.join(f'{name}{int(value)}' 
                   for name, value in filters.items() 
                   if value in (0, 1))
    return f'{user}-{tag}' if tag else user


def _cached_collection_files(cache_key):
    '''List (timestamp, path) for the cache files that belong to 
       exactly this cache key, i.e. "<cache_key>-YYYYMMDD-HHMM.dill".'''
    pattern = re.compile(re.escape(cache_key) + r'-(\d{8}-\d{4})\.dill')
    found = []
    #  The glob only narrows the candidates; the regular expression 
    #  decides, so that e.g. 'craw' does not pick up 'craw-daddy-...'.
    for path in glob.glob(f'{_COLLECTION_CACHE_DIR}/{glob.escape(cache_key)}-*.dill'):
        match = pattern.fullmatch(os.path.basename(path))
        if match is None:
            continue
        try:
            stamp = datetime.strptime(match.group(1), _COLLECTION_STAMP_FORMAT)
        except ValueError:   #  digits that do not form a valid date
            continue
        found.append((stamp, path))
    return found


def _load_cached_collection(cache_key):
    '''Return the newest cached collection if it is at most 24 hours 
       old, otherwise None.'''
    candidates = _cached_collection_files(cache_key)
    if not candidates:
        return None
    stamp, path = max(candidates)
    if datetime.now() - stamp > _COLLECTION_CACHE_MAX_AGE:
        return None
    #  NOTE:  dill.load (like pickle) can execute arbitrary code, so 
    #  the 'collections' directory must only hold files we wrote.
    with open(path, 'rb') as f:
        return dill.load(f)


def _fetch_collection_items(user, filters):
    '''Download the <item> elements of the collection from BGG:  
       first everything except expansions, then the expansions.'''
    items = []
    for game_type in ['excludesubtype=boardgameexpansion', 
                      'subtype=boardgameexpansion']:
        url = ('https://www.boardgamegeek.com/xmlapi2/collection?username=' 
               + quote(user, safe='') + '&' + game_type + '&stats=1')
        #  Add parameters to the url based on what was 
        #  passed to get_collection().
        url = add_options(url, **filters)
        r = requests.get(url, timeout=_COLLECTION_REQUEST_TIMEOUT)
        if r.status_code == 404:
            continue
        ##  BGG says that it usually queues requests for a 
        ##  collection, so we must check for a 202 code, 
        ##  and sleep and try again if necessary.  
        while r.status_code == 202:
            time.sleep(_COLLECTION_POLL_SECONDS)
            r = requests.get(url, timeout=_COLLECTION_REQUEST_TIMEOUT)
        soup = BeautifulSoup(re.sub('[\n\t]', '', r.text), 'lxml')
        #  Check if there was an error from BGG, such as 
        #  an invalid username.  Raise an exception if
        #  we find an error in the response.  
        error = soup.find('error')
        if error:
            message = error.find('message')
            raise ValueError(message.text if message is not None else error.text)
        #  Anything but 200 at this point is an error page, not a 
        #  collection; do not let it pass as an empty result that 
        #  would then be cached.
        if r.status_code != 200:
            raise ValueError(f'BGG returned HTTP status {r.status_code} for {url}')
        items.extend(soup.find_all('item'))
    return items


def _collection_items_to_frame(items):
    '''Turn the parsed <item> elements into the result DataFrame.'''
    records = []
    for item in items:
        d = dict()
        d['objectid'] = item.attrs['objectid']
        d['subtype'] = item.attrs['subtype']
        year = item.find('yearpublished')
        if year:
            d['yearpublished'] = year.text
        d['name'] = item.find('name').text
        #  The attributes of <status> carry the own/want/... flags 
        #  and 'lastmodified'.
        d.update(item.find("status").attrs)
        d['numplays'] = item.find('numplays').text
        d['lastmodified'] = pd.to_datetime(d['lastmodified'])
        rating = item.find('rating')
        if rating:
            d['rating'] = rating.attrs['value']
        comment = item.find('comment')
        if comment:
            d['comment'] = comment.text
        records.append(d)
    
    frame = pd.DataFrame(records, 
                    columns=['objectid','subtype',
                            'name','yearpublished','own',
                            'prevowned','fortrade','want',
                            'wanttoplay','wanttobuy',
                            'wishlist','preordered',
                            'lastmodified','numplays',
                            'rating',
                            'comment']).set_index('objectid').sort_values('name')
    #  -1 marks a missing value in the integer columns.
    for column in ['yearpublished', 'own', 'prevowned', 
                   'fortrade', 'want', 'wanttoplay', 
                   'wanttobuy', 'wishlist', 'preordered', 
                   'numplays']:
        frame[column] = frame[column].fillna(-1).astype(np.int32)
    return frame


def _save_collection(glist, cache_key):
    '''Write the collection to the cache and drop the older cache 
       files of the same cache key.'''
    os.makedirs(_COLLECTION_CACHE_DIR, exist_ok=True)
    previous = [path for _, path in _cached_collection_files(cache_key)]

    #  Save the collection using the BGG username supplied
    now = datetime.strftime(datetime.now(), _COLLECTION_STAMP_FORMAT)
    filename = f'{_COLLECTION_CACHE_DIR}/{cache_key}-{now}.dill'
    with open(filename, 'wb') as f:
        dill.dump(glist, f)

    #  Only remove the old versions once the new one is on disk.
    for path in previous:
        if os.path.basename(path) != os.path.basename(filename):
            os.remove(path)

In [138]:
#  Collection of BGG user 'craw-daddy' (downloaded, or loaded from 
#  the local cache if a copy from the last 24 hours exists).
c = get_collection('craw-daddy')

In [21]:
#  Preview the first rows of the collection.
c.head()

Unnamed: 0_level_0,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
122711,boardgame,"""La Garde recule!""",2011,1,0,1,0,0,0,0,0,2015-01-15 11:02:06,0,,
8257,boardgameexpansion,&Cetera,2013,1,0,0,0,0,0,0,0,2015-01-12 01:52:06,0,,
153999,boardgame,"...and then, we held hands.",2015,1,0,0,0,0,0,0,0,2015-10-21 14:41:59,4,,
27236,boardgame,.45 Adventure: Crimefighting Action in the Pul...,2006,1,0,0,0,0,0,0,0,2015-01-12 01:53:04,0,,
155122,boardgame,"1066, Tears to Many Mothers",2018,1,0,0,0,0,0,0,0,2018-11-30 21:02:41,0,,


In [22]:
#  Total number of entries in the collection.
len(c)

1836

In [23]:
#  Entries whose 'wanttoplay' flag is set.
c[c['wanttoplay'] == 1]

Unnamed: 0_level_0,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
199269,boardgame,1572: The Lost Expedition,2016,0,0,0,0,1,0,0,0,2017-07-11 09:08:01,0,,
3097,boardgame,1849: The Game of Sicilian Railways,1998,0,0,0,0,1,0,1,0,2019-10-20 22:34:45,0,,
202617,boardgame,18CLE,2016,0,0,0,0,1,0,1,0,2019-01-01 00:04:48,0,,
346248,boardgame,18Korea,2021,0,0,0,0,1,0,1,0,2021-11-08 20:02:32,0,,
816,boardgame,2038: Tycoons of the Asteroid Belt,1995,0,0,0,0,1,0,0,0,2020-11-07 19:16:48,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43196,boardgame,Yalu: The Chinese Counteroffensive in Korea: N...,2010,0,0,0,0,1,0,0,0,2010-04-18 11:32:42,0,,
33767,boardgame,Yavalath,2007,0,0,0,0,1,0,1,0,2018-11-21 12:38:03,0,,
196478,boardgame,Yeomen: The 9 Card Agincourt Game,2016,0,0,0,0,1,0,0,0,2016-04-11 04:51:57,0,,
148641,boardgame,Yōkaï no Mori,2013,0,0,0,0,1,0,0,0,2014-06-24 04:18:34,0,,


In [24]:
#  Entries whose 'wishlist' flag is set.
c[c['wishlist'] == 1]

Unnamed: 0_level_0,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
3097,boardgame,1849: The Game of Sicilian Railways,1998,0,0,0,0,1,0,1,0,2019-10-20 22:34:45,0,,
202617,boardgame,18CLE,2016,0,0,0,0,1,0,1,0,2019-01-01 00:04:48,0,,
346248,boardgame,18Korea,2021,0,0,0,0,1,0,1,0,2021-11-08 20:02:32,0,,
308305,boardgame,21Moon,2020,0,0,0,0,1,0,1,0,2021-10-21 12:30:29,0,,
176588,boardgame,A Glorious Chance: The Naval Struggle for Lake...,2022,0,0,0,0,0,0,1,0,2021-11-08 20:00:13,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67609,boardgame,Way of the Dragon,2012,0,0,0,0,0,0,1,0,2017-09-29 03:43:08,0,,
339789,boardgame,Welcome to the Moon,2021,0,0,0,0,0,0,1,0,2021-10-24 15:10:55,0,,
347509,boardgame,Wiñay Kawsay,-1,0,0,0,0,1,0,1,0,2021-10-22 13:49:25,0,,
33767,boardgame,Yavalath,2007,0,0,0,0,1,0,1,0,2018-11-21 12:38:03,0,,


In [73]:
#  Number of entries whose 'wishlist' flag is set.
len(c[c['wishlist'] == 1])

52

In [27]:
#  Collection of BGG user 'Hopalong'.
hopalong = get_collection('Hopalong')

In [28]:
#  Number of entries whose 'own' flag is set.
len(hopalong[hopalong['own'] == 1])

4282

In [30]:
#  Collection of BGG user 'Helixx'.
helixx = get_collection('Helixx')

In [31]:
#  Number of entries whose 'wishlist' flag is set.
len(helixx[helixx['wishlist'] == 1])

38

In [32]:
##  Retrieve all of the boardgame categories used by BGG for classification.

def get_BGG_categories():
    '''Scrape the board game categories from BGG's browse page.

       Every table cell (<td>) of the page that contains a link 
       contributes one row:  the id is the third '/'-separated 
       piece of the link's href (presumably hrefs look like 
       "/boardgamecategory/<id>/<slug>" -- verify against the 
       live page), the category is the link text.

       Returns:  A pandas DataFrame indexed by 'id' (strings) with 
       a single column 'category'.

       Raises:  requests.HTTPError if BGG answers with an error 
       status, so that an error page is not mistaken for an 
       empty category list.
    '''
    page = requests.get('https://boardgamegeek.com/browse/boardgamecategory', 
                        timeout=60)
    page.raise_for_status()
    #  Name the parser explicitly:  otherwise BeautifulSoup guesses 
    #  one (with a warning) depending on what is installed.
    soup = BeautifulSoup(page.text, 'lxml')
    result = []
    for cell in soup.find_all('td'):
        anchor = cell.find('a')
        if anchor is None:
            continue
        value = anchor.attrs['href'].split('/')[2]
        result.append([value, anchor.text])

    return pd.DataFrame(result, columns=['id','category']).set_index('id')

In [33]:
#  Scrape the category table and store it for later sessions.
boardGameCategories = get_BGG_categories()

#  open() does not create missing directories, so make sure 
#  'data/' exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/boardGameCategories.dill', 'wb') as f:
    dill.dump(boardGameCategories, f)

In [34]:
#  Display the category table.
boardGameCategories

Unnamed: 0_level_0,category
id,Unnamed: 1_level_1
1009,Abstract Strategy
1032,Action / Dexterity
1022,Adventure
2726,Age of Reason
1048,American Civil War
...,...
1019,Wargame
1025,Word Game
1065,World War I
1049,World War II


In [35]:
##  Retrieve all of the boardgame mechanisms used by BGG for classification.

def get_BGG_mechanisms():
    '''Scrape the board game mechanisms from BGG's browse page.

       Tabs and newlines are removed from the page before it is 
       parsed.  Every table cell (<td>) that contains a link 
       contributes one row:  the id is the third '/'-separated 
       piece of the link's href (presumably hrefs look like 
       "/boardgamemechanic/<id>/<slug>" -- verify against the 
       live page), the mechanism is the link text.

       Returns:  A pandas DataFrame indexed by 'id' (strings) with 
       a single column 'mechanism'.

       Raises:  requests.HTTPError if BGG answers with an error 
       status, so that an error page is not mistaken for an 
       empty mechanism list.
    '''
    page = requests.get('https://boardgamegeek.com/browse/boardgamemechanic', 
                        timeout=60)
    page.raise_for_status()
    #  Name the parser explicitly:  otherwise BeautifulSoup guesses 
    #  one (with a warning) depending on what is installed.
    soup = BeautifulSoup(re.sub('[\t\n]', '', page.text), 'lxml')
    mechs = []
    for cell in soup.find_all('td'):
        anchor = cell.find('a')
        if anchor:
            mech_id = anchor.attrs['href'].split('/')[2]
            mechs.append((mech_id, anchor.text))
    return pd.DataFrame(mechs, columns=['id', 'mechanism']).set_index('id')

In [36]:
#  Scrape the mechanism table and store it for later sessions.
boardGameMechanisms = get_BGG_mechanisms()

#  open() does not create missing directories, so make sure 
#  'data/' exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/boardGameMechanisms.dill', 'wb') as f:
    dill.dump(boardGameMechanisms, f)

In [37]:
#  Display the mechanism table.
boardGameMechanisms

Unnamed: 0_level_0,mechanism
id,Unnamed: 1_level_1
2073,Acting
2838,Action Drafting
2001,Action Points
2689,Action Queue
2839,Action Retrieval
...,...
2017,Voting
2082,Worker Placement
2935,Worker Placement with Dice Workers
2933,"Worker Placement, Different Worker Types"


In [174]:
#  Mechanisms whose name contains 'Auction'.
boardGameMechanisms[boardGameMechanisms['mechanism'].str.contains('Auction')]

Unnamed: 0_level_0,mechanism
id,Unnamed: 1_level_1
2012,Auction/Bidding
2930,Auction: Dexterity
2924,Auction: Dutch
2932,Auction: Dutch Priority
2918,Auction: English
2931,Auction: Fixed Placement
2923,Auction: Once Around
2920,Auction: Sealed Bid
2919,Auction: Turn Order Until Pass
2928,Closed Economy Auction
