In [1]:
import re
import time

import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup
import lxml

import dill

In [69]:
def get_thing(id, **args):
    '''Fetch one or more "things" from the BGG XML API2 and return the raw XML text.

       A "thing" is BGG's designation for a physical item, such as a board game,
       expansion, board game accessory, etc.  The "id" supplied can have several numbers
       separated by commas to retrieve more than one item at a time.

       For more information see: https://boardgamegeek.com/wiki/page/BGG_XML_API2#

       Parameters:
         id      Converted with str(), stripped of surrounding whitespace and placed
                 after "thing?id=" unchanged.
         **args  An arbitrary collection of options (in the form of parameters like
                 key=value).  Each pair is appended to the query string as "&key=value".
                 Keys and values are percent-encoded so that characters such as '&', '='
                 or spaces inside a value cannot corrupt the query string; commas in
                 values are left as they are.

       Returns:  The response body as a string, or None if the final response has
       status 404.  The only processing done is to remove the newline and tab
       characters from the string.

       While the server answers 202 the request is repeated every 6 seconds.  Errors
       raised by requests.get (including the timeout set below) propagate to the caller.
    '''
    from urllib.parse import quote   #  Local import so this cell does not depend on the import cell.

    timeout_seconds = 60   #  Do not hang forever on a stalled connection.
    poll_seconds = 6       #  Pause between retries while the server answers 202.

    url = 'https://www.boardgamegeek.com/xmlapi2/thing?id=' + str(id).strip()
    for (k, v) in args.items():   #  Add the arbitrary (key,value) pairs passed to the query string.
        url += '&' + quote(str(k), safe='') + '=' + quote(str(v), safe=',')

    r = requests.get(url, timeout=timeout_seconds)
    while r.status_code == 202:
        time.sleep(poll_seconds)
        r = requests.get(url, timeout=timeout_seconds)
    #  Checked after polling so that a 404 arriving after a 202 is also reported as None.
    if r.status_code == 404:
        return None
    return re.sub('[\n\t]', '', r.text)

def add_options(url, own=None, preordered=None, prevowned=None, fortrade=None, want=None, 
                wanttobuy=None, wanttoplay=None, wishlist=None, comment=None):
    '''A "utility" type of function to add elements to the query string.  We assume that the
       parameters are {0,1} values (if they are not "None").  Note that we will quietly 
       skip over these parameters if they are not equal to 0 or 1, treating them implicitly 
       as "None" values.  

       Values that compare equal to 0 or 1 without being plain ints (for example True/False
       or 1.0) are written as the integer "0" or "1", never as "True" or "1.0".

       Parameters:
         url    The url (already containing a query string) to extend.
         own, preordered, prevowned, fortrade, want, wanttobuy, wanttoplay, wishlist, comment
                Optional flags; each one that equals 0 or 1 is appended as '&name=value'.

       Returns:  The url with the additional options added as 'key=value' parameters to the url.  
    '''
    #  The order below is the order in which the parameters are appended to the url.
    flags = (
        ('own', own),
        ('prevowned', prevowned),
        ('preordered', preordered),
        ('fortrade', fortrade),
        ('want', want),
        ('wishlist', wishlist),
        ('wanttobuy', wanttobuy),
        ('wanttoplay', wanttoplay),
        ('comment', comment),
    )
    for (key, value) in flags:
        if value in [0, 1]:
            #  int() normalises True/False/1.0 to the "0"/"1" the query string expects.
            url += '&' + key + '=' + str(int(value))
    return url

def get_collection(bgg_user_id, own=None, preordered=None, prevowned=None, fortrade=None, want=None, 
                   wanttobuy=None, wanttoplay=None, wishlist=None, comment=None):
    '''Download a BGG user's collection and return it as a pandas DataFrame.

       For more information see:  https://boardgamegeek.com/wiki/page/BGG_XML_API2

       Get the board games, and then get the board game expansions.  This is a quirk of the 
       BGG xmlapi2 interface, in that it will incorrectly return the expansions as subtype="boardgame",
       so we make two calls to get the boardgames, and then the expansions separately.

       Parameters:
         bgg_user_id  The BGG user name (surrounding whitespace is stripped).
         own, preordered, prevowned, fortrade, want, wanttobuy, wanttoplay, wishlist, comment
                      Optional flags handed to add_options() to build the query string.

       Returns:  A pandas DataFrame with the designated boardgames in the user's collection, with columns
       containing information about the games such as the user rating, number of plays, etc.  The frame
       is indexed by 'objectid' and sorted by 'name'.  The year, status-flag and play-count columns are
       int32, with -1 standing in for a missing value.  A request that ends in a 404 contributes no rows.

       While the server answers 202 a request is repeated every 8 seconds.  Errors raised by
       requests.get (including the timeout set below) propagate to the caller.
    '''
    timeout_seconds = 60   #  Do not hang forever on a stalled connection.
    poll_seconds = 8       #  Pause between retries while BGG has the request queued.

    items = []
    for game_type in ['excludesubtype=boardgameexpansion', 'subtype=boardgameexpansion']:
        url = ('https://www.boardgamegeek.com/xmlapi2/collection?username=' +
               bgg_user_id.strip() + '&' + game_type + '&stats=1')
        #  Add parameters to the url based on what was passed to this function.
        url = add_options(url, own, preordered, prevowned, fortrade, want, wanttobuy, wanttoplay, wishlist, comment)
        r = requests.get(url, timeout=timeout_seconds)
        ##  BGG says that it usually queues requests for a collection, so we 
        ##  must check for a 202 code, and sleep and try again if necessary.  
        while r.status_code == 202:
            time.sleep(poll_seconds)
            r = requests.get(url, timeout=timeout_seconds)
        #  Checked after polling so that a 404 arriving after a 202 is skipped as well.
        if r.status_code == 404:
            continue
        initial_res = re.sub('[\n\t]', '', r.text)
        items.extend(BeautifulSoup(initial_res, 'lxml').find_all('item'))

    records = []
    for item in items:
        d = {'objectid': item.attrs['objectid'], 'subtype': item.attrs['subtype']}
        year = item.find('yearpublished')
        if year is not None:
            d['yearpublished'] = year.text
        d['name'] = item.find('name').text
        #  The <status> attributes supply the ownership flags and 'lastmodified'.
        d.update(item.find('status').attrs)
        d['numplays'] = item.find('numplays').text
        d['lastmodified'] = pd.to_datetime(d['lastmodified'])
        rating = item.find('rating')
        if rating is not None:
            d['rating'] = rating.attrs['value']
        note = item.find('comment')
        if note is not None:
            d['comment'] = note.text
        records.append(d)

    glist = pd.DataFrame(records, columns=['objectid','subtype','name','yearpublished','own',
                                           'prevowned','fortrade','want','wanttoplay','wanttobuy',
                                           'wishlist','preordered','lastmodified','numplays','rating',
                                           'comment']).set_index('objectid').sort_values('name')
    for column in ['yearpublished', 'own', 'prevowned', 'fortrade', 'want', 'wanttoplay', 
                   'wanttobuy', 'wishlist', 'preordered', 'numplays']:
        #  Assign the result back: an in-place fillna on glist[column] acts on a temporary
        #  under pandas Copy-on-Write and would leave the missing values in the frame.
        glist[column] = pd.to_numeric(glist[column]).fillna(-1).astype(np.int32)

    return glist

In [70]:
#  Download the collection (base games, then expansions) of BGG user 'craw-daddy'.
c = get_collection(bgg_user_id='craw-daddy')

In [71]:
#  Preview the first five rows of the collection.
c.head(5)

Unnamed: 0_level_0,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
122711,boardgame,"""La Garde recule!""",2011,1,0,1,0,0,0,0,0,2015-01-15 11:02:06,0,,
8257,boardgameexpansion,&Cetera,2013,1,0,0,0,0,0,0,0,2015-01-12 01:52:06,0,,
153999,boardgame,"...and then, we held hands.",2015,1,0,0,0,0,0,0,0,2015-10-21 14:41:59,4,,
27236,boardgameexpansion,.45 Adventure: Crimefighting Action in the Pul...,2006,1,0,0,0,0,0,0,0,2015-01-12 01:53:04,0,,
155122,boardgame,"1066, Tears To Many Mothers",2018,1,0,0,0,0,0,0,0,2018-11-30 21:02:41,0,,


In [72]:
#  Number of rows in the collection frame.
c.shape[0]

1532

In [74]:
#  Rows whose 'wishlist' flag is set.
c.loc[c['wishlist'] == 1]

Unnamed: 0_level_0,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
3097,boardgame,1849: The Game of Sicilian Railways,1998,0,0,0,0,1,0,1,0,2019-10-20 22:34:45,0,,
23540,boardgame,1889: History of Shikoku Railways,2004,0,0,0,0,1,0,1,0,2018-11-21 12:06:08,1,,
202617,boardgame,18CLE,2016,0,0,0,0,1,0,1,0,2019-01-01 00:04:48,0,,
176588,boardgame,A Glorious Chance: The Naval Struggle for Lake...,2020,0,0,0,0,0,0,1,0,2019-10-21 09:46:34,0,,
60029,boardgame,A Week In Hell: The Battle of Hue,2010,0,0,0,0,1,0,1,0,2018-11-21 12:43:07,0,,
260710,boardgame,Amul,2019,0,0,0,0,0,0,1,0,2019-11-16 22:18:18,0,,
4616,boardgame,Arimaa,2002,0,0,0,0,1,0,1,0,2013-11-07 10:36:58,0,,
245456,boardgame,Attack of the 50 Foot Colossi,2018,0,0,0,0,0,0,1,0,2018-11-21 12:32:14,0,,
231218,boardgame,Black Sonata,2017,0,0,0,0,0,1,1,0,2019-11-16 18:13:44,0,,
197376,boardgame,Charterstone,2017,0,0,0,0,0,0,1,0,2018-11-12 17:40:47,0,,


In [73]:
#  How many rows have the 'wishlist' flag set.
len(c.loc[c['wishlist'] == 1])

52

In [27]:
#  Download the collection of BGG user 'Hopalong'.
hopalong = get_collection(bgg_user_id='Hopalong')

In [41]:
#  How many rows have the 'own' flag set.
len(hopalong.loc[hopalong['own'] == 1])

3997

In [8]:
#  Download the collection of BGG user 'Helixx'.
helixx = get_collection(bgg_user_id='Helixx')

In [54]:
#  How many rows have the 'wishlist' flag set.
len(helixx.loc[helixx['wishlist'] == 1])

26

In [167]:
##  Retrieve all of the boardgame categories used by BGG for classification.

def get_BGG_categories():
    '''Scrape the BGG "browse board game categories" page into a DataFrame.

       Every table cell (<td>) that contains a link contributes one row: the id is the
       third '/'-separated piece of the link's href and the category is the link text.
       (Presumably the hrefs look like '/boardgamecategory/<id>/<slug>' -- this is not
       checked here.)  Cells without a link, and links whose href has fewer than three
       '/'-separated pieces, are skipped.

       Returns:  A pandas DataFrame indexed by 'id' with a single 'category' column.

       Errors raised by requests.get (including the timeout set below) propagate to the caller.
    '''
    timeout_seconds = 60   #  Do not hang forever on a stalled connection.

    page = requests.get('https://boardgamegeek.com/browse/boardgamecategory', timeout=timeout_seconds)
    #  Name the parser explicitly so the result does not depend on which parsers happen
    #  to be installed; 'lxml' is the parser used elsewhere in this notebook.
    soup = BeautifulSoup(page.text, 'lxml')
    result = []
    for item in soup.find_all('td'):
        anchor = item.find('a')
        if anchor is None:
            continue
        parts = anchor.attrs.get('href', '').split('/')
        if len(parts) < 3:   #  No id segment in this link.
            continue
        result.append([parts[2], anchor.text])

    return pd.DataFrame(result, columns=['id','category']).set_index('id')

In [171]:
#  Scrape the category table, then store the resulting DataFrame with dill.
boardGameCategories = get_BGG_categories()

with open('data/boardGameCategories.dill', 'wb') as f:
    dill.dump(boardGameCategories, f)

In [149]:
##  Retrieve all of the boardgame mechanisms used by BGG for classification.

def get_BGG_mechanisms():
    '''Scrape the BGG "browse board game mechanics" page into a DataFrame.

       Tab and newline characters are removed from the page before it is parsed.  Every
       table cell (<td>) that contains a link contributes one row: the id is the third
       '/'-separated piece of the link's href and the mechanism is the link text.
       (Presumably the hrefs look like '/boardgamemechanic/<id>/<slug>' -- this is not
       checked here.)  Cells without a link, and links whose href has fewer than three
       '/'-separated pieces, are skipped.

       Returns:  A pandas DataFrame indexed by 'id' with a single 'mechanism' column.

       Errors raised by requests.get (including the timeout set below) propagate to the caller.
    '''
    timeout_seconds = 60   #  Do not hang forever on a stalled connection.

    page = requests.get('https://boardgamegeek.com/browse/boardgamemechanic', timeout=timeout_seconds)
    #  Name the parser explicitly so the result does not depend on which parsers happen
    #  to be installed; 'lxml' is the parser used elsewhere in this notebook.
    soup = BeautifulSoup(re.sub('[\t\n]', '', page.text), 'lxml')
    mechs = []
    for item in soup.find_all('td'):
        anchor = item.find('a')
        if anchor is None:
            continue
        parts = anchor.attrs.get('href', '').split('/')
        if len(parts) < 3:   #  No id segment in this link.
            continue
        mechs.append((parts[2], anchor.text))
    return pd.DataFrame(mechs, columns=['id', 'mechanism']).set_index('id')

In [150]:
#  Scrape the mechanism table, then store the resulting DataFrame with dill.
boardGameMechanisms = get_BGG_mechanisms()

#  Opening the file fails if the 'data' directory does not exist.
with open('data/boardGameMechanisms.dill', 'wb') as f:
    dill.dump(boardGameMechanisms, f)

In [151]:
#  Display the scraped mechanisms table (indexed by id).
boardGameMechanisms

Unnamed: 0_level_0,mechanism
id,Unnamed: 1_level_1
2073,Acting
2838,Action Drafting
2001,Action Points
2689,Action Queue
2839,Action Retrieval
...,...
2017,Voting
2082,Worker Placement
2935,Worker Placement with Dice Workers
2933,"Worker Placement, Different Worker Types"


In [174]:
#  Mechanisms whose name contains the text 'Auction'.
boardGameMechanisms.loc[boardGameMechanisms['mechanism'].str.contains('Auction')]

Unnamed: 0_level_0,mechanism
id,Unnamed: 1_level_1
2012,Auction/Bidding
2930,Auction: Dexterity
2924,Auction: Dutch
2932,Auction: Dutch Priority
2918,Auction: English
2931,Auction: Fixed Placement
2923,Auction: Once Around
2920,Auction: Sealed Bid
2919,Auction: Turn Order Until Pass
2928,Closed Economy Auction
