In [1]:
import re
import time
from urllib.parse import quote, urlencode

import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup
import lxml

import dill

In [88]:
def get_thing(id, **args):
    '''Fetch the raw XML for one or more BGG "things".

       A "thing" is BGG's designation for a physical item, such as a board game,
       expansion, board game accessory, etc.

       For more information see: https://boardgamegeek.com/wiki/page/BGG_XML_API2#

       Parameters:
           id:  The BGG id to retrieve.  May be a single number, a string holding several
                numbers separated by commas (e.g. "1,2,3"), or an iterable of ids
                (e.g. [1, 2, 3]) to retrieve more than one item at a time.
           **args:  An arbitrary collection of options (in the form of parameters like
                key=value) that are appended to the query string as "&key=value".  Keys and
                values are URL-encoded, so a value containing a space, "&", "=" or "#"
                cannot corrupt the query string.

       Returns:  A string for the "thing".  The only processing done is to remove the newline
       and tab characters from the string.  Returns None if BGG answers with a 404 status.
    '''
    #  Accept an iterable of ids as well as the comma-separated string form.
    if not isinstance(id, (str, bytes)) and hasattr(id, '__iter__'):
        id_string = ','.join(str(one_id).strip() for one_id in id)
    else:
        id_string = str(id).strip()

    #  Commas are kept literal because they separate multiple ids in the query string.
    url = 'https://www.boardgamegeek.com/xmlapi2/thing?id=' + quote(id_string, safe=',')
    if args:
        #  urlencode() str()-converts and escapes every key and value, joining them with "&".
        url += '&' + urlencode(args, safe=',')

    r = requests.get(url)
    if r.status_code == 404:
        return None
    while r.status_code == 202:   #  202: not ready yet, so wait and ask again.
        time.sleep(6)
        r = requests.get(url)
    return re.sub('[\n\t]', '', r.text)

def add_options(url, own=None, preordered=None, prevowned=None, fortrade=None, want=None,
                wanttobuy=None, wanttoplay=None, wishlist=None, comment=None):
    '''A "utility" type of function to add elements to the query string.

       Each option is expected to be a {0,1} value (or None).  Options that are not equal to
       0 or 1 are quietly skipped, i.e. treated implicitly as None.  Values that compare equal
       to 0 or 1 but are not plain ints (True/False, 1.0/0.0) are normalised to "1"/"0", so
       that own=True produces "&own=1" rather than "&own=True".

       Parameters:
           url:  The url (already containing a query string) to extend.
           own, preordered, prevowned, fortrade, want, wanttobuy, wanttoplay, wishlist, comment:
                The optional {0,1} flags to append.

       Returns:  The url with the accepted options appended as '&key=value' parameters, always
       in the order own, prevowned, preordered, fortrade, want, wishlist, wanttobuy,
       wanttoplay, comment.
    '''
    #  Fixed output order, independent of the order of the parameters in the signature.
    flags = (('own', own), ('prevowned', prevowned), ('preordered', preordered),
             ('fortrade', fortrade), ('want', want), ('wishlist', wishlist),
             ('wanttobuy', wanttobuy), ('wanttoplay', wanttoplay), ('comment', comment))
    for name, value in flags:
        if value in [0, 1]:
            #  int() turns True/False and 1.0/0.0 into the "1"/"0" form of the plain ints.
            url += '&' + name + '=' + str(int(value))
    return url

def _fetch_collection_items(url):
    '''Request a BGG collection url and return the "item" tags found in the response.

       BGG says that it usually queues requests for a collection, so a 202 status is answered
       by sleeping for 8 seconds and trying again, for as long as a 202 keeps coming back.

       Returns:  A list of the "item" tags in the response, or None if BGG answers with a 404.
    '''
    r = requests.get(url)
    if r.status_code == 404:
        return None
    while r.status_code == 202:
        time.sleep(8)
        r = requests.get(url)
    xml_text = re.sub('[\n\t]', '', r.text)
    return list(BeautifulSoup(xml_text, 'lxml').find_all('item'))


def get_collection(bgg_user_id, own=None, preordered=None, prevowned=None, fortrade=None, want=None,
                   wanttobuy=None, wanttoplay=None, wishlist=None, comment=None):
    '''Download a BGG user's collection into a pandas DataFrame.

       For more information see:  https://boardgamegeek.com/wiki/page/BGG_XML_API2

       Get the board games, and then get the board game expansions.  This is a quirk of the
       BGG xmlapi2 interface, in that it will incorrectly return the expansions as
       subtype="boardgame", so we make two calls to get the boardgames, and then the
       expansions separately.

       Parameters:
           bgg_user_id:  The BGG user name (a string).  Surrounding whitespace is stripped and
                the name is URL-encoded before it is placed in the query string.
           own, preordered, prevowned, fortrade, want, wanttobuy, wanttoplay, wishlist, comment:
                Optional {0,1} flags passed to add_options(), which appends them to both
                query strings.

       Returns:  A pandas DataFrame with the designated boardgames in the user's collection,
       with columns containing information about the games such as the user rating, number of
       plays, etc.  Missing values in the integer columns are stored as -1.  Returns None if
       BGG answers either request with a 404.
    '''
    base_url = 'https://www.boardgamegeek.com/xmlapi2/collection?username=' + \
                   quote(bgg_user_id.strip(), safe='')

    #  First the base games (expansions excluded), then the expansions on their own.
    items = []
    for subtype_filter in ('&excludesubtype=boardgameexpansion&stats=1',
                           '&subtype=boardgameexpansion&stats=1'):
        url = add_options(base_url + subtype_filter, own, preordered, prevowned, fortrade, want,
                          wanttobuy, wanttoplay, wishlist, comment)
        fetched = _fetch_collection_items(url)
        if fetched is None:
            return None
        items.extend(fetched)

    rows = []
    for item in items:
        d = dict()
        d['objectid'] = item.attrs['objectid']
        d['subtype'] = item.attrs['subtype']
        year = item.find('yearpublished')
        if year is not None:
            d['yearpublished'] = year.text
        d['name'] = item.find('name').text
        #  The attributes of the "status" tag are copied in as columns of the same name.
        d.update(item.find('status').attrs)
        d['numplays'] = item.find('numplays').text
        rating = item.find('rating')
        if rating is not None:
            d['rating'] = rating.attrs['value']
        user_comment = item.find('comment')
        if user_comment is not None:
            d['comment'] = user_comment.text
        rows.append(d)

    glist = pd.DataFrame(rows, columns=['objectid','subtype','name','yearpublished','own',
                                        'prevowned','fortrade','want','wanttoplay','wanttobuy',
                                        'wishlist','preordered','lastmodified','numplays','rating',
                                        'comment'])
    for column in ['objectid', 'yearpublished', 'own', 'prevowned', 'fortrade', 'want', 'wanttoplay',
                   'wanttobuy', 'wishlist', 'preordered', 'numplays']:
        #  Assign the result back: fillna(inplace=True) on a column selection is a chained
        #  assignment that newer pandas versions no longer apply to the DataFrame itself.
        glist[column] = glist[column].fillna(-1).astype(np.int32)
    #  Convert the whole column at once; a row without a timestamp becomes NaT.
    glist['lastmodified'] = pd.to_datetime(glist['lastmodified'])

    return glist

In [90]:
#  Fetch the collection of BGG user 'craw-daddy'; prevowned=0 and comment=1 are passed
#  through to the collection query string.
c = get_collection('craw-daddy', prevowned=0, comment=1)

In [91]:
c

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
0,32224,boardgame,4th Corner,2007,0,0,0,0,0,0,0,0,2009-02-15 16:01:54,2,2,I would rather sit in the corner than play thi...
1,31260,boardgame,Agricola,2007,1,0,0,0,0,0,0,0,2016-02-29 10:44:49,23,7,While I obviously still haven't figured out th...
2,40760,boardgame,Alea Iacta Est,2009,1,0,0,0,0,0,0,0,2015-01-19 04:12:10,11,7,Interesting game that mitigates the luck of di...
3,6249,boardgame,Alhambra,2003,0,0,0,0,0,0,0,0,2010-12-13 16:38:23,6,6.5,Decent enough game. Will play it if it's sugg...
4,48726,boardgame,Alien Frontiers,2010,1,0,0,0,0,0,0,0,2012-01-02 10:25:15,13,6.5,"Hmmm, still trying to figure out if I like thi..."
5,5404,boardgame,Amun-Re,2003,1,0,0,0,0,0,0,0,2015-01-19 04:12:59,6,7.5,Probably my favorite game for five players. A...
6,13122,boardgame,Antiquity,2006,1,0,0,0,0,0,0,0,2015-01-19 04:14:16,2,7.5,A game (as one person described it) that activ...
7,179956,boardgame,The Arabian Pots,2015,1,0,1,0,0,0,0,0,2018-03-02 08:57:17,2,5,"An interesting idea, but doesn't really offer ..."
8,494,boardgame,Ave Caesar,1989,1,0,0,0,0,0,0,0,2012-01-02 10:26:30,20,7.5,"Great game, if played in the right spirit (and..."
9,230802,boardgame,Azul,2017,1,0,0,0,0,0,0,0,2018-03-02 08:58:13,8,7.4,Decent abstract game. Nice to pull out from t...


In [6]:
c.dtypes

objectid                  int32
subtype                  object
name                     object
yearpublished             int32
own                       int32
prevowned                 int32
fortrade                  int32
want                      int32
wanttoplay                int32
wanttobuy                 int32
wishlist                  int32
preordered                int32
lastmodified     datetime64[ns]
numplays                  int32
rating                   object
comment                  object
dtype: object

In [7]:
#  Show any rows whose 'lastmodified' value is missing.
c[c['lastmodified'].isna()]

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment


In [8]:
c[c['wishlist'] == 1]

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
50,4616,boardgame,Arimaa,2002,0,0,0,0,1,0,1,0,2013-11-07 10:36:58,0,,
56,245456,boardgame,Attack of the 50 Foot Colossi,2018,0,0,0,0,0,0,1,0,2018-11-12 19:36:38,0,,
59,231581,boardgame,AuZtralia,2018,0,0,0,0,0,0,1,0,2018-11-12 19:32:08,0,,
133,197376,boardgame,Charterstone,2017,0,0,0,0,0,0,1,0,2018-11-12 17:40:47,0,,
169,172558,boardgame,Crashland,2015,0,0,0,0,1,0,1,0,2016-11-01 17:38:10,0,,
184,192802,boardgame,Days of Ire: Budapest 1956,2016,0,0,0,0,0,0,1,0,2016-11-01 17:26:30,0,,
215,156180,boardgame,Eggs and Empires,2014,0,0,0,0,1,0,1,0,2016-11-14 11:44:50,0,,
225,188390,boardgame,Enemy Coast Ahead: The Doolittle Raid,-1,0,0,0,0,1,0,1,0,2016-11-05 11:55:14,0,,
251,257733,boardgame,Fine Sand,2018,0,0,0,0,0,0,1,0,2018-10-27 10:46:19,0,,
260,176262,boardgame,Fleet Wharfside,2015,0,0,0,0,0,0,1,0,2015-11-07 16:04:33,0,,


In [9]:
c = get_collection('Hopalong')

In [10]:
c

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
0,8257,boardgameexpansion,& Cetera,2003,1,0,0,0,0,0,0,0,2016-08-27 15:02:45,0,,Unplayed
1,40688,boardgameexpansion,10 Jahre alea - Schatzkiste,2009,1,0,0,0,0,0,0,0,2016-08-27 15:11:10,0,,
2,83368,boardgameexpansion,1825 Development Kit D1: Additional Tiles,2010,1,0,0,0,0,0,0,0,2016-08-27 15:29:53,0,,
3,17466,boardgameexpansion,1825 Extension Kit K1: Supplementary Tiles,1999,0,0,0,0,0,0,1,0,2011-01-25 13:45:08,0,,Replaced by D1
4,17467,boardgameexpansion,1825 Extension Kit K2: Advanced Trains,1999,1,0,0,0,0,0,0,0,2016-08-27 15:32:10,0,,
5,17468,boardgameexpansion,1825 Extension Kit K3: Phase Four,1999,0,0,0,0,0,1,1,0,2016-08-27 15:33:35,0,,
6,17469,boardgameexpansion,1825 Extension Kit K5: Minor Companies for Unit 2,2001,1,0,0,0,0,0,0,0,2016-08-27 15:34:34,0,,
7,17470,boardgameexpansion,1825 Extension Kit K6: Advanced Tiles,2004,0,0,0,0,0,0,1,0,2011-01-25 13:45:45,0,,Replaced by D1
8,17471,boardgameexpansion,"1825 Extension Kit K7: London, Tilbury and Sou...",1999,1,0,0,0,0,0,0,0,2016-08-27 15:35:14,0,,
9,17398,boardgameexpansion,1825 Regional Kit R1: Wales,1997,1,0,0,0,0,0,0,0,2016-08-27 15:36:02,0,,


In [11]:
c[c['wishlist'] == 1]

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
3,17466,boardgameexpansion,1825 Extension Kit K1: Supplementary Tiles,1999,0,0,0,0,0,0,1,0,2011-01-25 13:45:08,0,,Replaced by D1
5,17468,boardgameexpansion,1825 Extension Kit K3: Phase Four,1999,0,0,0,0,0,1,1,0,2016-08-27 15:33:35,0,,
7,17470,boardgameexpansion,1825 Extension Kit K6: Advanced Tiles,2004,0,0,0,0,0,0,1,0,2011-01-25 13:45:45,0,,Replaced by D1
12,184524,boardgameexpansion,1867: The Railways of Canada,2015,0,0,0,0,0,0,1,0,2015-09-25 06:49:18,0,,
20,167903,boardgameexpansion,20 Jahre Darmstadt Spielt,2014,0,0,0,0,0,0,1,0,2018-10-23 15:07:26,0,,
23,247315,boardgameexpansion,7 Wonders: Armada,2018,0,0,0,0,0,0,1,0,2018-08-28 11:14:13,0,,
58,23189,boardgameexpansion,Age of Steam Expansion: 1830's Pennsylvania / ...,2006,0,0,0,0,0,0,1,0,2018-10-23 15:20:32,0,,
103,124787,boardgameexpansion,Alcatraz: The Scapegoat – Maximum Security,2012,0,0,0,0,0,0,1,0,2016-10-15 10:18:30,0,,
199,226690,boardgameexpansion,Arkham Horror: The Card Game – Marie Lambeau P...,2016,0,0,0,0,0,0,1,0,2017-09-25 13:16:00,0,,
323,124486,boardgameexpansion,Cadwallon: City of Thieves – The King of Ashes,2012,0,0,0,0,0,0,1,0,2018-10-23 15:06:59,0,,


In [12]:
helixx = get_collection('Helixx')

In [13]:
helixx[helixx['wishlist'] == 1]

Unnamed: 0,objectid,subtype,name,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,numplays,rating,comment
3,193867,boardgame,1822: The Railways of Great Britain,2016,0,0,0,0,1,0,1,0,2017-11-17 17:02:21,0,,
9,38343,boardgame,Ad Astra,2009,0,0,0,0,0,0,1,0,2015-11-01 11:23:42,0,,
34,205346,boardgame,Beer Empire,2016,0,0,0,0,1,0,1,0,2016-11-01 10:34:39,0,,
53,172081,boardgame,Burgle Bros.,2015,0,0,0,0,0,0,1,0,2018-11-12 08:44:11,3,,
60,245934,boardgame,Carpe Diem,2018,0,0,0,0,0,0,1,0,2018-11-12 08:25:09,1,,
63,102794,boardgame,Caverna: The Cave Farmers,2013,0,0,0,0,0,0,1,0,2018-11-12 08:25:26,7,,
65,248900,boardgame,Ceylon,2018,0,0,0,0,0,0,1,0,2018-11-12 08:42:06,0,,
75,245638,boardgame,Coimbra,2018,0,0,0,0,0,0,1,0,2018-11-12 08:45:10,0,,
117,249381,boardgame,The Estates,2018,0,0,0,0,0,0,1,0,2018-11-12 11:38:23,1,,
121,183284,boardgame,Factory Funner,2016,0,0,0,0,0,0,1,0,2018-11-12 08:40:12,2,,


In [94]:
##  Retrieve all of the boardgame categories used by BGG for classification.
#   Inside the "boardgamecategory" div of the advanced-search page, the value of each
#   "propertyids[]" input is read as the category id and the text of each 70%-wide table
#   cell is read as the category name.

page = requests.get('https://boardgamegeek.com/advsearch/boardgame')
soup = BeautifulSoup(page.text,"lxml")
gc = soup.find("div",{"id":"boardgamecategory"})
catValues = [int(x.attrs['value']) for x in gc.find_all("input",{"name":"propertyids[]"})]
cats = [x.text for x in gc.find_all("td",{"width":"70%"})]

#  zip() silently stops at the shorter list, so check that ids and names pair up one-to-one.
assert len(catValues) == len(cats), "scraped category ids and names do not line up"

l = list(zip(catValues, cats))

#  Build the indexed frame in one expression so that re-running the cell is idempotent.
boardGameCategory = pd.DataFrame(l,columns=['id','category']).set_index('id')

In [93]:
boardGameCategory.dtypes

id           int64
category    object
dtype: object

In [95]:
boardGameCategory

Unnamed: 0_level_0,category
id,Unnamed: 1_level_1
1009,Abstract Strategy
1032,Action / Dexterity
1022,Adventure
2726,Age of Reason
1048,American Civil War
1108,American Indian Wars
1075,American Revolutionary War
1055,American West
1050,Ancient
1089,Animals


In [19]:
#  Serialize the category table with dill.  open() does not create directories, so this
#  assumes a "data" directory already exists in the current working directory.
with open('data/boardGameCategory.dill','wb') as f:
    dill.dump(boardGameCategory, f)

In [21]:
#  Get the mechanic categories used by BGG for describing games' mechanisms of play.
#  Relies on `soup` already holding the parsed advanced-search page from an earlier cell.
gm = soup.find("div",{'id':'boardgamemechanic'})
gm_values=[int(x.attrs['value']) for x in gm.find_all('input',{'name':'propertyids[]'})]
gm_strings=[x.text for x in gm.find_all('td',{'width':'70%'})]

#  zip() silently stops at the shorter list, so check that ids and names pair up one-to-one.
assert len(gm_values) == len(gm_strings), "scraped mechanic ids and names do not line up"

l = list(zip(gm_values, gm_strings))

#  Build the indexed frame in one expression so that re-running the cell is idempotent.
boardGameMechanic = pd.DataFrame(l, columns=['id','mechanic']).set_index('id')

In [22]:
boardGameMechanic

Unnamed: 0_level_0,mechanic
id,Unnamed: 1_level_1
2073,Acting
2689,Action / Movement Programming
2001,Action Point Allowance System
2080,Area Control / Area Influence
2043,Area Enclosure
2046,Area Movement
2021,Area-Impulse
2012,Auction/Bidding
2014,Betting/Wagering
2018,Campaign / Battle Card Driven


In [23]:
#  Serialize the mechanic table with dill.  open() does not create directories, so this
#  assumes a "data" directory already exists in the current working directory.
with open('data/boardGameMechanic.dill','wb') as f:
    dill.dump(boardGameMechanic,f)

In [24]:
i = get_thing(1)

In [25]:
type(i)

str

In [26]:
#  Request thing 1 by hand (the same endpoint get_thing() uses), keeping the Response object
#  itself rather than just its cleaned-up text.
url = 'https://www.boardgamegeek.com/xmlapi2/thing?id=1'
r = requests.get(url)

In [27]:
type(r)

requests.models.Response

In [28]:
result = BeautifulSoup(r.text, "lxml")

In [29]:
result

<?xml version="1.0" encoding="utf-8"?><html><body><items termsofuse="https://boardgamegeek.com/xmlapi/termsofuse"><item id="1" type="boardgame">
<thumbnail>https://cf.geekdo-images.com/thumb/img/RgXAhOreEqPeNiPpDPEUTwLm5Wk=/fit-in/200x150/pic159509.jpg</thumbnail>
<image>https://cf.geekdo-images.com/original/img/vOttDcPBg1Tas9F6vFDhRmVaNH8=/0x0/pic159509.jpg</image>
<name sortindex="5" type="primary" value="Die Macher"></name>
<description>Die Macher is a game about seven sequential political races in different regions of Germany. Players are in charge of national political parties, and must manage limited resources to help their party to victory. The winning party will have the most victory points after all the regional elections. There are four different ways of scoring victory points. First, each regional election can supply one to eighty victory points, depending on the size of the region and how well your party does in it. Second, if a party wins a regional election and has some m

In [30]:
result.find("item").attrs['id']

'1'

In [31]:
result.find("name").attrs['value']

'Die Macher'

In [32]:
result.find("description").text

'Die Macher is a game about seven sequential political races in different regions of Germany. Players are in charge of national political parties, and must manage limited resources to help their party to victory. The winning party will have the most victory points after all the regional elections. There are four different ways of scoring victory points. First, each regional election can supply one to eighty victory points, depending on the size of the region and how well your party does in it. Second, if a party wins a regional election and has some media influence in the region, then the party will receive some media-control victory points. Third, each party has a national party membership which will grow as the game progresses and this will supply a fair number of victory points. Lastly, parties score some victory points if their party platform matches the national opinions at the end of the game.&#10;&#10;The 1986 edition featured four parties from the old West Germany and supported

In [33]:
result.find("yearpublished").attrs['value']

'1986'

In [34]:
result.find("minplayers").attrs['value']

'3'

In [35]:
result.find("maxplayers").attrs['value']

'5'

In [36]:
#  Categories of the game: one key per "boardgamecategory" link, each flagged with 1.
catDict = dict.fromkeys(
    (link.attrs['value'] for link in result.find_all("link", {"type": "boardgamecategory"})),
    1,
)
catDict

{'Economic': 1, 'Negotiation': 1, 'Political': 1}

In [37]:
#  Mechanics of the game: one key per "boardgamemechanic" link, each flagged with 1.
mechDict = dict.fromkeys(
    (link.attrs['value'] for link in result.find_all('link', {'type': 'boardgamemechanic'})),
    1,
)
mechDict

{'Area Control / Area Influence': 1,
 'Auction/Bidding': 1,
 'Dice Rolling': 1,
 'Hand Management': 1,
 'Simultaneous Action Selection': 1}

In [38]:
#  "Families" of the game
familyDict = dict.fromkeys(
    #  one key per "boardgamefamily" link, each flagged with 1
    (link.attrs['value'] for link in result.find_all('link', {'type': 'boardgamefamily'})),
    1,
)
familyDict

{'Country: Germany': 1,
 'Political: Elections': 1,
 'Valley Games Classic Line': 1}

In [39]:
int(True)

1

In [82]:
#  Fetch thing 10 with two extra keyword options (passed straight into the query string),
#  then parse the returned text.
r = get_thing(10,blah=34, stuff='thing')
#  Name the parser explicitly, like the other BeautifulSoup calls in this notebook.  Without
#  it bs4 picks whichever parser is installed and warns, so results can vary by environment.
r = BeautifulSoup(r, 'lxml')

https://www.boardgamegeek.com/xmlapi2/thing?id=10&blah=34&stuff=thing


In [83]:
r.findAll('item')

[<item id="10" type="boardgame"> <thumbnail>https://cf.geekdo-images.com/thumb/img/eqDi6VPAU3XUvDDhqur97X2umO0=/fit-in/200x150/pic1798136.jpg</thumbnail> <image>https://cf.geekdo-images.com/original/img/M3fRcs2sKpffc7Yl1KXodvbBa5Q=/0x0/pic1798136.jpg</image> <name sortindex="1" type="primary" value="Elfenland"></name> <name sortindex="1" type="alternate" value="Elfenland (Волшебное Путешествие)"></name> <description>Elfenland is a redesign of the original White Wind game Elfenroads.  The game is set in the mythical world of the elves.  A group of fledgling elves (the players) are charged with visiting as many of the twenty Elfencities as they can over the course of 4 rounds.  To accomplish the task they will use various forms of transportation such as Giant Pigs, Elfcarts, Unicorns, Rafts, Magic Clouds, Trollwagons, and Dragons.&amp;#10;&amp;#10;Gameplay:  Players begin in the Elf capitol, draw one face down movement tile, and are dealt eight transport cards and a secret 'home' city ca

In [59]:
type(r)

bs4.BeautifulSoup