In [1]:
import re
import time

import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup
import lxml

import dill
import glob
import os

from datetime import datetime, timedelta

#  Filter out some annoying warnings from the latest version of BeautifulSoup
import warnings
from bs4.builder import XMLParsedAsHTMLWarning
warnings.filterwarnings('ignore', category=XMLParsedAsHTMLWarning)

In [2]:
# Root URL of the BoardGameGeek XML API2; endpoint paths and query strings are appended to it.
BASE_API = 'https://www.boardgamegeek.com/xmlapi2'

# Directory in which downloaded user collections are cached as dill files.
USER_DATA = 'USERS'
# Directory in which downloaded Geekbuddy lists are cached as dill files.
GEEKBUDDIES_DATA = 'GEEKBUDDIES'

In [3]:
def _newest_cache_file(directory, user_name):
    '''Locate the most recent cache file written for exactly this user.

       Cache files are named "<user>-YYYYMMDD-HHMM.dill".  The file name 
       is matched in full, so a user called "craw" never picks up a file 
       that belongs to "craw-daddy".

       Returns:  a (path, datetime) tuple, or None if no such file exists.
    '''
    stamped = re.compile(re.escape(user_name) + r'-(\d{8}-\d{4})\.dill')
    newest = None
    for path in glob.glob(f'{directory}/{glob.escape(user_name)}-*.dill'):
        match = stamped.fullmatch(os.path.basename(path))
        if match is None:
            continue
        try:
            stamp = datetime.strptime(match.group(1), '%Y%m%d-%H%M')
        except ValueError:
            #  Digits in the right places, but not a real date/time.
            continue
        if newest is None or stamp > newest[1]:
            newest = (path, stamp)
    return newest


def _replace_cache_file(directory, user_name, payload):
    '''Store payload as the only cache file for this user, stamped with 
       the current time.  Older files of the same user are removed first.
    '''
    os.makedirs(directory, exist_ok=True)
    stamped = re.compile(re.escape(user_name) + r'-\d{8}-\d{4}\.dill')
    for path in glob.glob(f'{directory}/{glob.escape(user_name)}-*.dill'):
        if stamped.fullmatch(os.path.basename(path)):
            os.remove(path)

    now = datetime.strftime(datetime.now(), '%Y%m%d-%H%M')
    with open(f'{directory}/{user_name}-{now}.dill', 'wb') as f:
        dill.dump(payload, f)


def _download_collection_xml(url, retry_delay=12, max_retries=50, timeout=60):
    '''Download one collection document from BGG.

       BGG usually queues collection requests and answers 202 until the 
       export is ready, so we sleep and retry, but only max_retries 
       times rather than forever.

       Returns:  a (text, problem) tuple in which exactly one entry is None.
    '''
    r = requests.get(url, timeout=timeout)
    attempts = 0
    while r.status_code == 202:
        if attempts >= max_retries:
            return None, 'BGG is still preparing the collection, try again later'
        attempts += 1
        time.sleep(retry_delay)
        r = requests.get(url, timeout=timeout)

    if r.status_code == 404:
        return None, 'Page not found'
    if r.status_code != 200:
        #  E.g. 429 (rate limited) or a 5xx.  Such a reply must not be 
        #  mistaken for (and cached as) an empty collection.
        return None, f'BGG request failed with HTTP status {r.status_code}'
    return r.text, None


def _collection_row(item, bggUserName):
    '''Turn one <item> tag of the collection XML into a flat dict.'''
    d = dict()
    d['id'] = int(item.attrs['objectid'])
    d['name'] = item.find('name').text
    d['subtype'] = item.attrs['subtype']

    #  Always create the key, so the column exists even when no game in 
    #  the collection has a publication year.
    year = item.find('yearpublished')
    d['yearpublished'] = int(year.text) if year is not None else np.nan

    #  The status attributes carry the own/want/... flags, lastmodified 
    #  and, for wishlisted games, wishlistpriority.
    d.update(item.find("status").attrs)
    d['numplays'] = int(item.find('numplays').text)
    d['lastmodified'] = pd.to_datetime(d['lastmodified'])

    rating = item.find('rating')
    value = rating.attrs['value'] if rating is not None else 'N/A'
    d['rating'] = np.nan if value == 'N/A' else float(value)

    #  wishlistpriority normally arrives as a status attribute (see 
    #  above), so only fall back to NaN when it is really absent.
    priority = item.find('wishlistpriority')
    if priority is not None:
        d['wishlistpriority'] = priority.text
    else:
        d.setdefault('wishlistpriority', np.nan)

    comment = item.find('comment')
    d['comment'] = comment.text if comment is not None else np.nan
    d['username'] = bggUserName
    return d


def get_collection(bggUserName, cutoff=timedelta(days=7)):
    '''For more information see:  https://boardgamegeek.com/wiki/page/BGG_XML_API2

       Get the board games, and then get the board game 
       expansions.  This is a quirk of the BGG xmlapi2 interface, 
       in that it will incorrectly return the expansions as 
       subtype="boardgame", so we make two calls to get the 
       boardgames, and then the expansions separately.

       Parameters:
         bggUserName -- the BGG user name (surrounding whitespace is 
                        stripped).
         cutoff      -- maximum age (a timedelta) of a cached copy that 
                        may be returned instead of downloading.  Pass 
                        None to skip the cache check entirely.

       Returns:  A pandas DataFrame, indexed by game id and sorted by 
       name, with the designated boardgames in the user's collection 
       and columns containing information about the games such as the 
       user rating, number of plays, etc.  Missing integer values are 
       stored as -1.  

       If BGG reports a problem (unknown user, 404, another HTTP 
       failure), the error message is returned as a str instead and 
       nothing is cached.

       Note:  In an effort to reduce traffic, we will check
       if we have previously retrieved the collection within
       the cutoff period.  If so, we just load and
       return that information, otherwise we will download the
       collection and store it in USER_DATA.
    '''
    bggUserName = bggUserName.strip()

    #  Check:  Do we have a previous version of this
    #  collection that was retrieved in the last 7 days (by default)? 
    #  If so, we use that.  Otherwise we get the collection
    #  information from BGG.
    if cutoff is not None:
        cached = _newest_cache_file(USER_DATA, bggUserName)
        if cached is not None:
            path, stamp = cached
            if (datetime.now() - stamp) <= cutoff:
                #  NOTE:  dill.load can execute arbitrary code, so only 
                #  files written by this notebook belong in USER_DATA.
                with open(path, 'rb') as f:
                    return dill.load(f)

    items = []
    for game_type in ['excludesubtype=boardgameexpansion', 
                      'subtype=boardgameexpansion']:
        url = f'{BASE_API}/collection?username={bggUserName}&{game_type}&stats=1'

        text, problem = _download_collection_xml(url)
        if problem is not None:
            return problem

        soup = BeautifulSoup(re.sub('[\n\t]', '', text), 'lxml')
        #  Check if there was an error from BGG, such as 
        #  an invalid username.  Return the error message if found.  
        error = soup.find('error')
        if error is not None:
            return error.text
        items.extend(soup.find_all('item'))

    ##  Handle a special case where someone has not logged their collection, in
    ##  order to avoid certain errors.  The stored template is reused.
    if len(items) == 0:
        with open(f'{USER_DATA}/______no_collection.dill', 'rb') as f:
            glist = dill.load(f)
        _replace_cache_file(USER_DATA, bggUserName, glist)
        return glist

    rows = [_collection_row(item, bggUserName) for item in items]

    columns = ['name', 'subtype', 'yearpublished', 'own', 'prevowned', 'fortrade',
       'want', 'wanttoplay', 'wanttobuy', 'wishlist', 'preordered',
       'lastmodified', 'rating', 'numplays', 'wishlistpriority',
       'comment', 'username']
    int_columns = ['yearpublished', 'own', 'prevowned', 
                   'fortrade', 'want', 'wanttoplay', 
                   'wanttobuy', 'wishlist', 'preordered', 
                   'numplays', 'wishlistpriority']

    #  reindex (rather than plain selection) tolerates a status flag that 
    #  BGG did not send:  the column is created and filled with -1 below.
    glist = (pd.DataFrame(rows)
               .set_index('id')
               .sort_values('name')
               .reindex(columns=columns))
    glist = glist.assign(**{column: glist[column].fillna(-1).astype(np.int32)
                            for column in int_columns})

    #  Let's save the collection once we have it, replacing any 
    #  previous version for this user.
    _replace_cache_file(USER_DATA, bggUserName, glist)

    return glist

In [4]:
class User():
    '''A class to denote a BGG user, i.e. identified by their 
       BGG username and a method to gather their collection 
       (assuming it's been put onto BGG).  

       Includes a method to get the list of Geekbuddies of 
       a user, and various other methods to filter the collection.

       Attributes:
         bggUserName -- the user name, stripped of surrounding whitespace.
         collection  -- the DataFrame returned by get_collection.
    '''
    def __init__(self, bggUserName, cutoff=timedelta(days=7)):
        '''Load the collection of bggUserName (a cached copy no older 
           than cutoff is used when available).

           Raises:  ValueError carrying the error message if the 
           collection could not be retrieved.
        '''
        self.bggUserName = bggUserName.strip()
        #  Gather the collection of a user
        self.collection = get_collection(self.bggUserName, cutoff)
        if isinstance(self.collection, str):
            raise ValueError(f'{self.collection}')
            
    def __repr__(self):
        return f'BGG User: {self.bggUserName}'
    
    def refresh_collection(self):
        '''Force an immediate "refresh" of the collection information 
           of a user.

           Raises:  ValueError carrying the error message if the 
           download failed.  The previously loaded collection is kept 
           in that case, rather than being replaced by the message.
        '''
        fresh = get_collection(self.bggUserName, cutoff=timedelta(seconds=0))
        if isinstance(fresh, str):
            raise ValueError(f'{fresh}')
        self.collection = fresh

    def filter(self, subtype=None,
               own=None, prevowned=None, 
               fortrade=None, 
               want=None, wanttoplay=None, wanttobuy=None, wishlist=None, 
               preordered=None, 
               has_rating=None, has_comment=None,
               wishlistpriority=None, 
               yearpublished=None, published_before=None, published_after=None,
               min_numplays=0, max_numplays=None):
        '''A method to filter the collection based on various 
           criteria and return a new DataFrame with the filtered 
           games.  This does not modify the underlying "collection" 
           information of a user. 

           Every criterion left at None (or given a value of an 
           unsupported type) is ignored; the others are combined 
           with a logical AND.

           subtype            -- 'boardgame' or 'boardgameexpansion'.
           own ... preordered -- truthy/falsy flag the status column 
                                 must equal (as 1 or 0).
           has_rating, has_comment -- True keeps rows that have a 
                                 value, False keeps rows that lack one.
           wishlistpriority, yearpublished -- exact integer match.
           published_before, published_after -- strict bounds on 
                                 yearpublished.
           min_numplays, max_numplays -- inclusive bounds on numplays.
        '''
        #  NumPy scalars (e.g. values pulled out of another DataFrame) 
        #  are accepted next to the built-in number types.
        flag_types = (bool, int, np.bool_, np.integer)
        integer_types = (int, np.integer)
        number_types = (int, float, np.integer, np.floating)

        frame = self.collection
        #  One positional boolean mask is accumulated and applied once.  
        #  It is a plain array, so a repeated game id in the index 
        #  cannot cause alignment problems.
        keep = np.ones(len(frame), dtype=bool)

        if isinstance(subtype, str) and subtype in ['boardgame', 'boardgameexpansion']:
            keep &= (frame['subtype'] == subtype).to_numpy()
            
        for option, flag in [('own', own),
                             ('prevowned', prevowned), 
                             ('fortrade', fortrade), 
                             ('want', want), 
                             ('wanttoplay', wanttoplay),
                             ('wanttobuy', wanttobuy), 
                             ('wishlist', wishlist), 
                             ('preordered', preordered)]:
            if isinstance(flag, flag_types):
                keep &= (frame[option] == int(flag)).to_numpy()
        
        for option, flag in [('rating', has_rating),
                             ('comment', has_comment)]:
            if isinstance(flag, flag_types):
                missing = frame[option].isna().to_numpy()
                keep &= ~missing if flag else missing

        if isinstance(wishlistpriority, integer_types):
            keep &= (frame['wishlistpriority'] == wishlistpriority).to_numpy()

        if isinstance(yearpublished, integer_types):
            keep &= (frame['yearpublished'] == yearpublished).to_numpy()
        if isinstance(published_before, integer_types):
            keep &= (frame['yearpublished'] < published_before).to_numpy()
        if isinstance(published_after, integer_types):
            keep &= (frame['yearpublished'] > published_after).to_numpy()

        if isinstance(min_numplays, number_types):
            keep &= (frame['numplays'] >= min_numplays).to_numpy()
        if isinstance(max_numplays, number_types):
            keep &= (frame['numplays'] <= max_numplays).to_numpy()
            
        return frame[keep].copy()
    
    def own(self):
        '''Games currently owned.'''
        return self.filter(own=True)
    
    def prevowned(self):
        '''Games flagged as previously owned.'''
        return self.filter(prevowned=True)
    
    def fortrade(self):
        '''Games flagged as for trade.'''
        return self.filter(fortrade=True)
    
    def want(self):
        '''Games flagged as wanted.'''
        return self.filter(want=True)
    
    def wanttoplay(self):
        '''Games flagged as "want to play".'''
        return self.filter(wanttoplay=True)
    
    def wanttobuy(self):
        '''Games flagged as "want to buy".'''
        return self.filter(wanttobuy=True)
    
    def wishlist(self):
        '''Games on the wishlist.'''
        return self.filter(wishlist=True)
    
    def preordered(self):
        '''Games flagged as preordered.'''
        return self.filter(preordered=True)

    def has_rating(self):
        '''Games the user has rated.'''
        return self.filter(has_rating=True)
    
    def has_comment(self):
        '''Games the user has commented on.'''
        return self.filter(has_comment=True)
    
    def base(self):
        '''Base games only (subtype "boardgame").'''
        return self.filter(subtype='boardgame')
    
    def expansion(self):
        '''Expansions only (subtype "boardgameexpansion").'''
        return self.filter(subtype='boardgameexpansion')

    @staticmethod
    def _newest_cache_file(directory, user_name):
        '''Return (path, datetime) of the most recent cache file named 
           "<user>-YYYYMMDD-HHMM.dill" for exactly this user, or None.  
           The whole file name is matched, so users sharing a name 
           prefix never see each other's files.
        '''
        stamped = re.compile(re.escape(user_name) + r'-(\d{8}-\d{4})\.dill')
        newest = None
        for path in glob.glob(f'{directory}/{glob.escape(user_name)}-*.dill'):
            match = stamped.fullmatch(os.path.basename(path))
            if match is None:
                continue
            try:
                stamp = datetime.strptime(match.group(1), '%Y%m%d-%H%M')
            except ValueError:
                continue
            if newest is None or stamp > newest[1]:
                newest = (path, stamp)
        return newest

    @staticmethod
    def _replace_cache_file(directory, user_name, payload):
        '''Write payload as the only cache file of this user, stamped 
           with the current time; older files of that user are removed.
        '''
        os.makedirs(directory, exist_ok=True)
        stamped = re.compile(re.escape(user_name) + r'-\d{8}-\d{4}\.dill')
        for path in glob.glob(f'{directory}/{glob.escape(user_name)}-*.dill'):
            if stamped.fullmatch(os.path.basename(path)):
                os.remove(path)

        now = datetime.strftime(datetime.now(), '%Y%m%d-%H%M')
        with open(f'{directory}/{user_name}-{now}.dill', 'wb') as f:
            dill.dump(payload, f)
    
    def geekbuddies(self, cutoff=timedelta(days=7)):
        '''Get the list of Geekbuddies of a user.  This 
           assumes that the user has at most 1000 Geekbuddies 
           (which is the current maximum number returned 
           from the API call, without additional pagination, 
           which I am not doing here).

           cutoff -- maximum age (a timedelta) of a cached list that 
                     may be returned instead of downloading; None 
                     skips the cache check.

           Returns:  a list of (name, id) tuples, both strings.  If BGG 
           answers with an error, the error message is returned as a 
           str instead, so callers must check the type of the result.
        '''

        #  Check to see if this has been retrieved recently, and, if so,
        #  just load and return that data.  
        if cutoff is not None:
            cached = self._newest_cache_file(GEEKBUDDIES_DATA, self.bggUserName)
            if cached is not None:
                path, stamp = cached
                if (datetime.now() - stamp) <= cutoff:
                    #  NOTE:  dill.load can execute arbitrary code, so 
                    #  only files written by this notebook belong here.
                    with open(path, 'rb') as f:
                        return dill.load(f)

        #  Otherwise, make the call to retrieve and store this information.
        url = f'{BASE_API}/users?name={self.bggUserName}&buddies=1'
        result = requests.get(url, timeout=60)
        #  Parse once, with an explicit parser.
        soup = BeautifulSoup(result.text, 'lxml')
        error = soup.find('error')
        if error is not None:
            return f'{error.text}'
        
        buddies = [(item.get('name'), item.get('id')) for item in soup.find_all('buddy')]

        #  Let's save the list of geekbuddies once we have it, 
        #  replacing any previous version for this user.
        self._replace_cache_file(GEEKBUDDIES_DATA, self.bggUserName, buddies)
            
        return buddies

In [5]:
#  Build a User for 'craw-daddy'; the constructor loads the collection via get_collection.
crawdaddy = User('craw-daddy')

In [6]:
#  Reload the collection with a zero-length cache cutoff, i.e. ask for a fresh copy.
crawdaddy.refresh_collection()

In [7]:
#  Show this user's Geekbuddies as (name, id) tuples.
crawdaddy.geekbuddies()

[('Aelf', '35433'),
 ('alexmunky', '259149'),
 ('Angband', '250258'),
 ('Ben_Tyrer', '68434'),
 ('Blitzburgh', '1391336'),
 ('cathal', '93409'),
 ('chooi', '17083'),
 ('cree1978', '165662'),
 ('gameslore', '22899'),
 ('ghazghkull', '306324'),
 ('Helixx', '243425'),
 ('here comes bod', '369413'),
 ('Hopalong', '298'),
 ('jmoore032000', '315188'),
 ('Jugular', '13209'),
 ('kergenramirez', '334879'),
 ('Leocrusher', '434697'),
 ('McGran', '261104'),
 ('Mr Pink', '148447'),
 ('PaulGrogan', '1685'),
 ('Polydor', '131595'),
 ('Sapo', '164378'),
 ('stargate', '92653'),
 ('stejames', '143504'),
 ('tonyboydell', '7766'),
 ('VALIS13', '315336'),
 ('W Eric Martin', '15734'),
 ('Zanfus', '388555')]

In [8]:
#  Inspect the dtype of each column of the collection DataFrame.
crawdaddy.collection.dtypes

name                        object
subtype                     object
yearpublished                int32
own                          int32
prevowned                    int32
fortrade                     int32
want                         int32
wanttoplay                   int32
wanttobuy                    int32
wishlist                     int32
preordered                   int32
lastmodified        datetime64[ns]
rating                     float64
numplays                     int32
wishlistpriority             int32
comment                     object
username                    object
dtype: object

In [9]:
#  Example filter: owned base games (subtype 'boardgame') published in 2022.
crawdaddy.filter(subtype='boardgame', yearpublished=2022, own=True)

Unnamed: 0_level_0,name,subtype,yearpublished,own,prevowned,fortrade,want,wanttoplay,wanttobuy,wishlist,preordered,lastmodified,rating,numplays,wishlistpriority,comment,username
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
357485,18Svea,boardgame,2022,1,0,0,0,0,0,0,0,2023-06-16 15:56:37,,1,-1,,craw-daddy
360471,Aquamarine,boardgame,2022,1,0,0,0,0,0,0,0,2023-06-14 14:58:03,7.0,3,-1,Fairly light but enjoyable roll-and-write. Go...,craw-daddy
342942,Ark Nova,boardgame,2022,1,0,0,0,0,0,0,0,2022-05-16 14:14:46,8.0,12,-1,All plays so far have been with only two playe...,craw-daddy
255360,Bargain Basement Bathysphere,boardgame,2022,1,0,0,0,0,0,0,0,2023-02-21 16:54:36,,1,-1,,craw-daddy
339532,Battlecrest: Fellwoods Base Game,boardgame,2022,1,0,0,0,0,0,0,0,2022-10-01 22:05:33,,0,-1,,craw-daddy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350759,Vault: Premium Artwork,boardgame,2022,1,0,0,0,0,0,0,0,2022-08-09 12:48:52,,0,-1,,craw-daddy
364797,Watch Out! That's a Dracula!,boardgame,2022,1,0,0,0,0,0,0,0,2023-02-23 20:42:34,,0,-1,,craw-daddy
237179,Weather Machine,boardgame,2022,1,0,0,0,0,0,0,0,2022-10-29 20:21:16,,3,-1,,craw-daddy
371636,Wildtails: A Pirate Legacy,boardgame,2022,1,0,0,0,0,0,0,0,2023-02-23 20:42:45,,0,-1,,craw-daddy


In [10]:
#  List the column names of the collection DataFrame.
crawdaddy.collection.columns

Index(['name', 'subtype', 'yearpublished', 'own', 'prevowned', 'fortrade',
       'want', 'wanttoplay', 'wanttobuy', 'wishlist', 'preordered',
       'lastmodified', 'rating', 'numplays', 'wishlistpriority', 'comment',
       'username'],
      dtype='object')

In [None]:
#  Breadth-first crawl of the Geekbuddy graph, starting from a seed user.
#  Every user visited has their collection and buddy list fetched (or 
#  loaded from the on-disk cache), which populates the caches.
MAX_LEVEL = 2        #  deepest level to crawl; the seed user is level 0
REQUEST_PAUSE = 12   #  seconds to wait between users, to go easy on BGG

users = ['russ']
visited = set()      #  buddy links are often reciprocal, so never crawl a user twice

level = 0
while level <= MAX_LEVEL and users:
    print(f'----------  {level}  ---------')
    new_users = []
    for u in users[::-1]:
        if u in visited:
            continue
        visited.add(u)
        print(u)
        try:
            person = User(u)
            buddies = person.geekbuddies()
        except ValueError as err:
            #  The collection could not be retrieved (e.g. unknown user).
            #  Skip this user instead of aborting the whole crawl.
            buddies = str(err)

        if isinstance(buddies, str):
            #  geekbuddies() reports problems as a message string.  Iterating 
            #  over it would add single characters as "user names".
            print(f'    skipped: {buddies}')
        else:
            new_users.extend([i[0] for i in buddies])
        time.sleep(REQUEST_PAUSE)
        
    users = new_users
    level += 1

----------  0  ---------
russ
----------  1  ---------
Zielu
zboat
zara2stra
yid0
