In [69]:
import requests
import re
import xmltodict
from bs4 import BeautifulSoup

# Scrape page 1 of the BoardGameGeek "browse board games" table and build one
# summary record per listed game in `board_games`.
response = requests.get("https://boardgamegeek.com/browse/boardgame/page/1")
soup = BeautifulSoup(response.content, 'html.parser')
table = soup.find('table', {'class': 'collection_table'})
rows = table.find_all('tr')[1:]  # drop the first <tr> (the header row)

board_games = []
for row in rows:
    columns = row.find_all('td')
    if len(columns) < 6:
        # Rows with fewer than six cells cannot supply the fields read below.
        continue

    ## Get the information in the summary table
    rank = int(columns[0].text.strip())

    # The thumbnail cell holds a link to the game page wrapping an <img>;
    # the numeric game id is taken from the link and the image URL from `src`.
    image_html_tag = str(columns[1].find("a").find("img"))
    id_html_tag = str(columns[1].find("a"))
    # Raw strings keep `\d` a regex escape rather than an (invalid) string escape.
    game_id = re.search(r'/(\d+)/', id_html_tag).group(1)
    image_url = re.search(r'src="(.*?)"', image_html_tag).group(1)

    name = columns[2].find_all('a')[0].text.strip()
    description = columns[2].find('p', class_='smallefont dull').text.strip()

    # Rating columns are read as text; they are not stored in the record yet.
    geek_rating = columns[3].text.strip()
    avg_rating = columns[4].text.strip()

    num_ratings = columns[5].text.strip()

    ## Call the api to get more detailed information about the game
    # Query the id scraped from this row (previously a fixed id was requested
    # for every row, so all rows received the same game's details).
    # NOTE(review): this issues one API request per table row; the service may
    # throttle rapid requests -- confirm and add a delay/caching if needed.
    response = requests.get(f"https://www.boardgamegeek.com/xmlapi2/thing?id={game_id}&stats=1")
    # Convert the XML response to a dictionary
    # NOTE(review): data_dict is not yet merged into the record below.
    data_dict = xmltodict.parse(response.content)


    board_game = {'id':game_id, 'rank': rank, 'name': name, 'description': description, 'img_url':image_url}
    board_games.append(board_game)

In [71]:
# Number of game records collected by the scraping cell.
len(board_games)

100

In [80]:
# Read the total number of browse pages from the pagination link whose
# title is "last page"; its text is the page number wrapped in brackets.
response = requests.get("https://boardgamegeek.com/browse/boardgame/")
soup = BeautifulSoup(response.content, 'html.parser')
last_page_link = soup.find('a', title='last page')
last_page_label = last_page_link.text.strip("[] ")  # strip brackets and spaces
num_pages = int(last_page_label)
num_pages

1443

In [79]:
# Scratch check: print the bracket-stripped text of the "last page" link.
# Relies on `soup` already holding a parsed browse page from another cell.
print(soup.find('a', title='last page').text.strip("[] "))

1443


In [161]:
# Fetch the detailed XML record for a single game (id 224517) with stats=1
# in the query string; the cells that follow explore this one response.
response = requests.get("https://www.boardgamegeek.com/xmlapi2/thing?id=224517&stats=1")
# Convert the XML response to a dictionary
# (attribute values appear under '@'-prefixed keys such as '@value', as used by the cells that read data_dict)
data_dict = xmltodict.parse(response.content)

In [139]:
# Maximum player count for the game, converted from the string attribute to int.
# NOTE(review): the recorded output shows the string '4', which int() cannot
# return -- the cell was presumably edited after it last ran; re-run to refresh.
int(data_dict['items']['item']['maxplayers']['@value'])

'4'

In [158]:
## Extract the best player counts for this game based on the BGG user poll
num_players_poll = [ poll for poll in data_dict['items']['item']['poll'] if poll['@name']=='suggested_numplayers'][0]
num_votes = int(num_players_poll['@totalvotes'])
num_players_dict = {}
for poll_result in num_players_poll['results']:    
    recommended = 0
    not_recommended = 0
    player_count = poll_result['@numplayers']
    for vote in poll_result['result']:
        match vote['@value']:
            case 'Best':
                recommended += int(vote['@numvotes'])
                not_recommended -= int(vote['@numvotes'])
            case 'Recommended':
                recommended += int(vote['@numvotes'])
            case 'Not Recommended':
                not_recommended += int(vote['@numvotes'])
    rec = recommended - not_recommended
    
    if '+' in player_count:
        max_players = int(data_dict['items']['item']['maxplayers']['@value'])
        for i in range(int(player_count.strip("+")), max_players + 1):
            if i in num_players_dict:
                num_players_dict[i] = num_players_dict[i] + rec
            else:
                num_players_dict[i] = rec
    else:
        num_players_dict[int(player_count)] = rec

num_players_dict = {(k, v/num_votes) for k,v in num_players_dict.items()}

In [159]:
# Display the normalised per-player-count scores computed in the poll cell.
num_players_dict

{(1, -0.46705202312138727),
 (2, 0.7236994219653179),
 (3, 1.3988439306358382),
 (4, 0.8520231213872832)}

In [126]:
# List the top-level fields available on the game item.
# NOTE(review): the recorded key list has no 'statistics' entry although a
# later cell reads it -- presumably this output predates the stats=1 request;
# re-run to refresh.
data_dict['items']['item'].keys()

dict_keys(['@type', '@id', 'thumbnail', 'image', 'name', 'description', 'yearpublished', 'minplayers', 'maxplayers', 'poll', 'playingtime', 'minplaytime', 'maxplaytime', 'minage', 'link'])

In [187]:
# Image URL for the game (a plain string, unlike the '@value'-style fields).
data_dict['items']['item']['image']

'https://cf.geekdo-images.com/x3zxjr-Vw5iU4yDPg70Jgw__original/img/FpyxH41Y6_ROoePAilPNEhXnzO8=/0x0/filters:format(jpeg)/pic3490053.jpg'

In [182]:
# Playing time; the value is a string (the recorded output shows '120'), so cast with
# int() before doing arithmetic. Units are presumably minutes -- confirm.
data_dict['items']['item']['playingtime']['@value']

'120'

In [185]:
# Maximum playing time, also returned as a string (the recorded output shows '120').
data_dict['items']['item']['maxplaytime']['@value']

'120'

In [107]:
## Get the long (ish) description
# NOTE(review): the recorded output still contains character entities such as
# &#10; and &pound;, so the text presumably needs decoding (e.g. with
# html.unescape) before it is shown to users -- confirm.
data_dict['items']['item']['description']

"Brass: Birmingham is an economic strategy game sequel to Martin Wallace' 2007 masterpiece, Brass. Brass: Birmingham tells the story of competing entrepreneurs in Birmingham during the industrial revolution, between the years of 1770-1870.&#10;&#10;As in its predecessor, you must develop, build, and establish your industries and network, in an effort to exploit low or high market demands.&#10;&#10;Each round, players take turns according to the turn order track, receiving two actions to perform any of the following actions (found in the original game):&#10;&#10;1) Build - Pay required resources and place an industry tile.&#10;2) Network - Add a rail / canal link, expanding your network.&#10;3) Develop - Increase the VP value of an industry.&#10;4) Sell - Sell your cotton, manufactured goods and pottery.&#10;5) Loan - Take a &pound;30 loan and reduce your income.&#10;&#10;Brass: Birmingham also features a new sixth action:&#10;&#10;6) Scout - Discard three cards and take a wild location

In [170]:
## Get the complexity score
# Read from the ratings statistics as 'averageweight'; the value is a string
# (the recorded output shows '3.8983'), so cast with float() for numeric use.
# NOTE(review): 'statistics' is presumably only present because the request
# used stats=1 -- confirm.
data_dict['items']['item']['statistics']['ratings']['averageweight']['@value']

'3.8983'

In [179]:
# Sort the item's <link> entries into one list per link type.
# NOTE(review): the saved output for this cell lists 'boardgamefamily', which
# is handled here -- that output presumably predates the family bucket; re-run
# to refresh.
categories, mechanics, publishers = [], [], []
artists, designers = [], []
other_implementations, families = [], []

# link type -> (destination list, attribute to store)
link_buckets = {
    "boardgamecategory": (categories, '@value'),
    "boardgamemechanic": (mechanics, '@value'),
    'boardgamepublisher': (publishers, '@value'),
    'boardgamedesigner': (designers, '@value'),
    'boardgameartist': (artists, '@value'),
    # implementations are recorded by id rather than by display name
    'boardgameimplementation': (other_implementations, '@id'),
    'boardgamefamily': (families, '@value'),
}

for link in data_dict['items']['item']['link']:
    link_type = link['@type']
    if link_type in link_buckets:
        destination, attribute = link_buckets[link_type]
        destination.append(link[attribute])
    else:
        # Report link types that have no bucket yet.
        print(link_type)


boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgamefamily
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory
boardgameaccessory


In [118]:
# Total vote count of each poll attached to the item (values are strings).
[d['@totalvotes'] for d in data_dict['items']['item']['poll']]

['865', '126', '43']