[website](https://boardgamegeek.com/boardgame/224517/brass-birmingham/stats)

In [45]:
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    StaleElementReferenceException,
    WebDriverException,
    InvalidSessionIdException
)
import time
from tqdm import tqdm
import logging
import json
import pandas as pd

In [None]:
# Send log records to scraper.log, one "timestamp - LEVEL - message" line each.
logging.basicConfig(
    filename='scraper.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Usage template: place the work to be timed between the two clock reads.
# perf_counter() is monotonic, so the measured duration cannot be skewed
# (or go negative) if the system clock is adjusted while the task runs.
start_time = time.perf_counter()
logging.info("Starting scraping task...")

# ... the task being timed goes here ...

end_time = time.perf_counter()
duration = end_time - start_time

# Lazy %-style arguments: the message is only formatted if the record is emitted.
logging.info("Scraping completed in %.2f seconds.", duration)

In [41]:
#function to cleanly collect game credits

def get_credits(credits, index, new_line='\n'):
    """Return the entries listed in one credits element as a list of strings.

    Args:
        credits: Sequence of elements exposing a ``.text`` attribute.
        index: Position in ``credits`` of the element to read.
        new_line: Separator between entries inside the element's text.

    Returns:
        The stripped, non-empty entries in their original order, or ``None``
        when the element is missing, has no text attribute, or yields no
        entries at all.
    """
    try:
        raw = credits[index].text
        # Split on the separator itself rather than rewriting it to ", "
        # first: an entry that already contains ", " (e.g. "Smith, Jr.")
        # must stay in one piece regardless of how many entries there are.
        parts = raw.split(new_line)
    except (IndexError, ValueError, AttributeError):
        return None

    names = [part.strip() for part in parts]
    names = [name for name in names if name]
    # An element with only whitespace carries no credits; report it the same
    # way as a missing element instead of returning [""].
    return names or None

In [54]:
# Extract a single statistic, addressed by panel index and line index, tolerating missing entries

def get_stats(stats_elem, stats_index, sub_index, new_line='\n'):
    """Return one statistic from a stats element, converted to a number if possible.

    The text of ``stats_elem[stats_index]`` is split on ``new_line`` and the
    entry at ``sub_index`` is interpreted as follows:

    * ``"3.87 / 5"``  -> ``3.87`` (the part before ``" / "`` as a float)
    * ``"52,345"``    -> ``52345`` (thousands separators removed)
    * ``"532"``       -> ``532``
    * ``"8.571"``     -> ``8.571``
    * anything else   -> the stripped text unchanged

    Args:
        stats_elem: Sequence of elements exposing a ``.text`` attribute.
        stats_index: Position in ``stats_elem`` of the element to read.
        sub_index: Position of the wanted line within that element's text.
        new_line: Separator between lines inside the element's text.

    Returns:
        An ``int``, a ``float``, or a ``str`` as described above; ``None``
        when the element or line does not exist, or when the left side of a
        ``" / "`` value is not a number.
    """
    try:
        lines = stats_elem[stats_index].text.split(new_line)
        raw = lines[sub_index].strip()
    except (IndexError, ValueError, AttributeError):
        return None

    # Ratio form is checked first so a separator on the left-hand side
    # ("1,234 / 5") does not send the whole string down the integer path.
    if ' / ' in raw:
        try:
            return float(raw.split(' / ')[0].replace(',', ''))
        except ValueError:
            return None

    # Try the numeric conversions for every value, not only those containing
    # a comma, so "532" and "52,345" both come back as ints.
    digits = raw.replace(',', '')
    for convert in (int, float):
        try:
            return convert(digits)
        except ValueError:
            continue
    return raw

Sandbox

In [None]:
# Sandbox: open one game page in a fresh Chrome session for manual inspection.
url = "https://boardgamegeek.com/boardgame/450985/mini-crimes-black-friday-special"

chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)
driver.get(url)



Extraction Cell

In [None]:
# Credit fields in page order: field i is read from the i-th credits element.
_CREDIT_FIELDS = (
    "designers",
    "solo_designer",
    "artists",
    "publishers",
    "developers",
    "graphic_designers",
    "sculptor",
    "editor",
    "writer",
    "insert_designer",
    "categories",
    "mechanisms",
    "family",
)

# Stat field -> (stats element index, line index within that element's text).
_STAT_FIELDS = {
    # game stats
    "average_rating": (0, 1),
    "num_of_ratings": (0, 3),
    "std_deviation": (0, 5),
    "weight_over_five": (0, 7),
    "comments": (0, 9),
    "fans": (0, 11),
    "page_views": (0, 13),
    # game ranks
    "overall_rank": (1, 1),
    # play stats
    "all_time_plays": (2, 1),
    "this_month_plays": (2, 3),
    # collection stats
    "own": (3, 1),
    "previously_owned": (3, 3),
    "for_trade": (3, 5),
    "want_in_trade": (3, 8),
    "wishlist": (3, 11),
}


def _parse_range(text, sep='–'):
    """Parse ``"N"`` or ``"N<sep>M"`` into ``(low, high)`` ints.

    A single number yields ``(N, N)``. Text that cannot be parsed yields
    ``(None, None)`` so one odd field does not discard the whole game record.
    """
    low_text, _, high_text = text.strip().partition(sep)
    try:
        low = int(low_text)
        high = int(high_text) if high_text.strip() else low
    except ValueError:
        return None, None
    return low, high


def _collect_game_links(driver, first_page, last_page):
    """Return the game-page links found on browse pages ``first_page`` up to, not including, ``last_page``.

    A navigation error stops paging and returns what was gathered so far; an
    element error on one page only skips that page. Both are printed.
    """
    links = []
    try:
        for page in range(first_page, last_page):
            url = f"https://boardgamegeek.com/browse/boardgame/page/{page}"
            driver.get(url)
            time.sleep(3)  # fixed pause so the listing can finish rendering

            try:
                anchors = driver.find_elements(By.XPATH, '//a[@class="primary"]')
                hrefs = [anchor.get_attribute('href') for anchor in anchors]
            except (NoSuchElementException, StaleElementReferenceException) as e:
                print(f"Error on page {page}: {e}")
                continue
            # get_attribute returns None for a missing attribute; such entries
            # can be neither written to the links file nor visited.
            links.extend(href for href in hrefs if href)
    except (TimeoutException, WebDriverException, InvalidSessionIdException) as e:
        print(f"Navigation error: {e}")
    return links


def _scrape_game(driver, href):
    """Visit one game page and return its record as a dict.

    Raises:
        selenium WebDriverException subclasses (e.g. TimeoutException) when a
        required link or section does not appear, and IndexError when the
        game-name span is absent. The caller decides how to handle them.
    """
    driver.get(href)
    time.sleep(5)  # fixed pause so the page can finish rendering

    # Section zero: player count and play time from the page header.
    players = driver.find_elements(By.XPATH, '//span[@ng-if="::geekitemctrl.geekitem.data.item.minplayers > 0 || geekitemctrl.geekitem.data.item.maxplayers > 0"]')
    min_players, max_players = _parse_range(players[0].text) if players else (None, None)

    timing = driver.find_elements(By.XPATH, '//span[@min="::geekitemctrl.geekitem.data.item.minplaytime" and @max="::geekitemctrl.geekitem.data.item.maxplaytime"]')
    min_playtime, max_playtime = _parse_range(timing[0].text) if timing else (None, None)

    # Section one: open the credits view and wait for the name/year spans.
    name_xpath = '//span[@ng-bind-html="creditsctrl.geekitem.data.item[info.keyname]|to_trusted"]'
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.credits"]'))
    ).click()
    WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, name_xpath))
    )

    spans = driver.find_elements(By.XPATH, name_xpath)
    game_name = spans[0].text.strip()
    try:
        release_year = int(spans[1].text.strip())
    except (IndexError, ValueError):
        # Keep the record even when the year is absent or not a plain integer.
        release_year = None

    credit_elems = driver.find_elements(By.XPATH, '//div[@ng-if="info.datatype == \'geekitem_linkdata\'"]')

    record = {
        "boardgame": game_name,
        "release_year": release_year,
        "min_players": min_players,
        "max_players": max_players,
        "min_playtime": min_playtime,
        "max_playtime": max_playtime,
    }
    # Credits must be read before navigating to the stats view below.
    for position, field in enumerate(_CREDIT_FIELDS):
        record[field] = get_credits(credit_elems, position)

    # Section three: open the stats view and wait until it is shown.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.stats({})"]'))
    ).click()
    WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, '//div[@class="row game-stats"]'))
    )

    stats_elem = driver.find_elements(By.XPATH, '//ul[@class="outline fs-responsive-sm outline-border-col-xs"]')
    for field, (panel, line) in _STAT_FIELDS.items():
        record[field] = get_stats(stats_elem, panel, line)

    return record


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

allboardgames = 0
all_games_links = []
boardgames = []

# Browse pages to crawl: page_one is included, page_end is excluded.
page_one = 1
page_end = 2

try:
    all_games_links = _collect_game_links(driver, page_one, page_end)

    game_links = 'GoToGames.txt'
    with open(game_links, 'w', encoding='utf-8') as file:
        file.writelines(f"{game_link}\n" for game_link in all_games_links)

    print(f"The links to all {len(all_games_links)} board games has been collected and stored successfully")

    with tqdm(total=len(all_games_links), desc="Games Scraped") as pbar:
        for href in all_games_links:
            try:
                per_boardgame = _scrape_game(driver, href)
            except InvalidSessionIdException as e:
                # The browser session is gone; no further page can be loaded.
                logging.error("Browser session lost at %s; stopping early: %s", href, e)
                break
            except (WebDriverException, IndexError, ValueError) as e:
                # One bad page must not cost the records already collected.
                logging.warning("Skipping %s: %s", href, e)
            else:
                boardgames.append(per_boardgame)
                allboardgames += 1
            finally:
                pbar.update(1)
finally:
    # Always release the browser, and always persist whatever was scraped,
    # even when the run is interrupted or ends with an unexpected error.
    try:
        driver.quit()
    finally:
        with open("boardgamegeek.json", "w", encoding="utf-8") as f:
            json.dump(boardgames, f, indent=4, ensure_ascii=False)