Example BoardGameGeek game page (stats tab): [Brass: Birmingham](https://boardgamegeek.com/boardgame/224517/brass-birmingham/stats)

In [2]:
import csv
import json
import logging
import os
import random
import re
import time
import traceback

import pandas as pd
import requests
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException, StaleElementReferenceException, 
    TimeoutException, WebDriverException, InvalidSessionIdException
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
from webdriver_manager.chrome import ChromeDriverManager

In [3]:
# Root logger setup: INFO and above, timestamped, written to the file scraper.log.
logging.basicConfig(
    handlers=[logging.FileHandler('scraper.log')],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [4]:
#Function to extract players and play-time in SECTION 0

def players_time(players, timing):
    """Parse the player-count and play-time ranges of a game page.

    Args:
        players: list of elements (objects with a ``.text`` attribute) holding
            the player count, e.g. ``"2–4"`` or ``"2"``. May be empty.
        timing: list of elements holding the play time, same format.

    Returns:
        Tuple ``(min_players, max_players, min_time, max_time)``. Each pair is
        ``(None, None)`` when its list is empty or its text holds no number,
        so one unreadable field no longer discards the other.
    """
    def extract_min_max(elements):
        # Only the first matched element is inspected.
        if not elements:
            return None, None
        # Pull the integers out of the text instead of splitting on one
        # specific dash character: works for "2–4", "2-4", "30" and text with
        # surrounding words, and yields nothing for empty text.
        numbers = [int(digits) for digits in re.findall(r'\d+', elements[0].text or '')]
        if not numbers:
            return None, None
        minimum = numbers[0]
        # A single number means the minimum and maximum are the same.
        maximum = numbers[1] if len(numbers) > 1 else minimum
        return minimum, maximum

    min_players, max_players = extract_min_max(players)
    min_time, max_time = extract_min_max(timing)

    return min_players, max_players, min_time, max_time

In [5]:
#function to cleanly collect game credits in SECTION 1

def get_credits(credits, index, new_line='\n'):
    """Return the non-empty entries of one credits element, or None.

    The text of ``credits[index]`` is split on ``new_line``; each piece is
    stripped, and blank pieces and ``N/A`` placeholders (case-insensitive) are
    dropped. None is returned when nothing remains or when the element cannot
    be read (missing index, missing/invalid text).
    """
    try:
        pieces = credits[index].text.split(new_line)
    except (IndexError, ValueError, AttributeError):
        return None

    entries = []
    for piece in pieces:
        entry = piece.strip()
        if entry and entry.upper() != 'N/A':
            entries.append(entry)

    return entries or None

In [6]:
# collect game stats in SECTION 2

def get_stats(stats_elem, stats_index, sub_index, new_line='\n'):
    """Read one value out of a stats list element and convert it to a number.

    Args:
        stats_elem: list of elements (objects with ``.text``), one per stats list.
        stats_index: which element of ``stats_elem`` to read.
        sub_index: which line of that element's text holds the value.
        new_line: separator used to split the element text into lines.

    Returns:
        - int for values with thousands separators ("45,123") or plain digits,
        - float for "x / y" values (the part before " / ") and for plain
          decimals such as "8.59" (previously returned as a string, which left
          ratings typed differently from the weight),
        - the stripped text itself when it is not numeric,
        - None when the element/line is missing or the number cannot be parsed.
    """
    try:
        stat_category = stats_elem[stats_index].text.split(new_line)
        pre_stat = stat_category[sub_index].strip()
        if ',' in pre_stat:
            return int(pre_stat.replace(',', ''))
        if ' / ' in pre_stat:
            return float(pre_stat.split(' / ')[0])
        if pre_stat.isdigit():
            return int(pre_stat)
        # Digits with a single decimal point, e.g. "8.59" or "1.42".
        if pre_stat.replace('.', '', 1).isdigit():
            return float(pre_stat)
        return pre_stat
    except (IndexError, ValueError, AttributeError):
        return None

In [7]:
# function to handle ratings in SECTION 3

def get_rating(ratings, rating_index):
    """Convert an abbreviated count label such as "1.5k" into an integer.

    Args:
        ratings: list of elements (objects with ``.text``).
        rating_index: index of the element to read.

    Returns:
        The count as an int: "345" -> 345, "12k" -> 12000, "1.5k" -> 1500,
        "1.25k" -> 1250, "1,234" -> 1234.

    Raises:
        IndexError: ``rating_index`` is out of range.
        ValueError: the text is empty or not a recognisable number.
    """
    multipliers = {'k': 1_000, 'm': 1_000_000}

    raw_rating = ratings[rating_index].text.strip().replace(',', '')
    suffix = raw_rating[-1:].lower()
    if suffix in multipliers:
        # Scale the numeric part instead of patching digits into the string:
        # the old string replacement was only right for exactly one decimal.
        # round() absorbs float noise (1.1 * 1000 == 1100.0000000000002).
        return int(round(float(raw_rating[:-1]) * multipliers[suffix]))
    return int(raw_rating)

In [8]:
# function to get current exchange rates so as to normalize all prices to USD
def get_exchange_rates(base="USD", timeout=10):
    """Fetch the latest exchange rates for ``base`` from open.er-api.com.

    Args:
        base: currency code placed in the request URL.
        timeout: seconds to wait for the HTTP response, so a stalled request
            cannot hang the scraper indefinitely.

    Returns:
        The ``"rates"`` mapping of the JSON response, or an empty dict when
        the request fails, the response is not valid JSON, or it carries no
        rates. Failures are logged as warnings instead of raised, so callers
        can still record prices without a USD conversion.
    """
    url = f"https://open.er-api.com/v6/latest/{base}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        logging.warning(f"Could not fetch exchange rates for {base}: {exc}")
        return {}

    rates = payload.get("rates") if isinstance(payload, dict) else None
    return rates if isinstance(rates, dict) else {}

# function to carry out the conversion 
def convert_to_usd(price, currency, exchange_rates):
    """Express ``price`` in USD using a rates table keyed by currency code.

    USD prices are returned untouched. Other currencies are divided by their
    rate and rounded to two decimals; None is returned when the table has no
    (or a zero) rate for the currency.
    """
    if currency != "USD":
        rate = exchange_rates.get(currency)
        return round(price / rate, 2) if rate else None
    return price

In [9]:
#function to collect marketplace data 
def marketplace_crawler(store_elem):
    """Collect store name, price and currency from marketplace listing rows.

    Args:
        store_elem: list of Selenium elements, one per listing row.

    Returns:
        List of dicts with keys ``store``, ``base_price``, ``currency`` and
        ``base_price_usd``. Listings with a blank name/price or an unparsable
        price are skipped.

    Exchange rates are requested at most once per call, and only when a
    non-USD listing is actually found (previously one HTTP request was made
    for every single listing).
    """
    shops_data = []
    exchange_rates = None  # filled lazily on the first non-USD listing

    for row in store_elem:
        store_name_elem = row.find_elements(By.XPATH, ".//div[contains(@class, 'summary-item-title')]")
        store_price_elem = row.find_elements(By.XPATH, ".//span[@itemprop='price'] | .//strong[contains(@class, 'ng-binding')]")
        currency_elem = row.find_elements(By.XPATH, './/span[@itemprop="priceCurrency"]')

        for storage, pricing in zip(store_name_elem, store_price_elem):
            store_raw = storage.text.strip()
            price_raw = pricing.text.strip()

            if not store_raw or not price_raw:
                continue

            # Names containing "(" keep only their first word.
            store = store_raw.split(' ')[0] if '(' in store_raw else store_raw

            try:
                price = float(price_raw.replace('$', '').strip())
            except ValueError:
                continue

            # Currency comes from the last priceCurrency element of the row;
            # USD is assumed when the row has none or its attribute is empty.
            currency = "USD"
            if currency_elem:
                currency = currency_elem[-1].get_attribute('content') or "USD"

            if currency != "USD" and exchange_rates is None:
                exchange_rates = get_exchange_rates("USD")
            price_usd = convert_to_usd(price, currency, exchange_rates or {})

            shops_data.append({
                'store': store,
                'base_price': price,
                'currency': currency,
                'base_price_usd': price_usd
            })

    return shops_data

Extraction Cell

In [11]:
# Start a Chrome session; webdriver_manager supplies the chromedriver binary path.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install())
)

# Session counter and the list of game URLs gathered below.
allboardgames, all_games_links = 0, []

# JSON file that accumulates scraped records; seed it with an empty list the
# first time so later reads find valid JSON.
destination = "boardgamegeek.json"
if not os.path.exists(destination):
    with open(destination, 'w') as f:
        json.dump([], f)

first_iteration = True
row_number = 1001  # row_id given to the first game scraped in this session

# Browse pages to crawl: page_one inclusive, page_end exclusive.
page_one, page_end = 11, 16

try:
    logging.info("="*60)
    logging.info(f"Collecting boardgame links across {page_end - page_one} pages...")
    for page in range(page_one, page_end):
        url = f"https://boardgamegeek.com/browse/boardgame/page/{page}"
        driver.get(url)
        time.sleep(10)  # fixed pause to let the page render

        try:
            # Gather the page's links first; they are added to the session
            # list only if every href on the page could be read.
            game_links_per_page = []
            for link in driver.find_elements(By.XPATH, '//a[@class="primary"]'):
                game_links_per_page.append(link.get_attribute('href'))
            all_games_links.extend(game_links_per_page)
        except (NoSuchElementException, StaleElementReferenceException) as e:
            logging.warning(f"Error on page {page}: {e}")
except (TimeoutException, WebDriverException, InvalidSessionIdException) as e:
    # A navigation failure ends link collection; links gathered so far are kept.
    logging.error(f"Navigation error: {e}")

# Append this session's links to the running text file, one URL per line.
game_links = 'GoToGames.txt'
with open(game_links, 'a', encoding='utf-8') as file:
    file.writelines(game_link + '\n' for game_link in all_games_links)

logging.info(f"{len(all_games_links)} links found for this session and stored at {game_links}...")

# Scrape every collected game page and append each record to the JSON file.
# The finally clause closes the browser even if the run is interrupted.
try:
    with tqdm(total=len(all_games_links), desc="Games Scraped") as pbar:

        for href in all_games_links:
            # Fresh per-game state. Without this reset a failure in Section 0
            # either reused (and overwrote) the previous game's dict or hit an
            # undefined name.
            bg = {'row_id': row_number}
            game_name = f"Unknown Game {row_number}"
            stats_elem = []

            try:
                start_time = time.time()
                # /credits is loaded directly so the "See Full Credits" link
                # does not have to be clicked for Section 1.
                driver.get(href + '/credits')
                logging.info(f"Accessing Webpage: {href}")
                time.sleep(10)

                # Section 0: description, player counts and play time
                try:
                    players = driver.find_elements(By.XPATH, '//span[@ng-if="::geekitemctrl.geekitem.data.item.minplayers > 0 || geekitemctrl.geekitem.data.item.maxplayers > 0"]')
                    timing = driver.find_elements(By.XPATH, '//span[@min="::geekitemctrl.geekitem.data.item.minplaytime" and @max="::geekitemctrl.geekitem.data.item.maxplaytime"]')

                    description_elem = driver.find_elements(By.XPATH, '//span[@itemprop="description"]')
                    description = description_elem[0].text.strip() if description_elem else None
                    min_players, max_players, min_time, max_time = players_time(players, timing)

                    bg.update({
                        'description': description,
                        'player_counts': {
                            'min_players': min_players,
                            'max_players': max_players
                        },
                        'playtime': {
                            'min_playtime': min_time,
                            'max_playtime': max_time
                        }
                    })
                except Exception as e:
                    logging.warning(f"{e} during players & playtime extraction at {href}")

                # Section 1: name, release year, minimum age and credits
                # Wait for the name/year spans; a timeout here skips the game.
                info_spans_xpath = '//span[@ng-bind-html="creditsctrl.geekitem.data.item[info.keyname]|to_trusted"]'
                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.XPATH, info_spans_xpath))
                )

                try:
                    spans = driver.find_elements(By.XPATH, info_spans_xpath)
                    credits = driver.find_elements(By.XPATH, '//div[@ng-if="info.datatype == \'geekitem_linkdata\'"]')
                    minimum_age = driver.find_elements(By.XPATH, "//span[@itemprop='suggestedMinAge']")
                    age_text = minimum_age[0].text.strip() if minimum_age else ''
                    age = int(age_text) if age_text.isdigit() else None

                    game_name = spans[0].text.strip()
                    bg['boardgame'] = game_name
                    logging.info(f"Now Scraping game {row_number}: {game_name} | URL: {href}")

                    bg['minimum_age'] = age

                    year_text = spans[1].text.strip() if len(spans) > 1 else ''
                    bg['game_info'] = {
                        'release_year': int(year_text) if year_text.isdigit() else None,
                        "categories": get_credits(credits, 10),
                        "mechanisms": get_credits(credits, 11),
                        "family": get_credits(credits, 12)
                    }

                    # Credits are read by position in the list of credit blocks.
                    bg['credits'] = {
                        "designers": get_credits(credits, 0),
                        "solo_designer": get_credits(credits, 1),
                        "artists": get_credits(credits, 2),
                        "publishers": get_credits(credits, 3),
                        "developer": get_credits(credits, 4),
                        "graphic_designer": get_credits(credits, 5),
                        "sculptor": get_credits(credits, 6),
                        "editor": get_credits(credits, 7),
                        "writer": get_credits(credits, 8),
                        "insert_designer": get_credits(credits, 9)
                    }
                except Exception as e:
                    logging.warning(f"{e} in credits section of {driver.current_url}")

                # Section 2: stats tab (a failed click/wait skips the game)
                stats_tab = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.stats({})"]'))
                )
                stats_tab.click()

                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.XPATH, '//div[@class="row game-stats"]'))
                )

                # Game stats: values are read by list index and line position.
                try:
                    stats_elem = driver.find_elements(By.XPATH, '//ul[@class="outline fs-responsive-sm outline-border-col-xs"]')

                    bg['game_stats'] = {
                        "average_rating": get_stats(stats_elem, 0, 1),
                        "num_of_ratings": get_stats(stats_elem, 0, 3),
                        "std_deviation": get_stats(stats_elem, 0, 5),
                        "weight": get_stats(stats_elem, 0, 7),
                        "comments": get_stats(stats_elem, 0, 9),
                        "fans": get_stats(stats_elem, 0, 11),
                        "page_views": get_stats(stats_elem, 0, 13)
                    }
                except Exception as e:
                    logging.warning(f"{e} in game stats of {driver.current_url}")

                # Rank stats: label -> rank, paired in document order
                bg['ranks'] = {}
                try:
                    rank_labels = driver.find_elements(By.XPATH, '//span[@class="rank-title ng-binding"]')
                    rank_values = driver.find_elements(By.XPATH, '//a[@class="rank-value ng-binding ng-scope"]')

                    for label, value in zip(rank_labels, rank_values):
                        bg['ranks'][label.text.strip().lower()] = int(value.text.replace(",", "").strip())
                except Exception as e:
                    logging.warning(f"{e} in rank stats of {href}")

                # Play stats
                try:
                    bg['play_stats'] = {
                        "all_time_plays": get_stats(stats_elem, 2, 1),
                        "this_month_plays": get_stats(stats_elem, 2, 3)
                    }
                except Exception as e:
                    logging.warning(f"{e} in play stats of {driver.current_url}")

                # Collection stats
                try:
                    bg['collection_stats'] = {
                        "own": get_stats(stats_elem, 3, 1),
                        "previously_owned": get_stats(stats_elem, 3, 3),
                        "for_trade": get_stats(stats_elem, 3, 5),
                        "want_in_trade": get_stats(stats_elem, 3, 8),
                        "wishlist": get_stats(stats_elem, 3, 11)
                    }
                except Exception as e:
                    logging.warning(f"{e} in collection stats of {driver.current_url}")

                # Section 3: ratings breakdown
                # name()='text' is a workaround XPath for namespaced (SVG) nodes.
                WebDriverWait(driver, 10).until(
                    lambda d: len(d.find_elements(By.XPATH, "//*[name()='text']")) >= 20
                )
                try:
                    ratings = driver.find_elements(By.XPATH, "//*[name()='text']")

                    # Text nodes 10..19 are read as the counts for ratings 1..10.
                    bg['ratings'] = {
                        f"rated_{i}": get_rating(ratings, 9 + i)
                        for i in range(1, 11)
                    }
                except Exception as e:
                    logging.warning(f"{e} in scraping ratings of {driver.current_url}")

                # Section 4: marketplace / store listings
                try:
                    shops_tab = WebDriverWait(driver, 10).until(
                        EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.marketplace.stores({})"]'))
                    )
                    shops_tab.click()

                    # Prefer waiting for an actual listing; fall back to the
                    # stores module container when no listing shows up.
                    try:
                        WebDriverWait(driver, 15).until(
                            EC.visibility_of_element_located((By.XPATH, "//li[contains(@class, 'summary-sale-item')]"))
                        )
                    except TimeoutException:
                        WebDriverWait(driver, 5).until(
                            EC.visibility_of_element_located((By.XPATH, "//stores-items-module"))
                        )

                    store_elem = driver.find_elements(By.XPATH, "//li[contains(@class, 'summary-sale-item')]")
                    bg['marketplace'] = marketplace_crawler(store_elem)
                except Exception as e:
                    logging.warning(f"{e} in scraping shop listings at {driver.current_url}")

                bg['link_to_game'] = href

                duration = time.time() - start_time
                logging.info(f"Scraping completed for {game_name} in {duration:.2f} seconds.")

                # Persist after every game so an aborted run keeps what it has.
                with open(destination, 'r', encoding='utf-8') as f:
                    content = f.read()
                    data = json.loads(content) if content.strip() else []
                data.append(bg)

                with open(destination, "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=4, ensure_ascii=False)

                logging.info(f"Records dumped in json file: {destination}")

            except Exception as e:
                # Report the game being scraped: key 'boardgame' and this
                # game's URL (the old message read a non-existent 'name' key
                # and the last browse-page URL).
                logging.error(f"Error scraping boardgame, {bg.get('boardgame')} at {href} - {type(e).__name__}: {e}")
                logging.error(traceback.format_exc())

            # Counters advance whether or not the game was saved.
            logging.info("="*75)
            row_number += 1
            allboardgames += 1
            pbar.update(1)
finally:
    driver.quit()

Games Scraped: 0it [00:00, ?it/s]


In [None]:
# Path of the flattened CSV export produced by this cell.
final_destination = "boardgame-geek-dataset.csv"

# Read the scraped records back from the JSON file named by `destination`.
with open(destination, encoding="utf-8") as f:
    data = json.loads(f.read())

# Flatten marketplace (special handling)
def flatten_marketplace(record):
    """Turn a record's marketplace listings into flat ``<store>_price_<cur>`` columns.

    Args:
        record: scraped game dict; its optional ``"marketplace"`` value is a
            list of dicts with ``store``, ``base_price``, ``currency`` and
            ``base_price_usd``.

    Returns:
        Dict mapping ``"<store>_price_<currency>"`` to the base price, plus
        ``"<store>_price_usd"`` holding the converted price for non-USD
        listings. Empty when the record has no listings.

    A missing or null ``marketplace`` is treated as no listings, and a missing
    or null ``currency`` is treated as USD, so stored records with nulls no
    longer abort the export.
    """
    flat_market = {}
    for store in record.get("marketplace") or []:
        store_name = store["store"].lower().replace(" ", "_")
        currency = (store.get("currency") or "USD").lower()

        price_col = f"{store_name}_price_{currency}"
        flat_market[price_col] = store.get("base_price")

        # USD listings already carry their USD price in the column above.
        if currency != "usd":
            usd_col = f"{store_name}_price_usd"
            flat_market[usd_col] = store.get("base_price_usd")

    return flat_market

# Recursive flattener for nested dicts
def flatten_dict(d, parent_key="", sep="_"):
    """Collapse nested dicts into one level, joining key paths with ``sep``.

    Nested dicts are flattened recursively. Lists made up only of dicts
    (including empty lists) are left out; any other list is stored as its
    items converted to strings and joined with ";". All other values are
    copied as they are.
    """
    flat = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if isinstance(value, dict):
            flat.update(flatten_dict(value, path, sep=sep))
        elif isinstance(value, list):
            if not all(isinstance(item, dict) for item in value):
                flat[path] = ";".join(str(item) for item in value)
        else:
            flat[path] = value
    return flat

# Full record flattener
def flatten_record(record):
    """Return one flat dict for a record: flattened fields plus marketplace columns."""
    flat = {**flatten_dict(record), **flatten_marketplace(record)}
    # Drop any leftover raw "marketplace" entry; its data lives in the price columns.
    flat.pop("marketplace", None)
    return flat

# One flat dict per scraped game.
flat_records = [flatten_record(rec) for rec in data]

# Build the table with pandas (`pd` comes from the notebook's import cell;
# it was previously never imported, so this cell failed on a fresh kernel).
df = pd.DataFrame(flat_records)
df.to_csv(final_destination, index=False)

Sandbox

In [16]:
class UserAgentRotator:
    """Creates Chrome drivers with a randomly chosen user agent and window size."""

    def __init__(self):
        # Pool of user-agent strings to pick from.
        self.user_agents = [
            # Chrome Windows
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            # Chrome Mac
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            # Firefox Windows
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0",
            # Firefox Mac
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
            # Safari Mac
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
            # Edge Windows
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            # Chrome Linux
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        ]

        # Pool of (width, height) window sizes to pick from.
        self.resolutions = [
            (1920, 1080), (1366, 768), (1536, 864), (1440, 900),
            (1280, 720), (1600, 900), (2560, 1440), (1920, 1200)
        ]

        # chromedriver path, resolved on first driver creation and then reused
        # so each rotation does not resolve it again.
        self._driver_path = None

    def get_random_user_agent(self):
        """Return one user-agent string chosen at random from the pool."""
        return random.choice(self.user_agents)

    def get_random_resolution(self):
        """Return one (width, height) tuple chosen at random from the pool."""
        return random.choice(self.resolutions)

    def create_driver_with_rotation(self, headless=False):
        """Create a new Chrome driver with a random user agent and window size.

        Args:
            headless: when True, Chrome is started with ``--headless``.

        Returns:
            The started ``webdriver.Chrome`` instance.

        Raises:
            Any exception raised while starting or configuring Chrome; a
            driver that was started but could not be configured is quit
            before the exception is re-raised.
        """
        options = Options()

        # Random user agent
        user_agent = self.get_random_user_agent()
        options.add_argument(f"--user-agent={user_agent}")

        # Anti-detection measures
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        # NOTE(review): the next two flags relax browser security and do not
        # look related to automation detection - confirm they are needed.
        options.add_argument("--disable-web-security")
        options.add_argument("--allow-running-insecure-content")
        options.add_argument("--disable-extensions")
        options.add_argument("--disable-plugins")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        # Random window size
        width, height = self.get_random_resolution()
        options.add_argument(f"--window-size={width},{height}")

        if headless:
            options.add_argument("--headless")

        # Create driver
        if self._driver_path is None:
            self._driver_path = ChromeDriverManager().install()
        driver = webdriver.Chrome(service=Service(self._driver_path), options=options)

        hide_webdriver_js = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        try:
            # Register the override for every document loaded from now on.
            # execute_script alone only patches the document open right now,
            # so the override was gone after the first driver.get().
            driver.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument", {"source": hide_webdriver_js}
            )
            # Also patch the document that is currently open.
            driver.execute_script(hide_webdriver_js)
        except Exception:
            # Do not leave an orphaned browser behind if setup fails.
            driver.quit()
            raise

        logging.info(f"Created new driver with User-Agent: {user_agent[:60]}...")
        return driver

# Main scraper with rotation integration  
def scrape_boardgames_with_rotation():
    rotator = UserAgentRotator()
    driver = None
    
    # Your existing variables
    allboardgames = 0
    all_games_links = []
    destination = "boardgamegeek.json"
    
    if not os.path.exists(destination):
        with open(destination, 'w') as f:
            json.dump([], f)

    row_number = 1001
    page_one = 11
    page_end = 12
    
    # Rotation settings
    ROTATION_FREQUENCY = 15  # Rotate more frequently 
    MAX_RETRIES = 2  # Reduce retries to avoid getting stuck
    
    try:
        # Initial driver creation
        driver = rotator.create_driver_with_rotation(headless=False)
        
        # Step 1: Collect game links (your existing logic)
        logging.info("="*60)
        logging.info(f"Collecting boardgame links across {page_end - page_one} pages...")
        
        for page in range(page_one, page_end):
            url = f"https://boardgamegeek.com/browse/boardgame/page/{page}"
            
            # Retry logic for page loading
            for attempt in range(MAX_RETRIES):
                try:
                    driver.get(url)
                    time.sleep(random.uniform(8, 15))  # Random delay
                    
                    game_links_per_page = [
                        link.get_attribute('href') 
                        for link in driver.find_elements(By.XPATH, '//a[@class="primary"]')
                    ]
                    all_games_links.extend(game_links_per_page)
                    break
                    
                except (TimeoutException, WebDriverException, InvalidSessionIdException) as e:
                    logging.warning(f"Attempt {attempt + 1} failed for page {page}: {e}")
                    if attempt == MAX_RETRIES - 1:
                        # Rotate user agent on final failure
                        logging.info("Max retries reached, rotating user agent...")
                        try:
                            driver.quit()
                        except:
                            pass
                        driver = rotator.create_driver_with_rotation(headless=False)
                        time.sleep(random.uniform(10, 20))
                    else:
                        time.sleep(random.uniform(15, 30))
                        
        # Save links to file
        game_links = 'GoToGames.txt'
        with open(game_links, 'a', encoding='utf-8') as file:
            for game_link in all_games_links:
                file.write(game_link + '\n')

        logging.info(f"{len(all_games_links)} links found for this session and stored at {game_links}...")

        # Step 2: Scrape individual games with rotation
        with tqdm(total=len(all_games_links), desc="Games Scraped") as pbar:
            
            for idx, href in enumerate(all_games_links):
                
                # Rotate user agent every ROTATION_FREQUENCY games
                if idx > 0 and idx % ROTATION_FREQUENCY == 0:
                    logging.info(f"Rotating user agent at game {idx + 1} (row {row_number})")
                    try:
                        driver.quit()
                    except:
                        pass
                    driver = rotator.create_driver_with_rotation(headless=False)
                    time.sleep(random.uniform(15, 25))  # Longer delay after rotation
                
                # Main scraping logic with retry mechanism
                game_scraped = False
                retry_count = 0
                
                while not game_scraped and retry_count < MAX_RETRIES:
                    try:
                        # Check if driver is still alive
                        try:
                            driver.current_url
                        except (InvalidSessionIdException, WebDriverException):
                            logging.info("Driver session lost, creating new one...")
                            driver = rotator.create_driver_with_rotation(headless=False)
                            time.sleep(random.uniform(10, 15))
                        
                        start_time = time.time()
                        
                        # Navigate to credits page
                        driver.get(href + '/credits')
                        logging.info(f"Accessing Webpage: {href}")
                        time.sleep(random.uniform(8, 15))

                        # Initialize bg dict properly
                        bg = {}

                        # Section 0: Basic game info
                        try:
                            players = driver.find_elements(By.XPATH, '//span[@ng-if="::geekitemctrl.geekitem.data.item.minplayers > 0 || geekitemctrl.geekitem.data.item.maxplayers > 0"]')
                            timing = driver.find_elements(By.XPATH, '//span[@min="::geekitemctrl.geekitem.data.item.minplaytime" and @max="::geekitemctrl.geekitem.data.item.maxplaytime"]')
                            
                            description_elem = driver.find_elements(By.XPATH, '//span[@itemprop="description"]')
                            description = description_elem[0].text.strip() if description_elem else "No description"
                            min_players, max_players, min_time, max_time = players_time(players, timing)

                            bg.update({
                                'row_id': row_number,
                                'description': description,
                                'player_counts': {
                                    'min_players': min_players,
                                    'max_players': max_players
                                },
                                'playtime': {
                                    'min_playtime': min_time,
                                    'max_playtime': max_time
                                }
                            })
                        except Exception as e:
                            logging.warning(f"{e} during players & playtime extraction at {href}")

                        # Section 1: Game details and credits
                        WebDriverWait(driver, 10).until(
                            EC.visibility_of_element_located((By.XPATH, '//span[@ng-bind-html="creditsctrl.geekitem.data.item[info.keyname]|to_trusted"]'))
                        )

                        try:
                            spans = driver.find_elements(By.XPATH, '//span[@ng-bind-html="creditsctrl.geekitem.data.item[info.keyname]|to_trusted"]')
                            credits = driver.find_elements(By.XPATH, '//div[@ng-if="info.datatype == \'geekitem_linkdata\'"]')
                            minimum_age = driver.find_elements(By.XPATH, "//span[@itemprop='suggestedMinAge']")
                            age = int(minimum_age[0].text.strip()) if minimum_age and minimum_age[0].text.isdigit() else None

                            game_name = spans[0].text.strip() if spans else f"Unknown Game {row_number}"
                            bg['boardgame'] = game_name
                            logging.info(f"Now Scraping game {row_number}: {game_name} | URL: {href}")

                            bg['minimum_age'] = age
                            bg['game_info'] = {
                                'release_year': int(spans[1].text.strip()) if len(spans) > 1 and spans[1].text.isdigit() else None,
                                "categories": get_credits(credits, 10),
                                "mechanisms": get_credits(credits, 11), 
                                "family": get_credits(credits, 12)
                            }

                        #xt/game-credits with the get_credits func.
                            bg['credits'] = {
                                "designers": get_credits(credits, 0),
                                "solo_designer": get_credits(credits, 1),
                                "artists": get_credits(credits, 2),
                                "publishers": get_credits(credits, 3),
                                "developer": get_credits(credits, 4),
                                "graphic_designer": get_credits(credits, 5),
                                "sculptor": get_credits(credits, 6),
                                "editor": get_credits(credits, 7),
                                "writer": get_credits(credits, 8),
                                "insert_designer": get_credits(credits, 9)
                            }
                        except Exception as e:
                            logging.warning(f"{e} in credits section of {driver.current_url}")
                        
                        # Section 2: Stats (simplified for now)
                        try:
                        #click on stats section
                            SeeGameStats = WebDriverWait(driver, 15).until(
                                EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.stats({})"]'))
                            )
                            SeeGameStats.click()

                        #wait until stats appear
                            WebDriverWait(driver, 10).until(
                                EC.visibility_of_element_located((By.XPATH, '//div[@class="row game-stats"]'))
                            )

                        # extract relevant stats with get_stats func.
                            stats_elem = driver.find_elements(By.XPATH, '//ul[@class="outline fs-responsive-sm outline-border-col-xs"]')

                        #game stats
                            bg['game_stats'] = {
                                "average_rating": get_stats(stats_elem, 0, 1),
                                "num_of_ratings": get_stats(stats_elem, 0, 3),
                                "std_deviation": get_stats(stats_elem, 0, 5),
                                "weight": get_stats(stats_elem, 0, 7),
                                "comments": get_stats(stats_elem, 0, 9),
                                "fans": get_stats(stats_elem, 0, 11),
                                "page_views": get_stats(stats_elem, 0, 13)
                            }
                        except Exception as e:
                            logging.warning(f"{e} in game stats of {driver.current_url}")

                    #ranks stats
                        bg['ranks']={}
                        try:
                            rank_labels = driver.find_elements(By.XPATH, '//span[@class="rank-title ng-binding"]')
                            rank_values = driver.find_elements(By.XPATH, '//a[@class="rank-value ng-binding ng-scope"]')

                            for label, value in zip(rank_labels, rank_values):
                                bg['ranks'][label.text.strip().lower()] = int(value.text.replace(",","").strip())

                        except Exception as e:
                            logging.warning(f"{e} in rank stats of {href}")

                    #play stats
                        try:
                            bg['play_stats'] = {
                                "all_time_plays": get_stats(stats_elem, 2, 1),
                                "this_month_plays": get_stats(stats_elem, 2, 3)
                            }
                        except Exception as e:
                            logging.warning(f"{e} in play stats of {driver.current_url}")

                    #collection stats
                        try:
                            bg['collection_stats'] = {
                                "own": get_stats(stats_elem, 3, 1),
                                "previously_owned": get_stats(stats_elem, 3, 3),
                                "for_trade": get_stats(stats_elem, 3, 5),
                                "want_in_trade": get_stats(stats_elem, 3, 8),
                                "wishlist": get_stats(stats_elem, 3, 11)
                            }
                        except Exception as e:
                            logging.warning(f"{e} in collection stats of {driver.current_url}")
                        
                    #Section 3
                    #wait until ratings appear
                        WebDriverWait(driver, 10).until(
                            lambda d: len(d.find_elements(By.XPATH, "//*[name()='text']")) >= 20
                        )
                        try:
                            ratings = driver.find_elements(By.XPATH, "//*[name()='text']") #workaround xpath for html that include namespaces like SVG in this case
                        
                        #extract ratings with the get_rating function
                            bg['ratings'] = {
                                f"rated_{i}": get_rating(ratings, 9 + i)
                                for i in range(1, 11)
                            }
                        except Exception as e:
                            logging.warning(f"{e} in scraping ratings of {driver.current_url}")

                        
                    #Section 4 
                    #Click on Shoppings Tab
                        try:
                            ShopListings = WebDriverWait(driver,10).until(
                                EC.element_to_be_clickable((By.XPATH, '//a[@ui-sref="geekitem.marketplace.stores({})"]'))
                                )
                            ShopListings.click()

                    # wait until all store items become visible 
                            try:
                                WebDriverWait(driver, 15).until(
                                    EC.visibility_of_element_located((By.XPATH, "//li[contains(@class, 'summary-sale-item')]"))
                                )
                            except TimeoutException:
                                WebDriverWait(driver, 5).until(
                                    EC.visibility_of_element_located((By.XPATH,  "//stores-items-module"))
                                )

                    #collect data with the marketplace_crawler function
                            store_elem = driver.find_elements(By.XPATH, "//li[contains(@class, 'summary-sale-item')]")
                            
                            # bg['marketplace'] = shops_data
                            marketplace_data = marketplace_crawler(store_elem)
                            bg['marketplace'] = marketplace_data 
                        except Exception as e:
                            logging.warning(f"{e} in scraping shop listings at {driver.current_url}")
                            
                        bg['link_to_game']=href

                        end_time = time.time()
                        duration = end_time - start_time
                        game_name = bg.get('boardgame', 'Unknown Game')
                        logging.info(f"Scraping completed for {game_name} in {duration:.2f} seconds.")
                    
                        # Save to JSON with better error handling
                        try:
                            # Read existing data
                            if os.path.exists(destination) and os.path.getsize(destination) > 0:
                                with open(destination, 'r', encoding='utf-8') as f:
                                    data = json.load(f)
                            else:
                                data = []
                            
                            # Append new data
                            data.append(bg)
                            
                            # Write back to file
                            with open(destination, "w", encoding="utf-8") as f:
                                json.dump(data, f, indent=4, ensure_ascii=False)

                            logging.info(f"Successfully saved game {row_number} to {destination}")
                            
                        except Exception as json_error:
                            logging.error(f"Failed to save to JSON: {json_error}")
                            # Try to save as backup
                            backup_file = f"backup_game_{row_number}.json"
                            with open(backup_file, "w", encoding="utf-8") as f:
                                json.dump(bg, f, indent=4, ensure_ascii=False)
                            logging.info(f"Saved backup to {backup_file}")
                        
                        # Success - exit retry loop
                        game_scraped = True
                        
                    except Exception as e:
                        retry_count += 1
                        logging.error(f"Attempt {retry_count} failed for {href}: {type(e).__name__}: {e}")
                        
                        if retry_count < MAX_RETRIES:
                            # Rotate user agent on error
                            logging.info("Error encountered, rotating user agent...")
                            try:
                                driver.quit()
                            except:
                                pass
                            driver = rotator.create_driver_with_rotation(headless=False)
                            time.sleep(random.uniform(20, 35))
                        else:
                            logging.error(f"Max retries reached for {href}")
                            logging.error(traceback.format_exc())
                
                logging.info("="*75)
                row_number += 1
                allboardgames += 1
                pbar.update(1)
                
                # Random delay between games
                time.sleep(random.uniform(5, 12))
                
    finally:
        if driver:
            try:
                driver.quit()
            except:
                pass


# Entry point: start the scrape only when this code is executed directly
# (e.g. as a script or a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    scrape_boardgames_with_rotation()

Games Scraped: 100%|██████████| 100/100 [1:18:34<00:00, 47.14s/it]
