In [1]:
import requests
import re
import xmltodict
import asyncio
import csv

def make_request(url: str, params: dict, timeout: float = 30.0) -> requests.Response:
    """Send a blocking HTTP GET request and return the response.

    Args:
        url: Address to request.
        params: Query-string parameters sent with the request.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, so a single
            stalled connection would hang the caller forever.

    Returns:
        The ``requests.Response`` for the request; the status code is not
        checked here.

    Raises:
        requests.RequestException: On connection errors or when the timeout
            expires.
    """
    return requests.get(url=url, params=params, timeout=timeout)

async def api_request(url: str, params: dict) -> requests.Response:
    """Run the blocking ``make_request`` call in a worker thread and await it.

    Args:
        url: Address to request.
        params: Query-string parameters sent with the request.

    Returns:
        The response object produced by ``make_request``.
    """
    # asyncio.to_thread hands the synchronous call to a separate thread so
    # the event loop is not blocked while the HTTP request is in flight.
    pending_call = asyncio.to_thread(make_request, url, params)
    return await pending_call

def _as_list(node) -> list:
    """Normalise a parsed XML child to a list (None -> [], single -> [single])."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def _child(node, key):
    """Return ``node[key]`` when ``node`` is a mapping that has it, else None."""
    return node.get(key) if isinstance(node, dict) else None


def _extract_notes(response_data) -> list[str]:
    """Collect the non-empty listing notes from one parsed ``thing`` response."""
    notes = []
    # xmltodict represents an empty element as None, a single child as a
    # dict and repeated children as a list, so every level is normalised
    # before it is walked.
    for item in _as_list(_child(_child(response_data, 'items'), 'item')):
        listings = _child(_child(item, 'marketplacelistings'), 'listing')
        for listing in _as_list(listings):
            note = _child(_child(listing, 'notes'), '@value')
            if note:
                notes.append(note)
    return notes


async def main_get_data(start_id: int = 1, end_id: int = 1000) -> list[str]:
    """Fetch marketplace listing notes for a range of BoardGameGeek item ids.

    Requests are issued one at a time, in id order. Ids whose request fails
    or whose response status is not 200 are skipped so that one bad id does
    not discard the notes already collected.

    Args:
        start_id: First item id to query (inclusive).
        end_id: Id at which to stop (exclusive).

    Returns:
        All non-empty listing notes found, in the order they were fetched.
        An empty list when the id range is empty.
    """
    url = "https://boardgamegeek.com/xmlapi2/thing"
    all_data = []

    for item_id in range(start_id, end_id):
        params = {
            "id": item_id,
            "type": "boardgame",
            "marketplace": 1,
        }

        try:
            response = await api_request(url=url, params=params)
        except requests.RequestException as exc:
            # Best effort: report the failure and move on to the next id.
            print(f"Skipping id {item_id}: {exc}")
            continue

        if response.status_code != 200:
            continue

        all_data.extend(_extract_notes(xmltodict.parse(response.content)))

    return all_data

# Numeric character references such as &#10;
_ENTITY_RE = re.compile(r"&#\d+;")
# Opening/closing BBCode-like tags: [u], [/b], [size=12], ...
_BBCODE_TAG_RE = re.compile(r"\[/?(u|b|size)(=\w+)?\]")
# [url=...]text[/url] -> keeps only "text"
_BBCODE_URL_RE = re.compile(r"\[url=[^\]]+\](.*?)\[/url\]")


def data_prep(data):
    """Strip quoting and markup from raw listing notes.

    For every note this removes one pair of surrounding double quotes,
    numeric character references (e.g. ``&#10;``), ``[u]``/``[b]``/``[size]``
    tags, and replaces ``[url=...]text[/url]`` with just ``text``.

    Args:
        data: Iterable of note strings.

    Returns:
        A new list with the cleaned notes, in the same order as ``data``.
    """
    cleaned_data = []
    for note in data:
        # Slices instead of note[0] / note[-1] so that an empty note does
        # not raise IndexError.
        if note[:1] == note[-1:] == '"':
            note = note[1:-1]

        note = _ENTITY_RE.sub("", note)
        note = _BBCODE_TAG_RE.sub("", note)
        note = _BBCODE_URL_RE.sub(r"\1", note)

        cleaned_data.append(note)
    return cleaned_data
        
def save_to_csv(data, filename="boardgame_notes.csv"):
    """Write the notes to a two-column CSV file, overwriting any existing file.

    The file starts with a ``notes,label`` header row; each note then gets
    its own row with the label set to 0. Every field is quoted.

    Args:
        data: Iterable of note strings.
        filename: Path of the CSV file to write.
    """
    header = ["notes", "label"]
    with open(filename, mode="w", newline="", encoding="utf-8") as out_file:
        csv_writer = csv.writer(out_file, quoting=csv.QUOTE_ALL)
        csv_writer.writerow(header)
        csv_writer.writerows([text, 0] for text in data)

if __name__ == "__main__":
    # Entry point: download the notes, clean them and write them to CSV.
    # Any failure along the way is reported as a single "Error: ..." line.
    try:
        # get_running_loop() raises RuntimeError when no event loop is
        # active, which is the case for a plain script run.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if not loop:
            # Standalone script: asyncio.run creates and closes its own loop.
            result = asyncio.run(main_get_data())
        else:
            # A loop is already running (e.g. a Jupyter notebook), so patch
            # it to allow a nested run_until_complete call.
            import nest_asyncio
            nest_asyncio.apply()
            result = loop.run_until_complete(main_get_data())

        ready_data = data_prep(result)
        save_to_csv(ready_data)
        print("Data saved to boardgame_notes.csv")
    except Exception as e:
        print(f"Error: {str(e)}")

Data saved to boardgame_notes.csv
