In [1]:
import requests
import json
import re
import time
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import concurrent.futures
import json
import pandas as pd
import re

In [5]:

def get_characters_from_category(category, timeout=10):
    """Return the link titles listed on a wiki category page.

    Downloads ``Category:<category>`` from the Blood on the Clocktower wiki
    and collects the ``title`` attribute of every ``<a>`` element found inside
    the ``<div id="mw-pages">`` element.

    Args:
        category: Category name as it appears in the wiki URL,
            e.g. ``"Townsfolk"``.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of title strings. The list is empty when the HTTP status is
        not 200 or when the ``mw-pages`` element is missing; both cases print
        a diagnostic message.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection failure or timeout.
    """
    url = f"https://wiki.bloodontheclocktower.com/Category:{category}"
    # Without an explicit timeout a stalled connection would block the
    # calling worker thread indefinitely.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve {category} page: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    category_section = soup.find('div', {'id': 'mw-pages'})

    if not category_section:
        print(f"Category section not found for {category}")
        return []

    # Links without a title attribute are skipped.
    titles = (link.get('title') for link in category_section.find_all('a'))
    return [title for title in titles if title]

def get_all_characters():
    """Fetch the character lists of all four categories concurrently.

    Returns:
        A dict with one key per category ("Townsfolk", "Outsiders",
        "Minions", "Demons", in that order) mapping to the list returned by
        ``get_characters_from_category``, plus an ``"all"`` key holding the
        concatenation of those lists in the same category order. A category
        whose download raised an exception maps to an empty list.
    """
    categories = ["Townsfolk", "Outsiders", "Minions", "Demons"]
    # Pre-seed the keys so the mapping (and therefore the flattened "all"
    # list) follows the fixed category order instead of whichever download
    # happens to finish first.
    all_characters = {category: [] for category in categories}

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_category = {
            executor.submit(get_characters_from_category, category): category
            for category in categories
        }

        # Progress is still reported as each download completes.
        for future in concurrent.futures.as_completed(future_to_category):
            category = future_to_category[future]
            try:
                characters = future.result()
                all_characters[category] = characters
                print(f"Found {len(characters)} characters in {category}")
            except Exception as e:
                print(f"Error processing {category}: {str(e)}")
                all_characters[category] = []

    flat_list = [char for category in categories for char in all_characters[category]]
    all_characters["all"] = flat_list

    return all_characters

def get_character_ability(character_name, timeout=10):
    """Fetch a character's wiki page and extract its quoted ability text.

    The function looks for the ``Summary`` headline, takes the first ``<p>``
    after the enclosing ``<h2>``, and returns the first double-quoted span in
    that paragraph.

    Args:
        character_name: Page title of the character on the wiki.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The ability text, or one of these diagnostic strings:
        ``"Character page not found"`` (status is not 200),
        ``"Summary section not found"``, ``"Summary text not found"``,
        ``"No quoted ability text found"``, or
        ``"Error processing character: <message>"`` when any exception is
        raised. The function itself never raises.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    base_url = "https://wiki.bloodontheclocktower.com/"

    try:
        # Percent-encode the title instead of passing it to urljoin(): a ":",
        # "?" or "#" inside a title would otherwise be interpreted as URL
        # syntax (scheme, query, fragment) and produce the wrong address.
        url = base_url + quote(character_name)
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            return "Character page not found"

        soup = BeautifulSoup(response.text, 'html.parser')
        summary_heading = soup.find('span', {'class': 'mw-headline', 'id': 'Summary'})

        if not summary_heading:
            return "Summary section not found"

        # The headline span may not sit inside an <h2>; treat that the same
        # as a missing paragraph rather than failing with AttributeError.
        heading = summary_heading.find_parent('h2')
        summary_paragraph = heading.find_next('p') if heading else None

        if not summary_paragraph:
            return "Summary text not found"

        quoted_text_match = re.search(r'"([^"]*)"', summary_paragraph.text)

        if quoted_text_match:
            return quoted_text_match.group(1)
        return "No quoted ability text found"

    except Exception as e:
        return f"Error processing character: {str(e)}"

def process_character(character, category_data):
    """Build the record for one character: name, category and ability text.

    Args:
        character: Character name to look up.
        category_data: Mapping of category name to list of character names.
            The ``"all"`` key is ignored during the category lookup.

    Returns:
        A dict with keys ``"name"``, ``"category"`` (the first category whose
        list contains the character, or ``None``) and ``"ability"`` (whatever
        ``get_character_ability`` returns).
    """
    # First matching category wins; None when the character is in none of them.
    owning_category = next(
        (
            label
            for label, members in category_data.items()
            if label != "all" and character in members
        ),
        None,
    )

    return {
        "name": character,
        "category": owning_category,
        "ability": get_character_ability(character),
    }

def build_complete_character_data():
    """Scrape every category and every character page into one structure.

    Returns:
        A dict with two keys:

        * ``"categories"``: category name -> list of character names
          (everything from ``get_all_characters`` except the ``"all"`` entry).
        * ``"characters"``: one record per entry of the ``"all"`` list, in the
          same order as that list. Each record is the dict returned by
          ``process_character``, or a fallback record with ``category`` set to
          ``None`` and an ``"Error: ..."`` ability if the worker raised.
    """
    category_data = get_all_characters()

    complete_data = {
        "categories": {
            category: chars
            for category, chars in category_data.items()
            if category != "all"
        },
        "characters": []
    }

    all_chars = category_data["all"]
    total_chars = len(all_chars)

    print(f"\nFetching abilities for {total_chars} characters...")

    # One slot per input position: results are written back by index so the
    # output order matches all_chars rather than thread completion order.
    character_data = [None] * total_chars
    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        future_to_index = {
            executor.submit(process_character, character, category_data): index
            for index, character in enumerate(all_chars)
        }

        # Progress is still printed as each download finishes.
        for done, future in enumerate(concurrent.futures.as_completed(future_to_index), start=1):
            index = future_to_index[future]
            character = all_chars[index]
            try:
                character_data[index] = future.result()
                print(f"Fetched {done}/{total_chars}: {character}")
            except Exception as e:
                print(f"Error processing {character}: {str(e)}")
                character_data[index] = {
                    "name": character,
                    "category": None,
                    "ability": f"Error: {str(e)}"
                }

    complete_data["characters"] = character_data
    return complete_data

def save_data_to_json(data, filename="botc_complete_data.json"):
    """Write ``data`` as indented JSON to a fixed backup file and to ``filename``.

    The backup is always written to ``botc_complete_data_backup.json`` in the
    current working directory and receives the same content as ``filename``.

    Args:
        data: JSON-serialisable mapping; ``data['characters']`` must support
            ``len()`` because its size is reported in the final message.
        filename: Path of the primary output file.
    """
    def _write_json(path):
        # Same serialisation settings for both output files.
        with open(path, 'w', encoding='utf-8') as handle:
            json.dump(data, handle, indent=2)

    backup_filename = "botc_complete_data_backup.json"

    _write_json(backup_filename)
    print(f"\nSaved backup data to {backup_filename}")

    _write_json(filename)
    print(f"\nSaved complete data for {len(data['characters'])} characters to {filename}")

def create_excel_from_json(json_file, output_excel):
    """Convert the scraped character JSON into an Excel sheet with keyword flags.

    Each character becomes one row with ``NAME``, ``CATEGORY`` and ``ABILITY``
    columns, followed by four flag columns. A flag holds ``1`` when the
    ability text contains the corresponding phrase (case-insensitive) and an
    empty string otherwise.

    Args:
        json_file: Path to a JSON file with a top-level ``"characters"`` list
            of dicts carrying ``name``, ``category`` and ``ability`` keys.
        output_excel: Destination path passed to ``DataFrame.to_excel``.
    """
    with open(json_file, 'r', encoding='utf-8') as source:
        payload = json.load(source)

    # Column header -> phrase searched for in the ability text.
    keyword_columns = {
        'Each night': r'Each night',
        'You start': r'You start',
        'Each day': r'Each day',
        'Once per Game': r'Once per game',
    }

    def _to_row(entry):
        row = {
            'NAME': entry['name'],
            'CATEGORY': entry['category'],
            'ABILITY': entry['ability'],
        }
        for header, phrase in keyword_columns.items():
            found = re.search(phrase, row['ABILITY'], re.IGNORECASE)
            row[header] = 1 if found else ''
        return row

    df = pd.DataFrame([_to_row(entry) for entry in payload['characters']])
    # The table is written one row down and one column in from the sheet origin.
    df.to_excel(output_excel, index=False, startrow=1, startcol=1)
    print(f"Excel file created: {output_excel}")



In [3]:

# Driver cell: run the full scrape, persist the result, then report the
# elapsed wall-clock time and a short preview of what was collected.
print(
    "Blood on the Clocktower Character Scraper",
    "----------------------------------------",
    sep="\n",
)

start_time = time.time()
complete_data = build_complete_character_data()
save_data_to_json(complete_data)
end_time = time.time()

print(f"Total execution time: {end_time - start_time:.2f} seconds")

# Print sample of characters with abilities
print("\nSample of characters with abilities:")
sample_size = 5
for char in complete_data["characters"][:sample_size]:
    print(f"- {char['name']} ({char['category']}):")
    print(f"  Ability: {char['ability']}")

Blood on the Clocktower Character Scraper
----------------------------------------
Found 22 characters in Outsiders
Found 19 characters in Demons
Found 26 characters in Minions
Found 68 characters in Townsfolk

Fetching abilities for 135 characters...
Fetched 1/135: Barber
Fetched 2/135: Damsel
Fetched 3/135: Tinker
Fetched 4/135: Drunk
Fetched 5/135: Heretic
Fetched 6/135: Puzzlemaster
Fetched 7/135: Hatter
Fetched 8/135: Goon
Fetched 9/135: Zealot
Fetched 10/135: Leviathan
Fetched 11/135: Butler
Fetched 12/135: Golem
Fetched 13/135: Al-Hadikhia
Fetched 14/135: Moonchild
Fetched 15/135: Imp
Fetched 16/135: Snitch
Fetched 17/135: Sweetheart
Fetched 18/135: Klutz
Fetched 19/135: Saint
Fetched 20/135: Lleech
Fetched 21/135: Recluse
Fetched 22/135: Mutant
Fetched 23/135: Fang Gu
Fetched 24/135: Ojo
Fetched 25/135: Po
Fetched 26/135: Pukka
Fetched 27/135: Riot
Fetched 28/135: Shabaloth
Fetched 29/135: Vigormortis
Fetched 30/135: Vortox
Fetched 31/135: Yaggababble
Fetched 32/135: Zombuul
Fe

In [6]:

# Build the spreadsheet from "botc_complete_data.json", the default output
# path of save_data_to_json, so the scraping cell must have been run first.
create_excel_from_json("botc_complete_data.json", "botc_character_list.xlsx")

Excel file created: botc_character_list.xlsx
