#### Import

In [1]:
import requests
import json
import re
import time
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import concurrent.futures
import json
import pandas as pd
import re
import random

#### Methods

In [2]:

def get_characters_from_category(category):
    url = f"https://wiki.bloodontheclocktower.com/Category:{category}"
    response = requests.get(url)
    
    if response.status_code != 200:
        print(f"Failed to retrieve {category} page: {response.status_code}")
        return []
    
    soup = BeautifulSoup(response.text, 'html.parser')
    category_section = soup.find('div', {'id': 'mw-pages'})
    
    if not category_section:
        print(f"Category section not found for {category}")
        return []
    
    character_links = category_section.find_all('a')
    characters = [link.get('title') for link in character_links if link.get('title')]
    
    return characters

def get_all_characters():
    categories = ["Townsfolk", "Outsiders", "Minions", "Demons"]
    all_characters = {}
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_category = {executor.submit(get_characters_from_category, category): category for category in categories}
        
        for future in concurrent.futures.as_completed(future_to_category):
            category = future_to_category[future]
            try:
                characters = future.result()
                all_characters[category] = characters
                print(f"Found {len(characters)} characters in {category}")
            except Exception as e:
                print(f"Error processing {category}: {str(e)}")
                all_characters[category] = []
    
    flat_list = [char for category_chars in all_characters.values() for char in category_chars]
    all_characters["all"] = flat_list
    
    return all_characters

def get_character_ability(character_name):
    base_url = "https://wiki.bloodontheclocktower.com/"
    url = urljoin(base_url, character_name)
    
    try:
        response = requests.get(url)
        
        if response.status_code != 200:
            return "Error: Character page not found"
        
        soup = BeautifulSoup(response.text, 'html.parser')
        summary_heading = soup.find('span', {'class': 'mw-headline', 'id': 'Summary'})
        
        if not summary_heading:
            return "Error: Summary section not found"
        
        summary_paragraph = summary_heading.find_parent('h2').find_next('p')
        
        if not summary_paragraph:
            return "Error: Summary text not found"
        
        ability_text = summary_paragraph.text
        ability_text = ability_text.replace("”",'"').replace('“','"').replace("\n",'').replace('"','')
        quoted_text_match = re.search(r'^"(.*)"$', ability_text)
        if quoted_text_match:
            ability = quoted_text_match.group(1)
        else:
            ability = ability_text
        
        return ability.replace('"','')
    
    except Exception as e:
        return f"Error processing character: {str(e)}"

def process_character(character, category_data):
    character_category = None
    for category, chars in category_data.items():
        if category != "all" and character in chars:
            character_category = category
            break
    
    ability = get_character_ability(character)
    if 'Error:' in ability:
        print(f"{character}'s ability not found {ability}")
    
    return {
        "name": character,
        "category": character_category,
        "ability": ability
    }

def build_complete_character_data():
    category_data = get_all_characters()
    
    complete_data = {
        "categories": {},
        "characters": []
    }
    
    for category, chars in category_data.items():
        if category != "all":
            complete_data["categories"][category] = chars
    
    all_chars = category_data["all"]
    total_chars = len(all_chars)
    
    print(f"\nFetching abilities for {total_chars} characters... (55 secs)")
    
    character_data = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        future_to_character = {executor.submit(process_character, character, category_data): character for character in all_chars}
        
        for i, future in enumerate(concurrent.futures.as_completed(future_to_character)):
            character = future_to_character[future]
            try:
                char_data = future.result()
                character_data.append(char_data)
                # print(f"Fetched {i+1}/{total_chars}: {character}")
            except Exception as e:
                print(f"Error processing {character}: {str(e)}")
                character_data.append({
                    "name": character,
                    "category": None,
                    "ability": f"Error: {str(e)}"
                })
    
    complete_data["characters"] = character_data
    return complete_data

def save_data_to_json(data, filename="botc_complete_data.json"):
    backup_filename = "botc_complete_data_backup.json"
    with open(backup_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    print(f"\nSaved backup data to {backup_filename}")
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    
    print(f"\nSaved complete data for {len(data['characters'])} characters to {filename}")

def create_excel_from_json(json_file, output_excel):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    characters = data['characters']
    df_data = []
    
    for char in characters:
        name = char['name']
        category = char['category']
        ability = char['ability']
        
        each_night = 1 if re.search(r'Each night', ability, re.IGNORECASE) else ''
        you_start = 1 if re.search(r'You start', ability, re.IGNORECASE) else ''
        each_day = 1 if re.search(r'Each day', ability, re.IGNORECASE) else ''
        once_per_game = 1 if re.search(r'Once per game', ability, re.IGNORECASE) else ''
        
        df_data.append({
            'NAME': name,
            'CATEGORY': category,
            'ABILITY': ability,
            'Each night': each_night,
            'You start': you_start,
            'Each day': each_day,
            'Once per Game': once_per_game
        })
    
    df = pd.DataFrame(df_data)
    df.to_csv(output_excel+'.csv', index=False)
    df.to_excel(output_excel+'.xlsx', index=False, startrow=1, startcol=1)
    print(f"Excel file created: {output_excel}")

def create_formatted_json(townsfolk, outsiders, minions, demons, script_name="Custom Script", author="user"):
    result = []
    result.append({
        "id": "_meta",
        "author": author,
        "name": script_name
    })
    result.extend(townsfolk)
    result.extend(outsiders)
    result.extend(minions)
    result.extend(demons)
    return json.dumps(result)


In [3]:

print("Blood on the Clocktower Character Scraper")
print("----------------------------------------")

start_time = time.time()
complete_data = build_complete_character_data()
save_data_to_json(complete_data)

end_time = time.time()
print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")

print("\nRandom sample of characters with abilities:")
sample_list = complete_data["characters"]
random.shuffle(sample_list)
for char in sample_list[:3]:
    print(f"- {char['name']} ({char['category']}):")
    print(f"  Ability: {char['ability']}")

create_excel_from_json("botc_complete_data.json", "botc_character_list")

Blood on the Clocktower Character Scraper
----------------------------------------
Found 68 characters in Townsfolk
Found 22 characters in Outsiders
Found 26 characters in Minions
Found 19 characters in Demons

Fetching abilities for 135 characters... (55 secs)

Saved backup data to botc_complete_data_backup.json

Saved complete data for 135 characters to botc_complete_data.json

Total execution time: 63.68 seconds

Random sample of characters with abilities:
- Magician (Townsfolk):
  Ability: The Demon thinks you are a Minion. Minions think you are a Demon.
- Fearmonger (Minions):
  Ability: Each night, choose a player: if you nominate & execute them, their team loses. All players know if you choose a new player.
- Moonchild (Outsiders):
  Ability: When you learn that you died, publicly choose 1 alive player. Tonight, if it was a good player, they die.
Excel file created: botc_character_list


In [4]:
create_excel_from_json("botc_complete_data.json", "botc_character_list")

Excel file created: botc_character_list


##### Create JSON to upload on https://script.bloodontheclocktower.com/

In [5]:

def sort_characters_by_timing(characters_df):
    timing_priorities = {
        'You start': 0,
        'Each night': 1,
        'Each day': 2,
        'Once per Game': 3,
        'Other': 4
    }

    priorities = []
    for _, row in characters_df.iterrows():
        if row['You start'] == 1:
            priorities.append(timing_priorities['You start'])
        elif row['Each night'] == 1:
            priorities.append(timing_priorities['Each night'])
        elif row['Each day'] == 1:
            priorities.append(timing_priorities['Each day'])
        elif row['Once per Game'] == 1:
            priorities.append(timing_priorities['Once per Game'])
        else:
            priorities.append(timing_priorities['Other'])
    
    characters_df = characters_df.copy()
    characters_df['timing_priority'] = priorities
    sorted_df = characters_df.sort_values(by=['timing_priority', 'NAME'])
    
    return sorted_df

def create_formatted_json_sorted(csv_file, characters, script_name="Custom Script", author="user"):
    df = pd.read_csv(csv_file)
    df['NAME'] = df['NAME'].str.lower()
    
    townsfolk_df = df[(df['CATEGORY'] == 'Townsfolk') & (df['NAME'].isin(characters[0]))]
    outsiders_df = df[(df['CATEGORY'] == 'Outsiders') & (df['NAME'].isin(characters[1]))]
    minions_df = df[(df['CATEGORY'] == 'Minions') & (df['NAME'].isin(characters[2]))]
    demons_df = df[(df['CATEGORY'] == 'Demons') & (df['NAME'].isin(characters[3]))]
    
    townsfolk_df = sort_characters_by_timing(townsfolk_df)
    outsiders_df = sort_characters_by_timing(outsiders_df)
    minions_df = sort_characters_by_timing(minions_df)
    demons_df = sort_characters_by_timing(demons_df)
    
    townsfolk = townsfolk_df['NAME'].tolist()
    outsiders = outsiders_df['NAME'].tolist()
    minions = minions_df['NAME'].tolist()
    demons = demons_df['NAME'].tolist()
    
    result = []
    result.append({
        "id": "_meta",
        "author": author,
        "name": script_name
    })
    
    result.extend(townsfolk)
    result.extend(outsiders)
    result.extend(minions)
    result.extend(demons)
    
    return json.dumps(result)


In [6]:
townsfolk = ["shugenja", "empath", "chambermaid", "snakecharmer", "flowergirl", 
            "towncrier", "savant", "juggler", "seamstress", "sage", "farmer", "virgin"]
outsiders = ["cannibal", "drunk", "lunatic", "barber"]
minions = ["hatter", "widow", "harpy", "psychopath", "vizier", "kazali"]
demons = ["imp", "ojo", "vortox"]
    
output_json = create_formatted_json_sorted(
    csv_file='botc_character_list.csv',
    characters=[townsfolk, outsiders, minions, demons],
    script_name="SAMPLE SCRIPT",
    author="skrowe"
)

print(output_json)

with open("botc_script.json", "w", encoding="utf-8") as f:
    f.write(output_json)

print("JSON saved to botc_script.json")

[{"id": "_meta", "author": "skrowe", "name": "SAMPLE SCRIPT"}, "shugenja", "chambermaid", "empath", "flowergirl", "savant", "seamstress", "farmer", "juggler", "sage", "virgin", "barber", "drunk", "lunatic", "harpy", "psychopath", "vizier", "widow", "imp", "ojo", "vortox"]
JSON saved to botc_script.json
