In [1]:
import requests
from bs4 import BeautifulSoup
import json
import re
import string
import sys
from bs4.element import NavigableString, Tag
from os import path

In [2]:
basepath = path.abspath('')
item_filepath = path.abspath(path.join(basepath, "..", "game_data", "roll_20_all_items.json"))
f = open(item_filepath, "r", encoding="utf-8")
all_items = json.load(f)
f.close()

spell_filepath = path.abspath(path.join(basepath, "..", "game_data", "roll_20_all_spells.json"))
fs = open(spell_filepath, "r", encoding="utf-8")
all_spells = json.load(fs)
fs.close()
    
legal_traits = ["jack of all trades", "pact of the blade", "step of the wind", "mask of the wild", "pact of the tome",
               "fury of the small", "book of ancient secrets"]
banned_traits = ["you have"]

class DualSpecError(Exception):
    pass

class NoTraitsError(Exception):
    pass

class NoInvError(Exception):
    pass

class NoProfError(Exception):
    pass

class BadItemError(Exception):
    pass

modifiers = [-5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10]
attr_keys = {"STR": "strength", "DEX": "dexterity", "CON": "constitution", 
             "INT": "intelligence", "WIS": "wisdom", "CHA": "charisma"}
spellcasting_mod = {"bard": "CHA", "paladin": "CHA", "sorcerer": "CHA", "warlock": "CHA",
                   "cleric": "WIS", "ranger": "WIS", "wizard": "INT", "eldritch knight": "INT"}

In [9]:
url_list = ["http://www.kassoon.com/dnd/5e/character-sheet/846/elliza-maren-drow-elf-warlock-1/",
           "http://www.kassoon.com/dnd/5e/character-sheet/2384/vinheim-high-elf-wizard-1/",
           "http://www.kassoon.com/dnd/5e/character-sheet/2382/mijira-imbixtellrhyst-dragonborn-rogue-1/",
           "http://www.kassoon.com/dnd/5e/character-sheet/208/adam-fidge-variant-human-bard-5/",
           "https://www.kassoon.com//dnd/5e/character-sheet/48/paren-galastacia-wood-elf-rogue-1/",
           "http://www.kassoon.com/dnd/5e/character-sheet/55/dodge-lightfoot-halfling-sorcerer2/",
           "https://www.kassoon.com/dnd/5e/character-sheet/28/eston-human-bard-1-cleric-1/",
           "https://www.kassoon.com/dnd/5e/character-sheet/2100/",
           "https://www.kassoon.com/dnd/5e/character-sheet/95/",
           "https://www.kassoon.com/dnd/5e/character-sheet/11714/"]

In [4]:
# Scrape a single page
def scrape_page(url):
    char = {}
    page = requests.get(url)

    # Basic Info #
    soup = BeautifulSoup(page.content, "html.parser")
    char["name"] = soup.find("div", {"class": "character_name"}).contents[0]
    char["link"] = url
    char_box = soup.findAll("div", {"class": "underline autocomplete updstats"})
    cls = char_box[0].contents[0]
    if "/" in cls:
        raise DualSpecError
    match = re.match(r"([A-Za-z]+)\s?([0-9]+)", cls, re.I)
    if match:
        items = match.groups()
        char["class"] = items[0].lower()
        char["level"] = int(items[1])
    else:
        char["class"] = cls.rsplit(" ")[0].lower()
        char["level"] = int(cls.rsplit(" ")[1])
    char["background"] = char_box[1].contents[0].lower()
    char["race"] = char_box[2].contents[0].lower()
    char["alignment"] = char_box[3].contents[0].lower()
    char["gender"] = char_box[4].contents[0].lower()

    # Attributes #
    attrs = soup.findAll("div", {"class": "character_attribute_box"})
    char["attributes"] = {}
    for att in attrs[:6]:
        att_map = {}
        att_name = att.find("div", {"class": "character_attr_name"}).contents[0]
        att_map["true_val"] = int(att.find("div", {"class": "character_attr_value"}).contents[0])
        att_map["points"] = int(att.find("div", {"class": "character_attr_value editshow updstats"}).contents[0])
        att_map["racial_bonus"] = att_map["true_val"] - att_map["points"]
        att_map["modifier"] = modifiers[att_map["true_val"]]
        char["attributes"][att_name.lower()] = att_map
    char["attributes"]["armor_class"] = 10 + char["attributes"]["dexterity"]["modifier"]
    char["attributes"]["initiative"] = char["attributes"]["dexterity"]["modifier"]
    
    sheet_proficiency = int(attrs[8].find("div", {"class": "character_attr_bonus"}).contents[0])
    char["attributes"]["proficiency"] = int((char["level"]-1)/4 + 2)
    proficiency_diff = char["attributes"]["proficiency"] - sheet_proficiency
    
    char["attributes"]["perception"] = 10 + char["attributes"]["wisdom"]["modifier"]
    char["attributes"]["hit_points"] = int(attrs[11].find("div", {"id": "val15"}).contents[0])
    
    # Skills #
    skill_map = {}
    proficient_skills = []
    skills = soup.findAll("div", {"class": "clear flex"})
    for s in skills[1:-1]:
        skill_name = ((s.find("div", {"class": "character_skills_info"}).contents[0]).split("(")[0]).replace("* ", "").replace("'", "").strip().lower()
        skill_val = 0
        try:
            skill_val = int(s.find("span", {"class": "editshow character_skills_bonus underline"}).contents[0])
        except AttributeError:
            print("ERROR: " + skill_name)
            print(s)
            skill_val = 0
        if s.find("input", {"checked": "checked"}):
            proficient_skills.append(string.capwords(skill_name))
            skill_val = skill_val + proficiency_diff
        skill_map[string.capwords(skill_name)] = skill_val
    
    char["skills"] = skill_map
    char["proficient_skills"] = proficient_skills
    
    # Saves #
    saves = soup.find("div", {"class": "character_throws"}).findAll("div", {"class": "clear"})
    saving_throws = {}
    proficient_saves = []
    for s in saves[:-1]:
        save_name = s.find("div", {"class": "character_throw_info"}).contents[0].lower()
        save_val = char["attributes"][save_name]["modifier"]
        if s.find("input", {"checked": "checked"}):
            proficient_saves.append(save_name)
            save_val = save_val + char["attributes"]["proficiency"]
        saving_throws[save_name] = save_val
    char["saving_throws"] = saving_throws
    char["proficient_saves"] = proficient_saves
    
    
    # Attacks #
    misc = soup.find("div", {"class": "character_misc"})
    atks_and_spells = []
    item_list = misc.find("tbody", {"id": "list0"}).findAll("tr")
    for item in item_list:
        item_map = {}
        i = item.findAll("td")
        #print(i)
        #print(type(i[0].contents[0]))
        
        if (isinstance(i[0].contents[0], Tag)):
            if str(i[0].contents[0].contents[0]) == "Roll":
                continue
        item_map["name"] = string.capwords(i[0].contents[0])
        item_name = item_map["name"].title()
        
        # Item and spell mapping
        def map_item_or_spell(item_map, item_name):
            if item_name in all_items:
                if "Damage" in all_items[item_name]:
                    item_map["damage"] = all_items[item_name]["Damage"]
                if "Damage Type" in all_items[item_name]:
                    item_map["damage_type"] = all_items[item_name]["Damage Type"]
                else:
                    item_map["damage_type"] = "normal"
                if "Range" in all_items[item_name]:
                    item_map["range"] = all_items[item_name]["Range"]
                else:
                    item_map["range"] = "Reach"
                if "Properties" in all_items[item_name]:
                    item_map["properties"] = all_items[item_name]["Properties"]
                return (item_map, True)
            elif item_name in all_spells:
                if "Damage" in all_spells[item_name]:
                    item_map["damage"] = all_spells[item_name]["Damage"]
                if "Damage Type" in all_spells[item_name]:
                    item_map["damage_type"] = all_spells[item_name]["Damage Type"]
                if "Range" in all_spells[item_name]:    
                    item_map["range"] = all_spells[item_name]["Range"]
                else:
                    item_map["range"] = "Reach"
                return (item_map, True)
            else:
                return (item_map, False)
        
        item_map, v = map_item_or_spell(item_map, item_name)
        if not v:
            new_name = string.capwords(item_name.replace(" ", "").title())
            item_map, v2 = map_item_or_spell(item_map, new_name)
            if not v2:
                raise BadItemError("Bad Item or Spell: " + str(item_name))
            else:
                item_map["name"] = new_name.title()
        
        #print(item_map)
        atks_and_spells.append(item_map)
    
    char["attacks"] = atks_and_spells
    
    
    # Spellcasting #
    if char["class"] in spellcasting_mod:
        spellcasting = {}
        spellcasting["spellcast_modifier"] = attr_keys[spellcasting_mod[char["class"]]]
        spellcasting["spellsave_dc"] = 8 + char["attributes"][spellcasting["spellcast_modifier"]]["modifier"] + char["attributes"]["proficiency"]
        spellcasting["spell_attack_bonus"] = int(char["attributes"][spellcasting["spellcast_modifier"]]["modifier"] + char["attributes"]["proficiency"])
        for i in range(1,10):
            slot_val = misc.find("div", {"id": ("val" + str(55+i))}).contents
            if slot_val:
                if slot_val[0].find("-") != -1:
                    slot_val[0] = int(slot_val[0].split('-')[0])
                    

                spellcasting["level_" + str(i) + "_slots"] = int(slot_val[0])
            else:
                spellcasting["level_" + str(i) + "_slots"] = 0
        char["spellcasting"] = spellcasting
        
        
    # Features & Traits #
    features = soup.findAll("div", {"class": "edithide"})
    #print(feats[3].contents)
    features_and_traits = []
    pattern = re.compile("^([A-Z][a-z]+):")
    for i in features[3].contents:
        i = str(i).strip().replace("’", "'")
        #print(i)
        if ":" in i:
            #print(i)
            #print("")
            pattern = re.search("^([A-Za-z' ]+):", i)
            bold_pattern = re.search("\<b\>([A-Za-z':, ]+)\</b\>", i)
            p_pattern = re.search("^\<p\>([A-Za-z', ]+):", i)
            pat_str = ""
            if pattern:
                pat_str = string.capwords(str(pattern.group(0)).rstrip(":"))
            elif bold_pattern:
                pat_str = string.capwords(str(bold_pattern.group(0)).title().split("<B>")[1].rsplit(":</B>")[0])
            elif p_pattern:
                pat_str = string.capwords(str(p_pattern.group(0)).title().split("<P>")[1].rsplit(":")[0])
            
            if (pat_str != "") and (pat_str.lower() not in banned_traits):
                if pat_str not in features_and_traits:
                    features_and_traits.append(pat_str)
        
        else:
            spl = i.split()
            i = " ".join(spl)
            basic_pattern = re.search("^([A-Za-z', ]+)$", i)
            if basic_pattern:
                if i.lower() in legal_traits:
                    pat_str = string.capwords(str(basic_pattern.group(0)))
                    if pat_str not in features_and_traits:
                        features_and_traits.append(pat_str)
                elif "," in i.lower():
                    if ("draconic ancestry" in i.lower() or "favored enemy" in i.lower()):
                        pat_str = string.capwords(str(basic_pattern.group(0)))
                        if pat_str not in features_and_traits:
                            features_and_traits.append(pat_str)
                    else:
                        # print("Rejected: " + " ".join(spl) + " link: " + url)
                        pass
                elif len(spl) > 3:
                    # print("Rejected: " + " ".join(spl) + " link: " + url)
                    pass
                else:
                    pat_str = string.capwords(str(basic_pattern.group(0)))
                    if pat_str not in features_and_traits:
                        features_and_traits.append(pat_str)
            
            #else:
                #print("No Pattern: " + " ".join(spl))
                
    if not features_and_traits:
        raise NoTraitsError
    char["features"] = (features_and_traits)
    
    
    # Inventory #
    inv_list = []
    inv = soup.findAll("div", {"class": "character_panel"})[3].find("div", {"id": "val46"})
    # print(inv.contents)
    for item in inv:
        item_dict = {}
        if str(item) != "<br/>":
            # print(item)
            item_ct = re.search(r'\d+', item)
            if item_ct:
                item_ct = item_ct.group()
                item_name = re.split(r'\d+', item)[1].strip()
                if item_name[-1:] == "s":
                    item_name = item_name[:-1]
                item_dict["name"] = item_name
                item_dict["count"] = int(item_ct)
            else:
                item_dict["name"] = item
                item_dict["count"] = 1
            inv_list.append(item_dict)
    if len(inv_list) <= 0:
        raise NoInvError
    char["inventory"] = inv_list
    
    gold = soup.findAll("div", {"class": "character_panel"})[4].find("div", {"id": "val45"})
    # print(gold.contents)
    if len(gold.contents) > 0:
        gold_cont = re.search(r'\d+', gold.contents[0])
        if gold_cont:
            char["gp"] = int(gold_cont.group())
        else:
            raise BadItemError("Bad gp value: '" + str(gold.contents[0]) + "'")
    else:
        char["gp"] = 0
        
        
    # Proficiencies & Languages #
    profs = soup.findAll("div", {"class": "character_panel"})[5].find("div", {"id": "val47"})
    # print(profs.contents)
    prof_list = []
    lang_list = []
    for p in profs:
        if str(p) != "<br/>":
            p = string.capwords(p)
            if "Proficiencies" in p:
                pr = p.replace("Proficiencies", "", 1).strip(": ").split(",")
                for i in pr:
                    prof_list.append(i.strip())
                    
            if "Languages" in p:
                pr = p.replace("Languages", "", 1).strip(": ").split(",")
                for i in pr:
                    lang_list.append(i.strip())
                    
    if not prof_list or not lang_list:
        raise NoProfError
    char["proficiencies"] = prof_list
    char["languages"] = lang_list
                
    
    # End of char #
    return char

In [13]:
# TESTING #
print(json.dumps(scrape_page(url_list[9]), indent=4))

{
    "name": "Vola Darby",
    "link": "https://www.kassoon.com/dnd/5e/character-sheet/11714/",
    "class": "druid",
    "level": 1,
    "background": "urchin",
    "race": "half-orc",
    "alignment": "lawful good",
    "gender": "female",
    "attributes": {
        "strength": {
            "true_val": 12,
            "points": 10,
            "racial_bonus": 2,
            "modifier": 1
        },
        "dexterity": {
            "true_val": 14,
            "points": 14,
            "racial_bonus": 0,
            "modifier": 2
        },
        "constitution": {
            "true_val": 14,
            "points": 13,
            "racial_bonus": 1,
            "modifier": 2
        },
        "intelligence": {
            "true_val": 12,
            "points": 12,
            "racial_bonus": 0,
            "modifier": 1
        },
        "wisdom": {
            "true_val": 15,
            "points": 15,
            "racial_bonus": 0,
            "modifier": 2
        },
        "c

In [118]:
test_characters = []
for idx, url in enumerate(url_list):
    try:
        test_characters.append(scrape_page(url))
    except ValueError:
        print("ValError at idx " + str(idx) + " link: " + url)
    except IndexError:
        print("IndexError at idx " + str(idx) + " link: " + url)
    except DualSpecError:
        print("DualSpecError at idx " + str(idx) + " link: " + url)
    except NoTraitsError:
        print("NoTraitsError at idx " + str(idx) + " link: " + url)
    except NoInvError:
        print("NoInvError at idx " + str(idx) + " link: " + url)
    except NoProfError:
        print("NoProfError at idx " + str(idx) + " link: " + url)
    except BadItemError as e:
        print("BadItemError at idx " + str(idx) + " link: " + url)
print(json.dumps(test_characters, indent=4))
with open("scraper_output/kassoon_scrapes/test_kassoon_out.json", "w", encoding='utf-8') as outfile:
    json.dump(test_characters, outfile, ensure_ascii=False, indent=4)

NoProfError at idx 3 link: http://www.kassoon.com/dnd/5e/character-sheet/208/adam-fidge-variant-human-bard-5/
NoTraitsError at idx 5 link: http://www.kassoon.com/dnd/5e/character-sheet/55/dodge-lightfoot-halfling-sorcerer2/
DualSpecError at idx 6 link: https://www.kassoon.com/dnd/5e/character-sheet/28/eston-human-bard-1-cleric-1/
BadItemError at idx 8 link: https://www.kassoon.com/dnd/5e/character-sheet/95/
[
    {
        "name": "Elliza Maren",
        "class": "warlock",
        "level": 1,
        "background": "mercenary veteran",
        "race": "drow elf",
        "alignment": "neutral",
        "gender": "female",
        "attributes": {
            "strength": {
                "true_val": 8,
                "points": 8,
                "racial_bonus": 0,
                "modifier": -1
            },
            "dexterity": {
                "true_val": 16,
                "points": 14,
                "racial_bonus": 2,
                "modifier": 3
            },
          

In [7]:
# Scrape by sheet number
num_base = "https://www.kassoon.com/dnd/5e/character-sheet/"
all_chars = []
out_count = 100
total_valid = 0
total_attempts = 0
start = 11635
to_check = 25000
cap = 20000
class_count = {}
level_count = {}
print("Starting...")
for i in range(start, start + to_check):
    total_attempts = total_attempts + 1
    try:
        #print("i = " + str(i))
        all_chars.append(scrape_page(num_base + str(i) + "/"))
        total_valid = total_valid + 1
        if all_chars[-1]["class"] not in class_count:
            class_count[all_chars[-1]["class"]] = 1
        else:
            class_count[all_chars[-1]["class"]] += 1

        if all_chars[-1]["level"] not in level_count:
            level_count[all_chars[-1]["level"]] = 1
        else:
            level_count[all_chars[-1]["level"]] += 1

    except ValueError:
        print("ValError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except IndexError:
        print("IndexError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except DualSpecError:
        print("DualSpecError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except NoTraitsError:
        print("NoTraitsError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except NoInvError:
        print("NoInvError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except NoProfError:
        print("NoProfError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    except BadItemError as e:
        print(e)
        print("BadItemError at idx " + str(i) + " link: " + num_base + str(i) + "/")
    
    if len(all_chars) >= 100:
        outf = "scraper_output/kassoon_scrapes/kassoon_out_" + str(out_count) + ".json"
        with open(outf, "w", encoding='utf-8') as outfile:
            json.dump(all_chars, outfile, ensure_ascii=False, indent=4)
        print("")
        print("Printing page " + str(out_count) + " to file")
        print("Total Attempts: " + str(total_attempts))
        print("Total Valid: " + str(total_valid))
        print("")
        print("Class Count:")
        print(class_count)
        print("Level Count:")
        print(level_count)
        print("")
        out_count = out_count + 1
        all_chars = []
    if total_valid >= cap:
        break
if(len(all_chars) > 0):
    print("")
    print("Printing page " + str(out_count) + " to file")

    outf = "scraper_output/kassoon_scrapes/kassoon_out_" + str(out_count) + ".json"
    with open(outf, "w", encoding='utf-8') as outfile:
        json.dump(all_chars, outfile, ensure_ascii=False, indent=4)        

print("Final Count!")
print("Total Attempts: " + str(total_attempts))
print("Total Valid: " + str(total_valid))
print("")
print("Class Count:")
print(class_count)
print("Level Count:")
print(level_count)


# Pass 1:
# Class Count (11634 attempts, 10000 success):
# {'druid': 826, 'paladin': 874, 'sorcerer': 762, 'cleric': 821, 'wizard': 882, 'ranger': 833, 'barbarian': 871, 'monk': 843, 'fighter': 890, 'rogue': 865, 'warlock': 739, 'bard': 793, 'apothecary': 1}
# Level Count:
# {1: 9811, 3: 49, 4: 23, 2: 23, 7: 6, 5: 38, 20: 10, 10: 12, 9: 4, 16: 1, 15: 3, 12: 1, 6: 10, 8: 6, 14: 1, 18: 1, 17: 1}
# Pass 2 (8661 attempts, 8600 success)
# {'druid': 711, 'paladin': 689, 'sorcerer': 736, 'rogue': 708, 'ranger': 731, 'bard': 714, 'warlock': 683, 'fighter': 742, 'wizard': 688, 'monk': 697, 'barbarian': 746, 'cleric': 755}
# Level Count:
# {1: 8600}

Starting...
NoTraitsError at idx 11675 link: https://www.kassoon.com/dnd/5e/character-sheet/11675/

Printing page 99 to file
Total Attempts: 101
Total Valid: 100

Class Count:
{'druid': 8, 'paladin': 9, 'sorcerer': 7, 'rogue': 16, 'ranger': 12, 'bard': 9, 'warlock': 6, 'fighter': 5, 'wizard': 12, 'monk': 5, 'barbarian': 4, 'cleric': 7}
Level Count:
{1: 100}


Printing page 100 to file
Total Attempts: 201
Total Valid: 200

Class Count:
{'druid': 14, 'paladin': 16, 'sorcerer': 20, 'rogue': 28, 'ranger': 23, 'bard': 16, 'warlock': 14, 'fighter': 13, 'wizard': 18, 'monk': 9, 'barbarian': 13, 'cleric': 16}
Level Count:
{1: 200}

NoTraitsError at idx 11857 link: https://www.kassoon.com/dnd/5e/character-sheet/11857/
NoTraitsError at idx 11902 link: https://www.kassoon.com/dnd/5e/character-sheet/11902/

Printing page 101 to file
Total Attempts: 303
Total Valid: 300

Class Count:
{'druid': 23, 'paladin': 24, 'sorcerer': 24, 'rogue': 32, 'ranger': 33, 'bard': 24, 'warlock': 28, 'fighter': 24, 'w

NoTraitsError at idx 14232 link: https://www.kassoon.com/dnd/5e/character-sheet/14232/
NoTraitsError at idx 14234 link: https://www.kassoon.com/dnd/5e/character-sheet/14234/

Printing page 124 to file
Total Attempts: 2617
Total Valid: 2600

Class Count:
{'druid': 203, 'paladin': 234, 'sorcerer': 204, 'rogue': 222, 'ranger': 224, 'bard': 204, 'warlock': 219, 'fighter': 215, 'wizard': 217, 'monk': 200, 'barbarian': 233, 'cleric': 225}
Level Count:
{1: 2600}


Printing page 125 to file
Total Attempts: 2717
Total Valid: 2700

Class Count:
{'druid': 212, 'paladin': 247, 'sorcerer': 214, 'rogue': 227, 'ranger': 230, 'bard': 213, 'warlock': 223, 'fighter': 223, 'wizard': 222, 'monk': 211, 'barbarian': 243, 'cleric': 235}
Level Count:
{1: 2700}


Printing page 126 to file
Total Attempts: 2817
Total Valid: 2800

Class Count:
{'druid': 220, 'paladin': 258, 'sorcerer': 218, 'rogue': 233, 'ranger': 239, 'bard': 221, 'warlock': 228, 'fighter': 231, 'wizard': 229, 'monk': 220, 'barbarian': 250, 'cle

NoTraitsError at idx 16587 link: https://www.kassoon.com/dnd/5e/character-sheet/16587/
NoTraitsError at idx 16617 link: https://www.kassoon.com/dnd/5e/character-sheet/16617/

Printing page 148 to file
Total Attempts: 5032
Total Valid: 5000

Class Count:
{'druid': 405, 'paladin': 419, 'sorcerer': 406, 'rogue': 426, 'ranger': 437, 'bard': 405, 'warlock': 414, 'fighter': 430, 'wizard': 384, 'monk': 391, 'barbarian': 445, 'cleric': 438}
Level Count:
{1: 5000}


Printing page 149 to file
Total Attempts: 5132
Total Valid: 5100

Class Count:
{'druid': 417, 'paladin': 427, 'sorcerer': 419, 'rogue': 436, 'ranger': 441, 'bard': 410, 'warlock': 424, 'fighter': 435, 'wizard': 393, 'monk': 395, 'barbarian': 453, 'cleric': 450}
Level Count:
{1: 5100}


Printing page 150 to file
Total Attempts: 5232
Total Valid: 5200

Class Count:
{'druid': 426, 'paladin': 437, 'sorcerer': 432, 'rogue': 442, 'ranger': 445, 'bard': 420, 'warlock': 432, 'fighter': 441, 'wizard': 401, 'monk': 401, 'barbarian': 468, 'cle

NoTraitsError at idx 18927 link: https://www.kassoon.com/dnd/5e/character-sheet/18927/

Printing page 171 to file
Total Attempts: 7352
Total Valid: 7300

Class Count:
{'druid': 598, 'paladin': 586, 'sorcerer': 638, 'rogue': 610, 'ranger': 619, 'bard': 601, 'warlock': 574, 'fighter': 632, 'wizard': 561, 'monk': 581, 'barbarian': 654, 'cleric': 646}
Level Count:
{1: 7300}

NoTraitsError at idx 19075 link: https://www.kassoon.com/dnd/5e/character-sheet/19075/

Printing page 172 to file
Total Attempts: 7453
Total Valid: 7400

Class Count:
{'druid': 607, 'paladin': 597, 'sorcerer': 643, 'rogue': 616, 'ranger': 628, 'bard': 608, 'warlock': 585, 'fighter': 643, 'wizard': 572, 'monk': 589, 'barbarian': 659, 'cleric': 653}
Level Count:
{1: 7400}


Printing page 173 to file
Total Attempts: 7553
Total Valid: 7500

Class Count:
{'druid': 614, 'paladin': 605, 'sorcerer': 649, 'rogue': 619, 'ranger': 634, 'bard': 624, 'warlock': 596, 'fighter': 652, 'wizard': 582, 'monk': 596, 'barbarian': 664, 'cle

KeyboardInterrupt: 

In [None]:
# Scrape by page
master_page = requests.get("https://www.kassoon.com/dnd/5e/premade-characters/")
link_soup = BeautifulSoup(master_page.content, "html.parser")
chr_list = link_soup.find("tbody", {"id": "tblChrList"}).findAll("tr")
#print(chr_list[0])
base = "http://www.kassoon.com"

cap_page = 1000
all_chars_page = []
for idx,c in enumerate(chr_list):
    c_url = c.find("a", href=True)["href"]
    try:
        all_chars_page.append(scrape_page(base + c_url))
    except ValueError:
        print("ValError at idx " + str(idx) + " link: " + base + c_url)
    except IndexError:
        print("IndexError at idx " + str(idx) + " link: " + base + c_url)
    except DualSpecError:
        print("DualSpecError at idx " + str(idx) + " link: " + base + c_url)
    except NoTraitsError:
        print("NoTraitsError at idx " + str(idx) + " link: " + base + c_url)
    except NoInvError:
        print("NoInvError at idx " + str(idx) + " link: " + base + c_url)
    except NoProfError:
        print("NoProfError at idx " + str(idx) + " link: " + base + c_url)
    except BadItemError as e:
        print(e)
        print("BadItemError at idx " + str(idx) + " link: " + base + c_url)
    if len(all_chars_page) >= cap_page:
        break


In [100]:
# premade page test
original_stdout = sys.stdout
with open("temp_out.txt", "w", encoding='utf-8') as outfile:
    sys.stdout = outfile
    print(BeautifulSoup(requests.get("https://www.kassoon.com/dnd/5e/premade-characters/").content, "html.parser"))
    sys.stdout = original_stdout

In [None]:
# pattern testing
pattern = re.search("^([A-Za-z' ]+)", i)

In [139]:
# pattern testing 2
test = ["Darkvision: Thanks to your elf blood, you have superior vision in dark and dim conditions.You can",
       "ends, you gain the following benefits:",
       "hexblade's Curse: Starting at 1st level, you gain the ability to place a baleful curse on someone. As a bonus action, choose",
       "Hex Warrior:At 1st level , you acquire the traini",
       "Thieves’ Cant: You know thieves’ cant, a"]
for t in test:
    pattern = re.search("^([A-Za-z'’ ]+):", t)
    if pattern:
        print(pattern.group(0))
    else:
        print("Pattern Not Found")

Darkvision:
Pattern Not Found
hexblade's Curse:
Hex Warrior:
Thieves’ Cant:


In [None]:
type(BeautifulSoup)

In [5]:
import webbrowser

chrome_path = "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe %s"

# 1: https://www.kassoon.com/dnd/5e/character-sheet/846/elliza-maren-drow-elf-warlock-1/
# 2: https://www.kassoon.com/dnd/5e/character-sheet/2384/vinheim-high-elf-wizard-1/
# 3: https://www.kassoon.com/dnd/5e/character-sheet/2382/mijira-imbixtellrhyst-dragonborn-rogue-1/
# 4: https://www.kassoon.com/dnd/5e/character-sheet/208/adam-fidge-variant-human-bard-5/

webbrowser.get(chrome_path).open("http://www.kassoon.com/dnd/5e/character-sheet/846/elliza-maren-drow-elf-warlock-1/")
webbrowser.get(chrome_path).open("http://www.kassoon.com/dnd/5e/character-sheet/2384/vinheim-high-elf-wizard-1/")
webbrowser.get(chrome_path).open("http://www.kassoon.com/dnd/5e/character-sheet/2382/mijira-imbixtellrhyst-dragonborn-rogue-1/")
webbrowser.get(chrome_path).open("http://www.kassoon.com/dnd/5e/character-sheet/208/adam-fidge-variant-human-bard-5/")

False