In [1]:
import requests
from itertools import chain
from collections import namedtuple
import logging

from lxml import etree
from pprint import pprint
from IPython.core.display import display, HTML


url = f"https://api.github.com/repos/BSData/wh40k/contents/"
files = requests.get(url).json()
catalogues = []
# 
for file in files:
    if file['type'] == 'file' and file['name'][0] != '.':
        catalogues.append(file['name'])

In [2]:
with open('src/battlescribe/gamesystem_schema.xsd') as fh:
    gamesystem_schema = etree.XMLSchema(etree.fromstring('\n'.join(fh.readlines())))

In [3]:
path = 'https://raw.githubusercontent.com/BSData/wh40k/master/{}'.format('Warhammer 40,000.gst')
r = requests.get(path)
base = etree.fromstring(r.content, etree.XMLParser(remove_blank_text=True, schema = None))

In [4]:
# look at other cats for better undestanding of sharedSelectionEntryGroups
# handle unit types from main rules, restrictions (min, max)
# print unit compositions unit (min, max), equipments (min, max)

# work with more cats:
# profileTypes, selectionEntries, forceEntries, rules, sharedRules, infoLinks, catalogueLinks, sharedInfoGroups, costTypes
# work with more of the base rules

In [5]:
def load_catalogue(root, base):
    sanity_check(root)
    namespace = {
        'c': 'http://www.battlescribe.net/schema/catalogueSchema',
        'gs': 'http://www.battlescribe.net/schema/gameSystemSchema'}
    data = {}
    data[root.attrib['id']] = {'type': 'catalogue_attributes', **root.attrib}
    
    convert_publications(root, data, namespace)
    convert_categories(root, base, data, namespace)
    convert_entries(root, data, namespace)
    convert_shared_selection_entries(root, data, namespace)
    convert_shared_profile(root, base, data, namespace)
    convert_shared_selection_entry_groups(root, data, namespace)

    return display_profiles(data)

In [6]:
def convert_publications(root, data, namespace):
    data['publications'] = {
        entry.attrib['id']: entry.attrib['name'] 
        for entry in root.xpath("/c:catalogue/c:publications/c:publication", namespaces=namespace)}

In [7]:
def convert_categories(root, base, data, namespace):
    data['categories'] = {
        entry.attrib['id']: entry.attrib['name']
        for entry in root.xpath("/c:catalogue/c:categoryEntries/c:categoryEntry", namespaces=namespace)}

    data['categories'].update({
        entry.attrib['id']: entry.attrib['name']
        for entry in base.xpath("/gs:gameSystem/gs:categoryEntries/gs:categoryEntry", namespaces=namespace)
    })

In [8]:
def convert_entries(root, data, namespace):
    data['entries'] = {
        entry.attrib['id']: {
            **entry.attrib, 
            "links": [link.attrib for link in entry.xpath("c:categoryLinks/c:categoryLink", namespaces=namespace)],
            "modifiers": [{**m.attrib} for m in entry.xpath("c:modifiers/c:modifier", namespaces=namespace)]}
        for entry in root.xpath("/c:catalogue/c:entryLinks/c:entryLink", namespaces=namespace)}

In [9]:
def extract_characterstics(p, schema, namespace):
    type_select = {
        'Unit': (Model, model_labels),
        'Weapon': (Weapon, weapon_labels),
        'Abilities': (Ability, ability_labels),
        'Psyker': (Psyker, psyker_label),
        'Psychic Power': (Power, power_labels),
        'Keywords': (Keywords, keywords_labels),
        'Wound Track': (WoundTrack, wound_track_labels),
        'Transport': (Transport, transport_labels),
        'Distort Fields': (DistortFields, distort_fields_labels)
    }
    
    struc, label_lookup = type_select[p.attrib['typeName']]
    if type_select.get(p.attrib['typeName']):
        return struc(name=p.attrib['name'],
            **{
                label_lookup[c.attrib['name']]: c.text 
                for c in p.xpath("{0}:characteristics/{0}:characteristic".format(schema), namespaces=namespace)
            })
    else:
        logging.warning("Unknown type {}".format(p.attrib['typeName']))

In [10]:
def make_selection_entry(entry, namespace):
    return {
        **entry.attrib,
        "profiles": [extract_characterstics(p, 'c', namespace) for p in entry.xpath("c:profiles/c:profile", namespaces=namespace)],
        "constraints": [c.attrib for c in entry.xpath("c:constraints/c:constraint", namespaces=namespace)],
        "info_links": [link.attrib for link in entry.xpath("c:infoLinks/c:infoLink", namespaces=namespace)],
        "selection_entries": [make_selection_entry(e, namespace) for e in entry.xpath("c:selectionEntries/c:selectionEntry", namespaces=namespace)],
        "entry_links": [c.attrib for c in entry.xpath("c:entryLinks/c:entryLink", namespaces=namespace)],
        "costs": [(c.attrib['name'], c.attrib['value']) for c in entry.xpath("c:costs/c:cost", namespaces=namespace) if float(c.attrib['value']) > 0],
        "category_links": [link.attrib for link in entry.xpath("c:categoryLinks/c:categoryLink", namespaces=namespace)],
    }

In [11]:
# Need Transport and Vehicle stats
# Keywords, Wound Track, Transport. Distort Fields
model_labels = {
    'M': 'movement', 
    'WS': 'weapon_skill', 
    'BS': 'ballistic_skill', 
    'S': 'strength', 
    'T': 'toughness', 
    'W': 'wounds', 
    'A': 'attacks', 
    'Ld': 'leadership', 
    'Save': 'armour'}

weapon_labels = {
    'Range': 'range', 
    'Type': 'type', 
    'S': 'strength', 
    'AP': 'armour_piercing', 
    'D': 'damage', 
    'Abilities': 'abilities'
}

ability_labels = {
    'Description': 'description'
}

psyker_label = {
    "Cast": "cast",
    "Deny": "deny",
    "Powers Known": "known_powers",
    "Other": "other",
}

power_labels = {
    "Warp Charge": "warp_charge",
    "Range": "range",
    "Details": "details",
}

keywords_labels = {
    'Keywords (Faction)': 'faction_keywords',
    'Keywords (Basic)': 'basic_keywords',
}

wound_track_labels = {
    "Remaining W": "remaining_wounds",
    "Characteristic 1": "characteristic1",
    "Characteristic 2": "characteristic2",
    "Characteristic 3": "characteristic3",
}

transport_labels = {
    'Capacity': 'capacity'
}

distort_fields_labels = {
    "Distance Moved": "distance_moved",
    "Invulnerable Save": "invulnerable_save",
}

Model = namedtuple(
    'Model', ['name', 'movement', 'weapon_skill', 'ballistic_skill', 'strength', 'toughness', 'wounds', 'attacks', 'leadership', 'armour'])
Weapon = namedtuple(
    'Weapon', ['name', 'range', 'type', 'strength', 'armour_piercing', 'damage', 'abilities'])
Ability = namedtuple(
    'Ability', ['name', 'description'])
Psyker = namedtuple(
    'Psyker', ['name', 'cast', 'deny', 'known_powers', 'other'])
Power = namedtuple(
    'Power', ['name', 'warp_charge', 'range', 'details']
)
Keywords = namedtuple(
    'Keywords', ['name', 'faction_keywords', 'basic_keywords']
)
WoundTrack = namedtuple(
    'WoundTrack', ['name', 'remaining_wounds', 'characteristic1', 'characteristic2', 'characteristic3']
)
Transport = namedtuple(
    'Transport', ['name', 'capacity']
)
DistortFields = namedtuple(
    'DistortFields', ['name', 'distance_moved', 'invulnerable_save']
)

In [12]:
def convert_shared_selection_entries(root, data, namespace):
    data['shared_selection_entries'] = {
        entry.attrib['id']: make_selection_entry(entry, namespace)
        for entry in root.xpath("/c:catalogue/c:sharedSelectionEntries/c:selectionEntry", namespaces=namespace)}

In [13]:
def convert_shared_selection_entry_groups(root, data, namespace):
    data['shared_selection_entry_groups'] = {
        shared_group.attrib['id']: {
            **shared_group.attrib,
            'selection_entries': [
                make_selection_entry(entry, namespace)
                for entry in shared_group.xpath("c:selectionEntries/c:selectionEntry", namespaces=namespace)],
            'links': [
                {**link.attrib, 'constraints': [c.attrib for c in link.xpath("c:constraints/c:constraint", namespaces=namespace)]}
                for link in shared_group.xpath("c:entryLinks/c:entryLink", namespaces=namespace)
            ]
        }
        for shared_group in root.xpath("/c:catalogue/c:sharedSelectionEntryGroups/c:selectionEntryGroup", namespaces=namespace)}

In [14]:
def convert_shared_profile(root, base, data, namespace):
    data['shared_profile'] = {
        shared_profile.attrib['id']: {
            'name': shared_profile.attrib['name'], 
            'type': shared_profile.attrib['typeName'],
            'characteristics': extract_characterstics(shared_profile, 'c', namespace)}
        for shared_profile in root.xpath("/c:catalogue/c:sharedProfiles/c:profile", namespaces=namespace)}

    data['shared_profile'].update({
        shared_profile.attrib['id']: {
            'name': shared_profile.attrib['name'], 
            'type': shared_profile.attrib['typeName'],
            'characteristics': extract_characterstics(shared_profile, 'gs', namespace)}
        for shared_profile in base.xpath("/gs:gameSystem/gs:sharedProfiles/gs:profile", namespaces=namespace)})

In [15]:
def profile_search(data, entry):
    """Searches for all the parts of a selection entry for child seleciton entries and profiles and entry_links"""
    if len(entry.get('selection_entries', [])) > 0:
        for y in entry['selection_entries']:
            for x in profile_search(data, y):
                yield x
    for p in entry.get('profiles', []):
        yield p
    for il in entry.get('info_links', []):
        if il['type'] == 'profile':
            yield data['shared_profile'][il['targetId']]['characteristics']
        else:
            logging.warning("Unknown info link type {}".format(il['type']))
    for l in entry.get('entry_links', []):
        if l['type'] == 'selectionEntry':
            target = data['shared_selection_entries'].get(l['targetId'])
            if target:
                for p in target['profiles']:
                    yield p
                for il in target['info_links']:
                    if il['type'] == 'profile':
                        yield data['shared_profile'][il['targetId']]['characteristics']
                    else:
                        logging.warning("Unknown info link type {}".format(il['type']))
            else:
                logging.warning("Unknown reference {} {} {}".format(l['type'], l.get('name', '<no-name>'), l['targetId']))
        elif l['type'] == 'selectionEntryGroup':
            shared_entry = data['shared_selection_entry_groups'].get(l['targetId'])
            if shared_entry:
                for ge in profile_search(data, shared_entry):
                    yield ge
            else:
                logging.warning("Unknown reference {} {} {}".format(l['type'], l.get('name', '<no-name>'), l['targetId']))
        else:
            logging.warning("Unknown type {}".format(l['type']))

In [16]:
def get_restrictions(entry):
    constraints = {}
    for c in entry['constraints']:
        assert c['field'] in ('selections', 'roster'), "Unknown field type {}".format(c)
        assert c['scope'] in ('force', 'roster'), "Unknown scope type {}".format(c)
        constraints[c['type']] = int(float(c['value']))
    if 'min' in constraints.keys() or 'max' in constraints.keys():
        return "{}-{}".format(constraints.get('min', 0), constraints.get('max', 'inf'))
    else:
        return ''

In [17]:
def find_troop_type(v, keywords):
    """Some units don't seem to have type listed as a category, only as a keyword"""
    for k in keywords:
        if k in (
            'HQ', 'Elites', 'Troops', 'Fast Attack', 'Heavy Support', 
            'Dedicated Transport', 'Flyer', 'Lord of War'):
            return k
    logging.warning("Could not get troop type {}".format(v['links']))
    return 'unknown'

In [18]:
def print_vehicle_wound_track(models, wound_track):
    if wound_track:
        characterstics = ['Name', 'Remaining Wounds']
        for attribute, value in zip(models[0]._fields, list(models[0])):
            if value == '*':
                characterstics.append(attribute)
        return format_table(
            characterstics, len(characterstics) * [1 / len(characterstics)], wound_track)
    else:
        return ''

In [19]:
# format to be displayed in tables
model_attributes = ['Name', 'Move', 'WS', 'BS', 'S', 'T', 'W', 'A', 'L', 'Sv']
model_widths = [0.3, 0.1, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075]
weapon_attributes = ['Weapon', 'Range', 'Type', 'S', 'AP', 'D', 'Abilities']
weapon_widths = [0.3, 0.05, 0.1, 0.05, 0.05, 0.05, 0.4]
#assert sum(model_widths) == 1
assert sum(weapon_widths) == 1, "width is {}".format(sum(weapon_widths))

def format_table(column_names, column_widths, entries):
        if entries:
            return '''
        <table style="width:75%">
            <tr>
                {}
            </tr>
            {}
        </table> '''.format(
                ''.join(
                    '<th style="width: {w}%;"><b>{x}</b></th>'.format(x=x, w=w*100)
                    for x, w in zip(column_names, column_widths)),
                ''.join('<tr>{}</tr>'.format(
                    ''.join('<td>{}</td>'.format(i) for i in list(row) if i)) for row in entries))
        else:
            return ''

In [20]:
def display_profiles(data):
    names = []
    datasheets = []
    for k, v in data['entries'].items():
        entry = data['shared_selection_entries'].get(v['targetId'])
        if entry:
            if entry['hidden'] == 'false':
                name = v['name'] or entry['name']
                keywords = [data['categories'].get(l['targetId'], l.get('name', 'unknown')) for l in entry['category_links']]
                faction = [x.split("Faction: ")[1] for x in keywords if 'Faction' in x]
                other_keywords = [x for x in keywords if not 'Faction' in x]
                items = list(profile_search(data, entry))
                models = set(i for i in items if type(i) == Model)
                weapons = set(i for i in items if type(i) == Weapon)
                abilities = [i for i in items if type(i) == Ability]
                psyker = [i for i in items if type(i) == Psyker]
                power = [i for i in items if type(i) == Power]
                keywords = [i for i in items if type(i) == Keywords]
                wound_track = [i for i in items if type(i) == WoundTrack]
                transport = [i for i in items if type(i) == Transport]
                distort_field = [i for i in items if type(i) == DistortFields] # custom eldar thing

                try:
                    troop_type = data['categories'][[x for x in v['links'] if x['primary'] == 'true'][0]['targetId']]
                except:
                    troop_type = find_troop_type(v, other_keywords)

                entry_html = [
            entry['type'],
            '<h1>{} {}</h1>'.format(name, get_restrictions(entry)),
            '<p>{} [{}]</p>'.format('[{}]'.format(troop_type), ", ".join("{}:{}".format(x[0], int(float(x[1]))) for x in entry['costs'])),
            format_table(model_attributes, model_widths, models),
            print_vehicle_wound_track(list(models), wound_track),
            'Unit contains x of y, x of y with a,b,c<br>',
            format_table(weapon_attributes, weapon_widths, weapons),
            format_table([], [], transport),
            '<b>Abilities:</b><ul>{}</ul>'.format("<br>".join(["<li><b>{}</b> {}</li>".format(a.name, a.description) for a in abilities])),
            '<b>Faction Keywords:</b> {}<br>'.format(", ".join(faction)),
            '<b>Keywords:</b> {}'.format(", ".join(other_keywords))
                ]
                names.append(name)
                datasheets.append(widgets.HTML("".join(entry_html)))
            elif entry['hidden'] == 'true':
                logging.warning("Hidden Entry")
        else:
            logging.warning("No Entry {}".format(v.get('name', v)))
    accordion = widgets.Accordion(children=datasheets)
    for i, name in enumerate(names):
        accordion.set_title(i, name)
    return accordion

In [21]:
def download_catalogue(filename, catelogue_schema):
    path = 'https://raw.githubusercontent.com/BSData/wh40k/master/{}'.format(filename)
    r = requests.get(path)
#     root = etree.fromstring(r.content, etree.XMLParser(
#         remove_blank_text=True, compact=False, schema=catelogue_schema))
    return r.content

def convert_catalogue(filename):
    root = download_catalogue(filename)
    return load_catalogue(root, base)

In [22]:
# import lxml

# def merge_xml(filenames):
#     """This merges the cat files in each of the top level tags"""
#     merged = lxml.etree.fromstring("""
# <catalogue id="30b2-6f64-b85e-b4dc" name="Aeldari - Craftworlds" revision="97" battleScribeVersion="2.03" authorName="BSData Developers" authorContact="@FarseerV @WindstormSCR" authorUrl="https://discord.gg/KqPVhds" library="false" gameSystemId="28ec-711c-d87f-3aeb" gameSystemRevision="134" xmlns="http://www.battlescribe.net/schema/catalogueSchema">
# </catalogue>
#     """)
#     for filename in filenames:
#         print(filename)
#         path = 'https://raw.githubusercontent.com/BSData/wh40k/master/{}'.format(filename)
#         r = requests.get(path)
#         for element in lxml.etree.fromstring(r.content):
#             for child in element:
#                 section = next(merged.iter(element.tag), None)
#                 if not section:
#                     section = etree.SubElement(merged, element.tag)
#                 section.append(child)
#     return merged
    
# merged = merge_xml([c for c in catalogues if '.cat' in c])
# with open("merged.xml", "wb") as output:
#     output.write(etree.tostring(merged, pretty_print=True))

In [23]:
with open('src/battlescribe/catelogue_schema.xsd', 'rb') as fh:
    catelogue_schema = etree.XMLSchema(etree.fromstring(fh.read()))


for c in catalogues:
    if '.cat' in c:
        try:
            download_catalogue(c, catelogue_schema)
            print("Success", c)
        except Exception as e:
            print("Failed", c)
            print(e)

Success Aeldari - Craftworlds.cat
Success Aeldari - Drukhari.cat
Success Aeldari - FW Corsairs.cat
Success Aeldari - Harlequins.cat
Success Aeldari - Ynnari.cat
Success Chaos - Chaos Knights.cat
Success Chaos - Chaos Space Marines.cat
Success Chaos - Daemons.cat
Success Chaos - Dark Mechanicum.cat
Success Chaos - Death Guard.cat
Success Chaos - FW Heretic Astartes.cat
Success Chaos - FW Renegade and Heretics.cat
Success Chaos - Gellerpox Infected.cat
Success Chaos - Servants of the Abyss.cat
Success Chaos - Thousand Sons.cat
Success Chaos - Titanicus Traitoris.cat
Success Fallen.cat
Success Imperium - Adepta Sororitas.cat
Success Imperium - Adeptus Astra Telepathica.cat
Success Imperium - Adeptus Custodes.cat
Success Imperium - Adeptus Mechanicus.cat
Success Imperium - Astra Militarum - Library.cat
Success Imperium - Astra Militarum.cat
Success Imperium - Black Templars.cat
Success Imperium - Blackstone Fortress.cat
Success Imperium - Blood Angels.cat
Success Imperium - Dark Angels.cat

In [24]:
import ipywidgets as widgets

dropdown = widgets.Dropdown(
    options=catalogues,
    value=catalogues[0],
    description='Catelogue:',
    disabled=False,
)
output2 = widgets.Output()
display(dropdown, output2)

def on_value_change(change):
    output2.clear_output()
    output2.append_display_data(widgets.HTML("<h1>{}</h1>".format(change['new'])))
    output2.append_display_data(download_catalogue(change['new']))

dropdown.observe(on_value_change, names='value')
on_value_change({'new': catalogues[0]})

Dropdown(description='Catelogue:', options=('Aeldari - Craftworlds.cat', 'Aeldari - Drukhari.cat', 'Aeldari - …

Output()

HTML(value='<h1>Aeldari - Craftworlds.cat</h1>')

TypeError: download_catalogue() missing 1 required positional argument: 'catelogue_schema'