In [None]:
import os
import re
from bs4 import BeautifulSoup
from string import Template
from itertools import takewhile

# Directory that traverse() scans for the HTML documentation: the 'html'
# folder inside the current working directory of the kernel.
root = os.path.join(os.getcwd(), 'html')

In [None]:
# Text of the LICENSE file; it is handed to the templates below as the value
# of their `license` placeholder. Starts out empty so the name always exists.
license = ''
with open('LICENSE', mode='r', encoding='utf-8') as license_file:
    license = license_file.read()

In [None]:
# Utils
def clean_html(html):
    """Return ``html`` with every ``<...>`` span removed.

    The match is lazy and ``.`` does not match a newline, so a tag that is
    split across several lines is left untouched.
    """
    tag_pattern = '<.*?>'
    return re.compile(tag_pattern).sub('', html)

def lcp(*s):
    """Return the longest common prefix of the given strings.

    Only the smallest and the largest argument (in sort order) are compared:
    whatever prefix those two share is shared by everything between them.
    Raises ValueError when called without arguments (from ``min``).
    """
    lowest, highest = min(s), max(s)
    shared = []
    for left, right in zip(lowest, highest):
        if left != right:
            break
        shared.append(left)
    return ''.join(shared)

def to_pascal_case(snakeCase):
    """Convert an underscore-separated name to PascalCase.

    The text is title-cased first (``str.title``), then the underscores are
    dropped, e.g. ``VIPS_OPERATION_NONE`` -> ``VipsOperationNone``.
    """
    titled = snakeCase.title()
    return ''.join(titled.split('_'))

def to_snake_case(pascalCase):
    """Convert a PascalCase name to UPPER_SNAKE_CASE.

    The name is cut in front of every ASCII capital letter; each piece is
    upper-cased and the pieces are joined with underscores. Anything before
    the first capital letter is discarded.
    """
    words = re.findall('[A-Z][^A-Z]*', pascalCase)
    return '_'.join(word.upper() for word in words)

In [None]:
def traverse(root):
    """Recursively collect the text of every HTML file found under ``root``.

    Args:
        root: path of the directory holding the HTML documentation.

    Returns:
        A list with the content of each ``.html`` file, sub-directories
        included. Entries are visited in sorted name order so the result does
        not depend on the order in which the filesystem lists them.
    """
    sources = []
    for entry in sorted(os.listdir(root)):
        path = os.path.join(root, entry)
        if os.path.isdir(path):
            # Keep what the recursive call found; its return value used to be
            # thrown away, so nested documentation was silently ignored.
            sources.extend(traverse(path))
        elif os.path.isfile(path):
            source = read_html_source(path)
            # read_html_source answers None for anything that is not .html
            if source is not None:
                sources.append(source)
    return sources

In [None]:
def read_html_source(filename):
    """Return the UTF-8 text of ``filename`` when it ends in ``.html``.

    Any other extension (the comparison is case-sensitive) yields None and
    the file is not opened.
    """
    _, extension = os.path.splitext(filename)
    if extension == '.html':
        with open(filename, 'r', encoding='utf-8') as html_file:
            return html_file.read()
    return None

In [None]:
def build_dict(sources):
    """Index the documented symbols found in a list of HTML documents.

    Every ``<div class="refsect2">`` contributes one entry, keyed by the
    ``name`` attribute of its first ``<a>``:
    ``{'name': <h3 text>, 'data': {...}}``. ``data`` may hold ``'program'``
    (the ``<pre class="programlisting">`` tag) and ``'optional'`` (the
    ``<ul class="itemizedlist">`` tag). Every ``<div class="refsect3">`` whose
    anchor is named ``<symbol>.<section>`` adds ``data[<section>]``, the list
    of ``<tr>`` tags it contains, to an already indexed ``<symbol>``.

    Args:
        sources: iterable of HTML documents, each handed to BeautifulSoup.

    Returns:
        dict mapping anchor name to the entry described above. A later
        document overwrites an entry of the same name.
    """
    dictionary = {}
    for source in sources:
        # NOTE(review): no parser is named, so bs4 picks one from what is
        # installed; pin one here if the output must match across machines.
        soup = BeautifulSoup(source)

        for ref in soup.find_all('div', 'refsect2'):
            anchor = ref.find('a')
            heading = ref.find('h3')
            if anchor is None or heading is None:
                # not a symbol section we can index
                continue
            name = anchor.get('name')
            header = heading.string
            if name is None or header is None:
                continue
            data = {}
            # get some extra information for the function
            program = ref.find('pre', 'programlisting')
            if program is not None:
                data['program'] = program
            # get optional arguments information
            optional = ref.find('ul', 'itemizedlist')
            if optional is not None:
                data['optional'] = optional
            dictionary[name] = {'name': header, 'data': data}

        for ref in soup.find_all('div', 'refsect3'):
            anchor = ref.find('a')
            anchor_name = anchor.get('name') if anchor is not None else None
            if anchor_name is None:
                continue
            components = anchor_name.split('.')
            if len(components) <= 1:
                # Skip only this section: stopping the loop here would drop
                # every later sub-section of the same document.
                continue
            name, subname = components[0], components[1]
            if name in dictionary:
                dictionary[name]['data'][subname] = ref.find_all('tr')
    return dictionary

In [None]:
def traverse_dictionary(dictionary):
    """Generate the enum classes and the C enum test file.

    Every entry whose heading contains ``"enum "`` and which carries a
    ``'members'`` section is passed to ``compute_enum``. The test lines
    gathered on the way are then substituted, together with the license text,
    into ``VipsEnumTestTemplate.c`` and written to ``test/VipsEnumTest.c``.
    """
    collected = []
    for entry in dictionary.values():
        if "enum " not in entry['name']:
            continue
        if 'members' not in entry['data']:
            continue
        compute_enum(entry, collected)

    # generate enum tests
    with open('VipsEnumTestTemplate.c', 'r', encoding='utf-8') as infile:
        template_text = infile.read()
        with open('test/VipsEnumTest.c', 'w', encoding='utf-8') as outfile:
            fields = {'license': license, 'tests': ''.join(collected)}
            outfile.write(Template(template_text).substitute(fields))

In [None]:
# Enums

# Explicit numeric values for members that must not receive the running
# 0, 1, 2, ... position compute_enum assigns by default. The non-zero values
# are single bits, except VIPS_FOREIGN_ALL which is the union of the three
# VIPS_FOREIGN_* bits.
enum_overwrites = {
    "VIPS_OPERATION_NONE": 0,
    "VIPS_OPERATION_SEQUENTIAL": 1 << 0,
    "VIPS_OPERATION_SEQUENTIAL_UNBUFFERED": 1 << 1,
    "VIPS_OPERATION_NOCACHE": 1 << 2,
    "VIPS_OPERATION_DEPRECATED": 1 << 3,
    "VIPS_FOREIGN_NONE": 0,
    "VIPS_FOREIGN_PARTIAL": 1 << 0,
    "VIPS_FOREIGN_BIGENDIAN": 1 << 1,
    "VIPS_FOREIGN_SEQUENTIAL": 1 << 2,
    "VIPS_FOREIGN_ALL": (1 << 0) | (1 << 1) | (1 << 2),
}

def compute_enum(item, tests):
    """Generate the Java enum source for one documented enum.

    Reads ``EnumTemplate.java``, fills in the license, the enum name and its
    values, and writes the result to ``enums/<name>.java``. Each value is the
    PascalCase member name with the prefix it shares with the enum name
    removed, followed by its number: the member's position, unless
    ``enum_overwrites`` lists an explicit value for it.

    Args:
        item: dictionary entry; ``item['name']`` is the heading
            (``"enum <Name>"``) and ``item['data']['members']`` the table
            rows describing the members.
        tests: list of strings, extended in place with one comment line for
            the enum and one ``assertEqualsNativeEnumValue`` line per member.
    """
    # remove 'enum' prefix
    name = item['name'].split(' ')[1]

    members = []
    for tr in item['data']['members']:
        name_cell = tr.find('td', 'enum_member_name')
        if name_cell is None or name_cell.p is None:
            # Row without a member name cell: nothing to generate for it.
            continue
        # The identifier is taken from the second child of the cell's <p>.
        member_name = name_cell.p.contents[1]
        description = None
        member_desc = tr.find('td', 'enum_member_description')
        if member_desc is not None and member_desc.p is not None:
            text = member_desc.p.string
            if text is not None:
                # Collapse line breaks and runs of blanks: the text ends up
                # after a single '//' and must therefore fit on one line.
                description = ' '.join(text.split()) or None
        members.append({'name': member_name, 'description': description})

    with open('EnumTemplate.java', 'r', encoding='utf-8') as infile:
        tpl = infile.read()

    tests.append(f'    // {name}\n')
    values = []
    for position, member in enumerate(members):
        cname = member['name']
        pascal = to_pascal_case(cname)
        # drop the part of the member name that merely repeats the enum name
        value = pascal[len(lcp(pascal, name)):]
        number = enum_overwrites.get(cname, position)
        tests.append(f'    assertEqualsNativeEnumValue(env, {cname}, "com/criteo/vips/enums/{name}", "{value}");\n')
        line = f"    {value}({number})"
        if member['description'] is not None:
            line = f"    // {member['description']}\n{line}"
        values.append(line)

    # generate enum class file
    with open(f'enums/{name}.java', 'w', encoding='utf-8') as outfile:
        src = Template(tpl).substitute(
            {'license': license, 'name': name, 'values': ',\n'.join(values)}
        )
        outfile.write(src)

In [None]:
def generate_wrapper():
    """Run the whole pipeline once.

    Reads the HTML documentation under ``root``, indexes it with
    ``build_dict`` and lets ``traverse_dictionary`` write the generated files.
    """
    traverse_dictionary(build_dict(traverse(root)))

generate_wrapper()