In [None]:
import re
import json
import requests
import pandas
import pathlib
from collections import OrderedDict
from markdown2 import markdown

Get raw data

In [None]:
# Raw markdown text of each downloaded page, keyed by page name.
rune_groups = {}
stdlib_groups = {}

In [None]:
# Download the raw markdown of every rune reference page into rune_groups.
for item in ["bar","buc","cen","col","dot","fas","ket","lus","mic","sig","tis","wut","zap"]:
    # An explicit timeout keeps "Run All" from hanging forever on a stalled
    # connection; requests waits indefinitely when none is given.
    result = requests.get(
        "https://raw.githubusercontent.com/urbit/developers.urbit.org/main/content/reference/hoon/rune/{}.md".format(item),
        timeout=30,
    )
    if result.status_code == 200:
        rune_groups[item] = result.text
    else:
        # Abort on the first failed download so later cells never run on partial data.
        print(result.status_code)
        raise Exception(result.text)

In [None]:
# Download the raw markdown of every stdlib reference page into stdlib_groups.
# Each section appears exactly once (the list previously repeated "2e", which
# only caused a redundant download of the same page).
for item in ["1a", "1b", "1c", "2a", "2b", "2c", "2d", "2e", "2f", "2g", "2h", "2i", "2j",
     "2k", "2l", "2m", "2n", "2o", "2p", "2q", "3a", "3b", "3c", "3d", "3e", "3f", "3g", "4a", "4b",
     "4c", "4d", "4e", "4f", "4g", "4h", "4i", "4j", "4k", "4l", "4m", "4n", "4o", "5a", "5b", "5c",
     "5d", "5e", "5f"]:
    # An explicit timeout keeps "Run All" from hanging forever on a stalled
    # connection; requests waits indefinitely when none is given.
    result = requests.get(
        "https://raw.githubusercontent.com/urbit/developers.urbit.org/main/content/reference/hoon/stdlib/{}.md".format(item),
        timeout=30,
    )
    if result.status_code == 200:
        stdlib_groups[item] = result.text
    else:
        # Abort on the first failed download so later cells never run on partial data.
        print(result.status_code)
        raise Exception(result.text)

Convert tables to markdown

In [None]:
# Show which rune pages ended up in rune_groups.
rune_groups.keys()

In [None]:
# Drop everything before the first "## `" heading of each page, then glue the
# remainders together, each one preceded by a newline.
raw_runes = "".join(
    "\n" + page_text[page_text.index("## `"):]
    for page_text in rune_groups.values()
)
raw_runes

In [None]:
# One chunk per "## " heading; empty chunks produced by the split are dropped.
raw_splits = [section for section in raw_runes.split("\n## ") if section]
# Characters 1-2 of each chunk (just after the opening backtick) are used as its key.
rune_keys = [section[1:3] for section in raw_splits]
rune_dict = dict(zip(rune_keys, raw_splits))
rune_dict

In [None]:
# Spot-check the raw markdown stored under the '|_' key.
print(rune_dict['|_'])

In [None]:
def convert_to_vs_markdown(a):
  """Rewrite every ``{% table %}`` block in ``a`` as a two-column pipe table.

  Inside a block, rows are separated by ``---`` lines and each cell starts
  with ``- `` at the beginning of a line. Only the first two cells of a row
  are used. The first rendered row is followed by the alignment row so it
  becomes the table header.

  A second cell holding exactly one fenced code block is emitted as
  ``<pre>...</pre>`` with newlines turned into ``<p>`` and ``|`` turned into
  ``&verbar;`` so the code cannot break the pipe-table layout.

  Args:
    a: Markdown text that may contain ``{% table %}`` blocks.

  Returns:
    The text with each table block replaced; text without table blocks is
    returned unchanged.
  """
  # Raw string: "\s" in a normal string literal is an invalid escape sequence.
  table_pattern = re.compile(r"{% table %}\n([\s\S]*?){% /table %}")

  def _render_second_cell(cell):
    # Splitting on the fence gives exactly three parts when there is one code block.
    fence_parts = cell.split("```")
    if len(fence_parts) != 3:
      return cell.replace("\n", "")
    code = fence_parts[1]
    if code.lower().startswith("hoon"):
      # Drop the language tag plus the character after it (normally the newline).
      code = code[5:]
    return "<pre>{}</pre>".format(code.replace("\n", "<p>").replace("|", "&verbar;"))

  def _render_table(match):
    rendered = []
    for chunk in match.group(1).split("---\n"):
      cells = chunk.split("\n- ")[1:]
      if not cells:
        continue
      first = cells[0].replace("\n", "")
      # A row with a single cell gets an empty second column instead of raising.
      second = _render_second_cell(cells[1]) if len(cells) > 1 else ""
      rendered.append("| {} | {} |\n".format(first, second))
      if len(rendered) == 1:
        # Header separator goes after the first row actually emitted, even
        # when earlier chunks were empty.
        rendered.append("| :----: | :---: |\n")
    return "".join(rendered)

  # Single pass over the text; a function replacement is inserted verbatim.
  return table_pattern.sub(_render_table, a)

In [None]:
# Spot-check the table conversion on the '|_' entry.
print(convert_to_vs_markdown(rune_dict['|_']))

In [None]:
# Apply the table conversion to every entry, keeping the same keys.
parsed_runes = {
    rune: convert_to_vs_markdown(section)
    for rune, section in rune_dict.items()
}
parsed_runes

In [None]:
# Spot-check the converted markdown stored under the '|_' key.
print(parsed_runes['|_'])

In [None]:
# Render each converted entry to HTML, then turn the "&verbar;" placeholders
# back into literal "|" characters.
# `extras` is given as a list: markdown2 expects a list or dict of extra names,
# and a bare string such as 'tables' is iterated character by character, which
# leaves the "tables" extra switched off.
html_runes = {
    k: markdown(v, extras=["tables"]).replace("&verbar;", "|")
    for k, v in parsed_runes.items()
}
html_runes

In [None]:
# Spot-check the rendered HTML stored under the '|_' key.
print(html_runes['|_'])

In [None]:
# Serialize html_runes as indented JSON into hoon-dictionary.json (path is relative to the working directory).
pathlib.Path("hoon-dictionary.json").write_text(json.dumps(html_runes, indent=4))