Skip to content

Commit

Permalink
Add Python instruction docs (#4290)
Browse files Browse the repository at this point in the history
  • Loading branch information
RubenRBS committed Nov 15, 2022
1 parent 1721f67 commit 61ff341
Show file tree
Hide file tree
Showing 5 changed files with 957 additions and 0 deletions.
1 change: 1 addition & 0 deletions etc/config/python.defaults.properties
@@ -1,5 +1,6 @@
compilers=&python3def
defaultCompiler=python38def
instructionSet=python

group.python3def.compilers=python35def:python36def:python37def:python38def:python39def:python310def:python311def
group.python3def.isSemVer=true
Expand Down
136 changes: 136 additions & 0 deletions etc/scripts/docenizers/docenizer-python.py
@@ -0,0 +1,136 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import json
import os
import sys
import urllib
from urllib import request
from urllib import parse

try:
from bs4 import BeautifulSoup
except ImportError:
raise ImportError(
"Please install BeautifulSoup (apt-get install python3-bs4 or pip install beautifulsoup4 should do it)")

# Command-line interface: the folder holding the HTML input, the path of the
# generated .js file, and the folder the documentation page is downloaded to.
parser = argparse.ArgumentParser(
    description='Docenizes HTML version of the official Python documentation',
)
parser.add_argument(
    '-i', '--inputfolder',
    type=str,
    default='python-inst-docs',
    help='Folder where the input files reside as .html. Default is ./python-inst-docs/',
)
parser.add_argument(
    '-o', '--outputpath',
    type=str,
    default='./python-inst-docs.js',
    help='Final path of the .js file. Default is ./python-inst-docs.js',
)
parser.add_argument(
    '-d', '--downloadfolder',
    type=str,
    default='python-inst-docs',
    help='Folder where the archive will be downloaded and extracted',
)

# The maximum number of paragraphs from the description to copy.
# NOTE(review): not referenced by any code visible in this file; confirm
# whether the limit is still meant to be applied.
MAX_DESC_PARAS = 5

# URL of the documentation page that gets downloaded, and the file name it is
# stored under inside the download folder (and read back from when parsing).
ARCHIVE_URL = "https://docs.python.org/3/library/dis.html"
ARCHIVE_NAME = "dis.html"


class Instruction:
    """One documented instruction: its names, a short tooltip and a full body.

    The tooltip is stored with any trailing ':', ' ' and ',' characters
    removed; the other values are stored as given.
    """

    def __init__(self, name, names, tooltip, body):
        cleaned_tooltip = tooltip.rstrip(': ,')
        self.name, self.names = name, names
        self.tooltip = cleaned_tooltip
        self.body = body

    def __str__(self):
        """Render as ``<name> = <tooltip>`` with the body on the next line."""
        return "{} = {}\n{}".format(self.name, self.tooltip, self.body)


def get_url_for_instruction(instr):
    """Return the documentation URL for ``instr``.

    The instruction's ``name`` is percent-encoded and appended as the
    ``opcode-`` anchor of the online ``dis`` library page.
    """
    anchor = urllib.parse.quote(instr.name)
    return "https://docs.python.org/3/library/dis.html#opcode-" + anchor


def download_asm_doc_archive(downloadfolder):
    """Fetch the documentation page into ``downloadfolder``.

    The folder is created when it does not exist. If the path exists but is
    not a directory, an error is printed and the process exits with status 1.
    Errors raised by the download itself propagate to the caller.
    """
    if os.path.exists(downloadfolder):
        if not os.path.isdir(downloadfolder):
            print(f"Error: download folder {downloadfolder} is not a directory")
            sys.exit(1)
    else:
        print(f"Creating {downloadfolder} as download folder")
        os.makedirs(downloadfolder)

    destination = os.path.join(downloadfolder, ARCHIVE_NAME)
    print("Downloading archive...")
    urllib.request.urlretrieve(ARCHIVE_URL, destination)


def get_description_paragraphs(opcode):
    """Return the text of each ``<p>`` found in the entry's ``<dd>`` element.

    Args:
        opcode: Parsed element for one opcode entry; it must offer ``find``
            and the element it returns must offer ``find_all``.

    Returns:
        A list with one string per paragraph, in document order. The list is
        empty when the entry has no ``<dd>`` element, so a single malformed
        entry does not raise ``AttributeError`` on ``None``.
    """
    description = opcode.find('dd')
    if description is None:
        return []
    # find_all is the current spelling; findAll is only a legacy alias.
    return [paragraph.text for paragraph in description.find_all('p')]


def parse(f):
    """Extract the opcode entries from the ``dis`` documentation page.

    Args:
        f: Open file (or markup string) containing the HTML of the page.

    Returns:
        A list of ``Instruction`` objects in document order. Each one uses the
        first description paragraph as tooltip and all paragraphs, joined by
        newlines, as body.

    Raises:
        ValueError: If the page has no ``python-bytecode-instructions``
            section, i.e. its layout is not the one this parser expects.
    """
    doc = BeautifulSoup(f, 'html.parser')
    table = doc.find('section', {'id': 'python-bytecode-instructions'})
    if table is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError("no 'python-bytecode-instructions' section found in the document")

    instructions = []
    for opcode in table.find_all('dl', {'class': 'std opcode'}):
        name_tag = opcode.find('span', {'class': 'pre'})
        if name_tag is None:
            # A single malformed entry must not discard every other one.
            print("Skipping an opcode entry without a name")
            continue
        opcode_name = name_tag.text
        opcode_desc = get_description_paragraphs(opcode)
        if not opcode_desc:
            # Without a paragraph there is nothing to use as tooltip or body.
            print(f"Skipping {opcode_name}: no description paragraphs")
            continue
        instructions.append(Instruction(
            opcode_name,
            [opcode_name],
            opcode_desc[0],
            '\n'.join(opcode_desc),
        ))
    return instructions


def parse_html(directory):
    """Parse the downloaded documentation page found in ``directory``.

    Best-effort: any exception raised while opening or parsing the file is
    printed and an empty list is returned instead.
    """
    print("Parsing instructions...")
    try:
        html_path = os.path.join(directory, ARCHIVE_NAME)
        with open(html_path, encoding='utf-8') as handle:
            return parse(handle)
    except Exception as err:
        print(f"Error parsing {ARCHIVE_NAME}:\n{err}")
        return []


def main():
    """Download the documentation page, parse it and write the .js lookup file.

    Reads the command-line options from the module-level ``parser``, fetches
    the page when needed, parses the instructions, reports names shared by
    more than one instruction, and writes a JavaScript ``getAsmOpcode``
    function to the output path. Exits with status 1 if the download raises
    ``IOError``.
    """
    args = parser.parse_args()
    print(f"Called with: {args}")
    # If we don't have the html folder already...
    # NOTE(review): nothing visible in this file creates an 'html' sub-folder,
    # so this check looks like it is always true and the page is downloaded on
    # every run; also the download goes to --downloadfolder while parsing
    # reads --inputfolder. Confirm the intended behaviour before changing it.
    if not os.path.isdir(os.path.join(args.inputfolder, 'html')):
        try:
            download_asm_doc_archive(args.downloadfolder)
        except IOError as e:
            print("Error when downloading archive:")
            print(e)
            sys.exit(1)
    instructions = parse_html(args.inputfolder)
    instructions.sort(key=lambda b: b.name)
    all_inst = set()
    for inst in instructions:
        # inst.names is a list, so the set has to drive the intersection;
        # calling .intersection on the list raised AttributeError.
        overlap = all_inst.intersection(inst.names)
        if overlap:
            print(f"Overlap in instruction names: {overlap} for {inst.name}")
        all_inst.update(inst.names)
    print(f"Writing {len(instructions)} instructions")
    # Explicit encoding, matching how the input file is read.
    with open(args.outputpath, 'w', encoding='utf-8') as f:
        f.write("""
export function getAsmOpcode(opcode) {
if (!opcode) return;
switch (opcode.toUpperCase()) {
""")
        for inst in instructions:
            for name in sorted(inst.names):
                f.write(f' case "{name}":\n')
            payload = json.dumps({
                "tooltip": inst.tooltip,
                "html": inst.body,
                "url": get_url_for_instruction(inst)
            }, indent=16, separators=(',', ': '), sort_keys=True)
            # Drop the closing brace emitted by json.dumps and re-add it
            # together with the JavaScript statement terminator.
            f.write(' return {}'.format(payload)[:-1] + ' };\n\n')
        f.write("""
}
}
""")


# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions lib/asm-docs/_all.ts
Expand Up @@ -28,3 +28,4 @@ export {AvrDocumentationProvider} from './avr';
export {JavaDocumentationProvider} from './java';
export {LLVMDocumentationProvider} from './llvm';
export {Mos6502DocumentationProvider} from './mos6502';
export {PythonDocumentationProvider} from './python';

0 comments on commit 61ff341

Please sign in to comment.