/
split_exercise.py
executable file
·56 lines (38 loc) · 1.83 KB
/
split_exercise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
import sys
import os
import re
from bs4 import BeautifulSoup
if len(sys.argv) < 2:
print('Usage: split_exercise.py filename0.html [filename1.html] ...')
sys.exit(1)
all_exercises = []
for fn in sys.argv[1:]:
basename, ext = os.path.splitext(fn)
fn_solutions_stripped = basename + '_no_solutions' + ext
with open(fn, encoding='utf-8') as f:
html = f.read()
bs = BeautifulSoup(html, 'html.parser')
content = bs.find(id='notebook-container')
exercises = re.findall('<!-- exercise begin -->(.*?)<!-- exercise end -->',
str(content), flags=re.DOTALL)
print('{} exercises found in {}'.format(len(exercises), fn))
if exercises:
exercises.insert(0, '<h2>File: {}</h2>'.format(fn))
all_exercises.extend([BeautifulSoup(e, "html.parser").prettify()
for e in exercises])
html_no_exercise = re.sub('<!-- solution begin -->(.*?)<!-- solution end -->',
'', html, flags=re.DOTALL)
with open(fn_solutions_stripped, 'w', encoding='utf-8') as f:
f.write(html_no_exercise)
# We would like to replace the HTML inside `content` with the HTML of
# the exercizes. Unfortunately, Beautiful Soup does escaping upon
# assignment to `content.string`, so we just add a token here, and
# then do the replacement right before writing to file.
content.string = '{{REPLACE_ME}}'
with open(os.path.join(os.path.dirname(fn), 'exercises.html'), 'w', encoding='utf-8') as f:
exercise_text = '\n<hr/>\n'.join(all_exercises)
exercise_text = exercise_text.replace('<!-- solution begin', '<p><br/></p><!-- solution begin')
# Remove CSS rule that adds newlines after inline code snippets
bs = str(bs).replace('white-space: pre-wrap;', '')
f.write(bs.replace('{{REPLACE_ME}}', exercise_text))