Skip to content
This repository was archived by the owner on Mar 7, 2025. It is now read-only.

Commit a2a4df0

Browse files
committed
moved images to new format, updated with dockerfile and new processing scripts
1 parent 775762f commit a2a4df0

File tree

92 files changed

+152
-86
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+152
-86
lines changed

.build_scripts/clean.py

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import os
22
import re
3-
from shutil import copyfile
3+
import shutil
4+
import pathlib
45

56
import frontmatter
67
import toml
78

89

10+
911
def main():
1012
extensions = {}
1113
with open('config.toml', 'r') as f:
@@ -31,19 +33,27 @@ def main():
3133
if ext != '.md':
3234
copyfile(os.path.join('content', item), os.path.join('pdf-build',item))
3335
# move image files
34-
if 'images' in os.listdir('content'):
35-
for path, _, files in os.walk('content/images'):
36-
for filename in files:
37-
ext = os.path.splitext(filename)[1]
38-
if ext != '.md':
39-
copyfile(os.path.join(path, filename), os.path.join('pdf-build',filename))
40-
else:
41-
for path, _, files in os.walk('content/pages'):
42-
for filename in files:
43-
ext = os.path.splitext(filename)[1]
44-
if ext != '.md':
45-
copyfile(os.path.join(path, filename), os.path.join('pdf-build',filename))
46-
images = [f for f in os.listdir('pdf-build') if re.search(r'.*\.(jpe?g|png|gif)$', f)]
36+
for path, _, files in os.walk('content/pages'):
37+
for filename in files:
38+
ext = os.path.splitext(filename)[1]
39+
if ext != '.md':
40+
copyfile(os.path.join(path, filename), os.path.join('pdf-build',filename))
41+
for path, _, files in os.walk('static/'):
42+
p = pathlib.Path(path)
43+
cleaned_path = p.relative_to(*p.parts[:2])
44+
pdf_path = pathlib.Path('pdf-build').joinpath(p.relative_to(*p.parts[:2]))
45+
if not os.path.exists(pdf_path):
46+
os.makedirs(pdf_path)
47+
for filename in files:
48+
ext = os.path.splitext(filename)[1]
49+
if ext.lower() in ['.jpg','.jpeg','.png','.gif']:
50+
copy_parents(os.path.join(path, filename),'pdf-build', 1)
51+
images = []
52+
for path, d, f in os.walk('pdf-build'):
53+
p = pathlib.Path(*pathlib.Path(path).parts[1:])
54+
for file in f:
55+
if re.search(r'.*\.(jpe?g|png|gif)$', file):
56+
images.append(os.path.join(p, file))
4757
for path, _, files in os.walk('content/pages'):
4858
for filename in files:
4959
ext = os.path.splitext(filename)[1]
@@ -53,15 +63,15 @@ def main():
5363
contents.append({'name':filename, 'content': content})
5464
else:
5565
continue
56-
full_pdf_content = ""
66+
full_pdf_content = "\n\n\pagebreak\n\n"
5767
content = clean_markdown("content", "_index.md", images)
5868
if content:
5969
full_pdf_content += content
6070
full_pdf_content += "\n\n\pagebreak\n\n"
6171
for item in sorted(contents, key=lambda k: k['name']):
6272
full_pdf_content += item['content']
6373
full_pdf_content += "\n\n\pagebreak\n\n"
64-
with open('pdf-build/' + site_name + ".md", 'w') as f:
74+
with open('pdf-build/' + site_name + ".fullsite.md", 'w') as f:
6575
f.write(full_pdf_content)
6676

6777
else:
@@ -76,18 +86,28 @@ def main():
7686
for filename in files:
7787
ext = os.path.splitext(filename)[1]
7888
if ext == '.md':
79-
content = clean_markdown(path, filename, lang_key ,default_lang)
89+
content = clean_markdown(path, filename, lang_key, default_lang)
8090
if content:
8191
contents.append({'name':filename, 'content': content})
8292
else:
8393
copyfile(os.path.join(path,filename), os.path.join('pdf-build',lang_key,filename))
84-
full_pdf_content = ""
94+
full_pdf_content = "\n\n\pagebreak\n\n"
8595
for item in sorted(contents, key=lambda k: k['name']):
8696
full_pdf_content += item['content']
8797
full_pdf_content += "\n\n\pagebreak\n\n"
8898
with open('pdf-build/' + site_name + ".md", 'w') as f:
8999
f.write(full_pdf_content)
90100

101+
def copy_parents(src, dest_folder, dir_offset=0):
    """Copy ``src`` into ``dest_folder`` while recreating its parent directories.

    The first ``dir_offset`` parent directories of ``src`` are dropped and the
    remaining ones are recreated below ``dest_folder`` before the file is
    copied with ``shutil.copy``.

    Args:
        src: Path of the file to copy (``str`` or ``os.PathLike``). The path is
            split with ``pathlib``, so redundant separators and a leading
            ``./`` are ignored, and the root/drive of an absolute path is not
            counted as a directory.
        dest_folder: Directory under which the retained parents are recreated.
        dir_offset: Number of leading parent directories of ``src`` to strip.
            Values larger than the number of parents strip all of them, so the
            file lands directly in ``dest_folder``; values below zero are
            treated as zero.

    Raises:
        OSError: Propagated from ``os.makedirs`` / ``shutil.copy`` (for
            example when ``src`` does not exist).
    """
    src_path = pathlib.Path(src)

    # Drop the root/drive component: joining an absolute path onto
    # dest_folder would otherwise discard dest_folder entirely.
    parts = src_path.parts[1:] if src_path.anchor else src_path.parts

    # Slicing (rather than searching for the n-th separator) keeps the result
    # well defined when dir_offset exceeds the directory depth.
    kept_parents = parts[:-1][max(dir_offset, 0):]

    target_dir = pathlib.Path(dest_folder).joinpath(*kept_parents)
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(src, target_dir / src_path.name)
110+
91111

92112
def clean_markdown(path, filename, images, lang="", default_lang = "en"):
93113
post = frontmatter.load(os.path.join(path, filename))
@@ -97,9 +117,8 @@ def clean_markdown(path, filename, images, lang="", default_lang = "en"):
97117
guide['filename'] = filename
98118
content = post.content
99119
for image in images:
100-
replace_regex = r'(\!\[.*\]).*(\().*\/(' + re.escape(image) + r')([A-Za-z\s\"\'\-\,\.\;\:]*)(\))'
101-
print(replace_regex)
102-
content = re.sub(replace_regex, r'\1\2\3\5', content)
120+
replace_regex = r'(\!\[.*\]).*(\()(.*\/)(' + re.escape(str(image)) + r')([A-Za-z\s\"\'\-\,\.\;\:]*)(\))'
121+
content = re.sub(replace_regex, r'\1\2\4\5\6', content)
103122
guide['content'] = ''
104123
if title:
105124
guide['content'] += '# {0} \n\n'.format(title)

.build_scripts/pandoc.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Render every Markdown file under $HOME/pdf-build to PDF with pandoc, then
# re-write each PDF through Ghostscript into $HOME/public/pdfs, mirroring the
# directory layout of pdf-build. Files ending in ".fullsite.md" additionally
# get a table of contents and are published without the ".fullsite" infix.
#
# Must be run with bash (uses arrays, [[ ]] and process substitution).

# pandoc is run from inside pdf-build; abort if it is missing rather than
# processing whatever directory we happen to be in.
cd "$HOME/pdf-build" || exit 1
ls .
mkdir -p "$HOME/public/pdfs" "$HOME/tmp"

# Mirror the directory tree in both the output and the scratch location.
# -p keeps re-runs from failing on existing directories, and the scratch
# copy is needed because pandoc writes to $HOME/tmp/<relative path>.
while IFS= read -r -d '' d; do
    mkdir -p "$HOME/public/pdfs/${d#./}" "$HOME/tmp/${d#./}"
done < <(find . -mindepth 1 -type d -print0)

pandoc_opts=(--pdf-engine=xelatex -V geometry:margin=1in -V papersize:a4 -V mainfont:Archivo-Regular)
gs_opts=(-sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dPrinted=false -dNOPAUSE -dQUIET -dBATCH)

# NUL-delimited find output keeps paths containing whitespace intact.
while IFS= read -r -d '' i; do
    [ -f "$i" ] || continue

    # Strip only the leading "./" and the trailing ".md"; a global
    # substitution would also rewrite matches in the middle of the path.
    rel=${i#./}
    filepath="$HOME/pdf-build/$rel"
    pdffilename="${rel%.md}.pdf"

    if [[ "$rel" == *.fullsite.md ]]; then
        extra_opts=(--toc --toc-depth=1)
        outfile="$HOME/public/pdfs/${rel%.fullsite.md}.pdf"
    else
        extra_opts=()
        outfile="$HOME/public/pdfs/$pdffilename"
    fi

    # Only post-process with Ghostscript when pandoc produced a PDF.
    pandoc "$filepath" -o "$HOME/tmp/$pdffilename" "${pandoc_opts[@]}" "${extra_opts[@]}" &&
        gs "${gs_opts[@]}" -sOutputFile="$outfile" "$HOME/tmp/$pdffilename"
done < <(find . -mindepth 1 -name '*.md' -print0)

Dockerfile

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Multi-stage build. Each stage copies /data from the previous one, runs one
# tool over it, and hands the result on; the final stage serves /data/public.

# Stage "search": install Node dependencies and run the index build script.
FROM node AS search
COPY . /data
WORKDIR /data
RUN npm install
RUN node .build_scripts/build_index.js

# Stage "hugo": run hugo over the tree produced by the previous stage.
FROM jojomi/hugo AS hugo
COPY --from=search /data /data
WORKDIR /data
RUN hugo

# Stage "clean": run the Python clean-up script, which writes into pdf-build.
FROM python:3.7 AS clean
COPY --from=hugo /data /data
WORKDIR /data
# -p: do not fail when the build context already ships a pdf-build directory.
RUN mkdir -p pdf-build
RUN pip3 install --no-cache-dir -r requirements.txt
RUN python3 .build_scripts/clean.py

# Stage "pandoc": install Ghostscript and the Archivo font, then run pandoc.sh.
FROM dalibo/pandocker AS pandoc
COPY --from=clean /data /data
WORKDIR /data
# pandoc.sh resolves all of its paths relative to $HOME.
ENV HOME=/data
# update + install share one layer so a cached, stale package index is never
# reused for the install; the index is removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install --yes ghostscript \
    && rm -rf /var/lib/apt/lists/*
# The URL is quoted because it contains "?", a shell glob character.
RUN wget "https://fonts.google.com/download?family=Archivo" -O "$HOME/archivo.zip"
RUN unzip "$HOME/archivo.zip" -d "$HOME/.fonts"
RUN bash "$HOME/.build_scripts/pandoc.sh"

# Final stage: serve /data/public over HTTP on port 7000.
FROM python:3.7
COPY --from=pandoc /data /data
WORKDIR /data/public
EXPOSE 7000
# Exec form makes python PID 1, so it receives stop signals directly.
CMD ["python3", "-m", "http.server", "7000"]

content/_index.md

Lines changed: 24 additions & 13 deletions
-65.8 KB
Binary file not shown.

content/images/part-ii/HDX.gif

-8.46 MB
Binary file not shown.
-575 KB
Binary file not shown.
-8.83 MB
Binary file not shown.
-2.38 MB
Binary file not shown.
-2.38 MB
Binary file not shown.

0 commit comments

Comments
 (0)