-
Notifications
You must be signed in to change notification settings - Fork 1
/
customize_DOCX_file.py
executable file
·335 lines (284 loc) · 11.7 KB
/
customize_DOCX_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -*- mode: python; python-indent-offset: 4 -*-
#
# ./customize_DOCX_file.py --json file.json [--file cover_template.docx]
#
# Purpose: The program produces a customized DOCX by setting the custom DOCPROPERIYES to the values from the JSON file
# The JSON file can be produced by extract_custom_DOCX_properties.py
#
# Output: outputs a customized DOCX file: <input_filename>-modified.docx
#
# Example:
# ./customize_DOCX_file.py --json custom_values.json --file za5.docx
# produces za5-modified.docx
#
#
# Notes:
# Only one test json file has been run.
#
# The dates from Canvas are in ISO 8601 format.
#
# 2021-12-07 G. Q. Maguire Jr.
# Base on earlier JSON_to_cover.py
#
import re
import sys
import subprocess
import json
import argparse
import os # to make OS calls, here to get time zone info
import time
import pprint
from collections import defaultdict
import datetime
import isodate # for parsing ISO 8601 dates and times
import pytz # for time zones
from dateutil.tz import tzlocal
# for dealing with the DOCX file - which is a ZIP file
import zipfile
try:
import zlib
compression = zipfile.ZIP_DEFLATED
except:
compression = zipfile.ZIP_STORED
modes = { zipfile.ZIP_DEFLATED: 'deflated',
zipfile.ZIP_STORED: 'stored',
}
def lookup_value_for_name(name, dict_of_entries):
for e in dict_of_entries:
n=dict_of_entries[e].get(name)
if n:
return n
return None
def replace_value_for_name(name, new_value, xml_content):
offset=0
pattern1='<property '
offset=xml_content.find(pattern1, offset)
while offset >= 0:
pattern3='name="{}"'.format(name)
name_offset=xml_content.find(pattern3, offset+len(pattern1))
if name_offset:
pattern4='<vt:lpwstr>'
pattern4_end='</vt:lpwstr>'
value_offset=xml_content.find(pattern4, name_offset+len(pattern3))
value_end=xml_content.find(pattern4_end, value_offset+len(pattern4))
prefix=xml_content[:value_offset+len(pattern4)]
postfix=xml_content[value_end:]
return prefix + "{}".format(new_value) + postfix
return xml_content
def mark_first_field_as_dirty(content):
# <w:fldChar w:fldCharType="begin" w:dirty="true"/>
pattern='<w:fldChar w:fldCharType="begin"'
offset=content.find(pattern)
if offset >= 0:
prefix=content[:offset+len(pattern)]
postfix=content[offset+len(pattern):]
middle=' w:dirty="true"'
content=prefix + middle + postfix
return content
def mapping_JSON_to_field_names(top, name):
if name == 'Last name':
return top+'_Last_name'
if name == 'First name':
return top+'_First_name'
if name == 'Local User Id':
return top+'_Local User Id'
if name == 'E-mail':
return top+'_E-mail'
if name == 'L1':
return top+'_organization_L1'
if name == 'L2':
return top+'_organization_L2'
if name == 'L2':
return top+'_organization_L2'
if name == 'Other organisation':
return top+'_Other_organisation'
if name == 'Cycle':
return 'Cycle'
if name == 'Credits':
return 'Credits'
if top == 'Course code':
return 'Course_code'
if top == 'National Subject Categories':
return 'National Subject Categories'
if top == 'Degree1':
if name == 'subjectArea':
return 'subjectArea'
if name == 'programcode':
return 'programcode'
if name == 'Educational program':
return 'Educational program'
if name == 'Degree':
return 'Degree'
if top == 'Degree2':
if name == 'subjectArea':
return 'Second_subjectarea'
if name == 'programcode':
return 'Second_programcode'
if name == 'Educational program':
return 'Second_Educational_program'
if name == 'Degree':
return 'Second_degree'
if top == 'Cooperation':
if name == 'Partner_name':
return 'Cooperation_Partner_name'
def transform_file(content, dict_of_entries):
global Verbose_Flag
# <property fmtid="xxxx" pid="2" name="property_name"><vt:lpwstr>property_value</vt:lpwstr>
#
for k in dict_of_entries:
print("k={}".format(k))
if k in ['Author1', 'Author2', 'Examiner1', 'Supervisor1', 'Supervisor2', 'Supervisor3']:
if isinstance(dict_of_entries[k], dict):
for name in dict_of_entries[k].keys():
if name in ['Last name', 'First name', 'Local User Id', 'E-mail', 'Other organisation']:
new_value=dict_of_entries[k].get(name)
docx_name=mapping_JSON_to_field_names(k, name)
content=replace_value_for_name(docx_name, new_value, content)
#print("*** docx_name={0}, new_value={1}, content={2}".format(docx_name, new_value, content))
print("*** docx_name={0}, new_value={1}".format(docx_name, new_value))
elif name == 'organisation':
for level in dict_of_entries[k][name]:
new_value=dict_of_entries[k][name].get(level)
docx_name=mapping_JSON_to_field_names(k, level)
content=replace_value_for_name(docx_name, new_value, content)
print("*** docx_name={0}, new_value={1}".format(docx_name, new_value))
else:
print("should not get here - processing k={0} name={1}".format(k, name))
elif k in ['Cycle', 'Credits']:
if isinstance(dict_of_entries[k], int):
new_value=dict_of_entries[k]
content=replace_value_for_name(k, new_value, content)
print("*** name={0}, new_value={1}".format(k, new_value))
elif k in ['Course code', 'National Subject Categories']:
if isinstance(dict_of_entries[k], str):
new_value=dict_of_entries[k]
#print("k='{0}', name='{1}'".format(k, name))
docx_name=mapping_JSON_to_field_names(k, name)
content=replace_value_for_name(docx_name, new_value, content)
print("*** docx_name={0}, new_value={1}".format(docx_name, new_value))
elif k in ['Degree1', 'Degree2', 'Cooperation']:
if isinstance(dict_of_entries[k], dict):
for name in dict_of_entries[k].keys():
if name in ['subjectArea', 'programcode', 'Educational program', 'Degree', 'Partner_name']:
new_value=dict_of_entries[k].get(name)
docx_name=mapping_JSON_to_field_names(k, name)
content=replace_value_for_name(docx_name, new_value, content)
#print("*** docx_name={0}, new_value={1}, content={2}".format(docx_name, new_value, content))
print("*** docx_name={0}, new_value={1}".format(docx_name, new_value))
# elif isinstance(dict_of_entries[k], dict):
# for name in dict_of_entries[k].keys():
# new_value=lookup_value_for_name(name, dict_of_entries)
# content=replace_value_for_name(name, new_value, content)
# print("*** name={0}, new_value={1}, content={2}".format(name, new_value, content))
else:
print("type={} - do not know how to process".format(type(dict_of_entries[k])))
return content
def main(argv):
global Verbose_Flag
global testing
global Keep_picture_flag
argp = argparse.ArgumentParser(description="customize_DOCX_file.py: to customize a DOCX template")
argp.add_argument('-v', '--verbose', required=False,
default=False,
action="store_true",
help="Print lots of output to stdout")
argp.add_argument('-t', '--testing',
default=False,
action="store_true",
help="execute test code"
)
argp.add_argument('-j', '--json',
type=str,
default="event.json",
help="JSON file for extracted data"
)
argp.add_argument('--cycle',
type=int,
help="cycle of thesis"
)
argp.add_argument('--credits',
type=float,
help="number_of_credits of thesis"
)
argp.add_argument('--exam',
type=int,
help="type of exam"
)
argp.add_argument('--area',
type=str,
help="area of thesis"
)
argp.add_argument('--area2',
type=str,
help="area of thesis for combined Cinving. and Master's"
)
argp.add_argument('--trita',
type=str,
help="trita string for thesis"
)
argp.add_argument('--file',
type=str,
help="DOCX template"
)
argp.add_argument('-p', '--picture',
default=False,
action="store_true",
help="keep the optional picture"
)
args = vars(argp.parse_args(argv))
Verbose_Flag=args["verbose"]
Keep_picture_flag=args['picture']
testing=args["testing"]
if Verbose_Flag:
print("testing={}".format(testing))
json_filename=args["json"]
if not json_filename:
print("Unknown source for the JSON information: {}".format(json_filename))
return
# extras contains information from the command line options
with open(json_filename, 'r') as json_FH:
try:
json_string=json_FH.read()
dict_of_entries=json.loads(json_string)
except:
print("Error in reading={}".format(json_string))
return
if Verbose_Flag:
print("read JSON: {}".format(dict_of_entries))
input_filename=args['file']
document = zipfile.ZipFile(input_filename)
file_names=document.namelist()
if Verbose_Flag:
print("File names in ZIP zip file: {}".format(file_names))
word_document_file_name='word/document.xml'
word_docprop_custom_file_name='docProps/custom.xml'
if word_document_file_name not in file_names:
print("Missing file: {}".format(word_document_file_name))
return
output_filename="{}-modfied.docx".format(input_filename[:-5])
print("outputting modified data to {}".format(output_filename))
zipOut = zipfile.ZipFile(output_filename, 'w')
for fn in file_names:
if Verbose_Flag:
print("processing file: {}".format(fn))
# copy existing file to archive
if fn not in [word_docprop_custom_file_name, word_document_file_name]:
file_contents = document.read(fn)
else:
if Verbose_Flag:
print("processing {}".format(fn))
xml_content = document.read(fn).decode('utf-8')
if fn == word_docprop_custom_file_name:
file_contents = transform_file(xml_content, dict_of_entries)
elif fn == word_document_file_name:
file_contents = mark_first_field_as_dirty(xml_content)
else:
print("Unknown file {}".format(fn))
# in any case write the file_contents out
zipOut.writestr(fn, file_contents, compress_type=compression)
zipOut.close()
document.close()
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))