-
Notifications
You must be signed in to change notification settings - Fork 17
/
importjson.py
471 lines (428 loc) · 17.8 KB
/
importjson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
from AccessControl.SecurityManagement import newSecurityManager
from castle.cms._scripts import mjson
from castle.cms._scripts.importtypes import get_import_type
from lxml.html import fromstring
from lxml.html import tostring
from OFS.interfaces import IFolder
import plone.api as api
from plone.app.blocks.layoutbehavior import ILayoutAware
from plone.app.redirector.interfaces import IRedirectionStorage
from plone.app.textfield.value import RichTextValue
from Products.CMFPlone.interfaces.constrains import ISelectableConstrainTypes
from zope.component import getUtility
from zope.component.hooks import setSite
import argparse
import logging
import os
import transaction
# Module-wide logger; warnings and errors raised during the import are
# reported through the 'castle.cms' logging channel.
logger = logging.getLogger('castle.cms')
# TODO:
#   - do NOT export images wholesale:
#       - only images that are referenced in content
#       - look up the reference; if an image is only used once, add it as
#         the lead image
#       - other images, if not referenced elsewhere, are included inline
#         as data URIs
#   - links that go to files should be moved to S3 and be converted
#     into file objects
#   - if a default page is the only page inside a folder, replace the folder
#
def _parse_flag(value):
    """Turn the optional value of a boolean command-line switch into a bool.

    Only an explicit "off" spelling disables a switch. Every other value
    enables it, which keeps invocations such as ``--overwrite=true`` or
    ``--overwrite=1`` working the way a non-empty string always did.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('', '0', 'false', 'no', 'off')


def _add_flag(option, dest, default=False):
    """Register a boolean switch that may be given bare or with a value.

    ``--flag`` alone means ``True``; ``--flag=false`` means ``False``.
    Previously these switches stored the raw string, so ``--flag false``
    silently *enabled* the flag and a bare ``--flag`` was a usage error.
    """
    parser.add_argument(
        option, dest=dest, default=default, nargs='?', const=True,
        type=_parse_flag)


def _split_csv(value):
    """Split a comma-separated option value; ``False`` when it is unset."""
    return value.split(',') if value else False


parser = argparse.ArgumentParser(
    description='...')
parser.add_argument('--site-id', dest='site_id', default='Castle')
parser.add_argument(
    '--export-directory', dest='export_directory', default='export')
parser.add_argument('--import-paths', dest='import_paths', default=False)
parser.add_argument('--skip-paths', dest='skip_paths', default=False)
_add_flag('--overwrite', 'overwrite')
parser.add_argument('--admin-user', dest='admin_user', default='admin')
_add_flag('--ignore-uuids', 'ignore_uuids')
_add_flag('--stop-if-exception', 'stop_if_exception')
_add_flag('--pdb-if-exception', 'pdb_if_exception')
parser.add_argument(
    '--resumable', dest='resumable', default=False, action='store_true')
parser.add_argument(
    '--delete-resumable-file', dest='delete_resumable_file', default=False,
    action='store_true')
_add_flag('--skip-existing', 'skip_existing', default=True)
_add_flag('--skip-transitioning', 'skip_transitioning')
parser.add_argument(
    '--skip-types', dest='skip_types', default=','.join([
        "collective.cover.content",
        "FormFolder",
        "FormMailerAdapter",
        "FormTextField",
        "FormStringField",
        "FormThanksPage",
        "FormSaveDataAdapter",
        "FormSaveData2ContentAdapter",
        "FormSelectionField",
        "Topic"]))
parser.add_argument('--only-types', dest='only_types', default=False)
# Put files/videos/etc in to the repository paths by default,
# Set true to retain exported path
_add_flag('--retain-paths', 'retain_paths')

# Unknown arguments are tolerated: the script is started through a Zope
# runner that may leave its own options on the command line.
args, _ = parser.parse_known_args()

ignore_uuids = args.ignore_uuids
stop_if_exception = args.stop_if_exception
pdb_if_exception = args.pdb_if_exception
retain_paths = args.retain_paths
resumable = args.resumable
delete_resumable_file = args.delete_resumable_file

# Paths already committed (loaded from / appended to the resumable file) and
# paths created since the last commit.
successfully_imported_paths = []
imported_but_not_committed = []

# Comma-separated filters become lists; an unset filter stays ``False``.
import_paths = _split_csv(args.import_paths)
skip_paths = _split_csv(args.skip_paths)
only_types = _split_csv(args.only_types)
skip_types = _split_csv(args.skip_types)
# Run the import with the privileges of the requested Zope admin user.
# ``app`` is not defined in this file; it is expected to be provided by the
# environment that executes the script.
user = app.acl_users.getUser(args.admin_user)  # noqa
try:
    # A failed lookup surfaces here as an exception raised while wrapping
    # the user, which is why the whole call is guarded.
    wrapped_user = user.__of__(app.acl_users)  # noqa
    newSecurityManager(None, wrapped_user)
except Exception:
    logger.error(
        'Unknown admin user; specify an existing Zope admin user '
        'with --admin-user (default is admin)')
    exit(-1)

# Make the target site the active component site for everything that follows.
site = app[args.site_id]  # noqa
setSite(site)
def traverse(path, root=None):
    """Resolve a slash-separated path to an object by walking child ids.

    :param path: path relative to the starting object; leading and trailing
        slashes are ignored.
    :param root: object to start from; defaults to the module-level ``site``.
    :returns: the object found at ``path``, or ``None`` as soon as one path
        segment is not among the ids of the current object.
    """
    folder = site if root is None else root  # noqa
    for part in path.strip('/').split('/'):
        if part in folder.objectIds():
            folder = folder[part]
        else:
            return
    # The original fell off the end here and therefore always returned None,
    # even when every segment had been resolved.
    return folder
def relpath(obj):
    """Return the physical path of ``obj`` with the site's path prefix removed.

    The site's joined physical path plus one separator character is cut off
    the front of the object's joined physical path.
    """
    site_path = '/'.join(site.getPhysicalPath())
    object_path = '/'.join(obj.getPhysicalPath())
    prefix_length = len(site_path) + 1
    return object_path[prefix_length:]
# Names of plain metadata fields. NOTE(review): nothing in the visible part
# of this module reads this tuple -- confirm whether it is still needed.
_importable_fields = (
'title',
'description',
)
def fix_html_images(obj):
    """Rewrite old-style ``resolveuid`` image sources in ``obj.text``.

    Every ``<img>`` whose ``src`` contains ``resolveuid`` but not yet
    ``@@images`` is rewritten to ``resolveuid/<uid>/@@images/image[/<scale>]``
    and tagged with ``data-linktype``, ``data-val`` and, when a scale is
    present, ``data-scale``. ``obj.text`` is replaced with a new
    ``RichTextValue`` only if at least one image changed.

    The function is best-effort: objects without rich text, empty text and
    markup that cannot be parsed are left untouched.

    :param obj: content object, expected to expose a rich-text ``text``
        attribute.
    :returns: ``None``.
    """
    try:
        html = obj.text.raw
    except Exception:
        # No usable rich-text field on this object.
        return
    if not html:
        return
    try:
        dom = fromstring(html)
    except Exception:
        return

    changed = False
    for el in dom.cssselect('img'):
        src = el.attrib.get('src', '')
        if 'resolveuid' not in src or '@@images' in src:
            continue

        # Locate the uid relative to the ``resolveuid`` segment instead of
        # assuming it is the second segment, so that ``../resolveuid/<uid>``
        # and absolute URLs are handled and a source without a uid is
        # skipped rather than raising IndexError.
        parts = src.split('/')
        try:
            uid_index = parts.index('resolveuid') + 1
            uid = parts[uid_index]
        except (ValueError, IndexError):
            continue
        if not uid:
            continue

        scale = '/'.join(parts[uid_index + 1:])
        if scale == 'image':
            # A bare field name means "full size"; it is not a scale.
            scale = ''
        elif scale:
            scale = scale.replace('image_', '')

        new_src = 'resolveuid/%s/@@images/image' % uid
        attribs = {
            'src': new_src,
            'data-linktype': 'image',
            'data-val': uid,
        }
        if scale:
            attribs['src'] = new_src + '/' + scale
            attribs['data-scale'] = scale
        el.attrib.update(attribs)
        changed = True

    if changed:
        obj.text = RichTextValue(
            tostring(dom), mimeType=obj.text.mimeType,
            outputMimeType=obj.text.outputMimeType)
class CastleImporter(object):
    """Recreate exported JSON content inside the target site.

    The importer walks the export directory: every sub-directory becomes a
    folder and every other file is read as the JSON description of one
    content object. It relies on the module-level configuration built from
    the command line (``args``, ``site`` and the derived flags and filters).
    """

    # Number of objects created so far; a commit is issued every 50 objects.
    imported_count = 0

    # Pairs of (key in the exported ``data`` mapping, setter name on the
    # object) used by ``fix_dates``.
    date_functions = [
        ('modification_date', 'setModificationDate'),
        ('effective_date', 'setEffectiveDate'),
        ('expiration_date', 'setExpirationDate'),
    ]

    @property
    def successfully_imported_paths(self):
        """Return the recorded paths, each prefixed with the export directory.

        NOTE(review): the recorded value is the path returned by the import
        type, compared against the on-disk file path; these look like they
        only coincide when the import type keeps the exported path -- confirm.
        """
        return [
            args.export_directory + imported_path
            for imported_path in successfully_imported_paths
        ]

    def do_import(self):
        """Import the whole export directory into the site root."""
        self.import_folder(args.export_directory, container=site)

    def _should_skip(self, data, filepath):
        """Apply the type and path filters, printing every reason that matches.

        :returns: ``True`` when at least one filter excludes the item.
        """
        portal_type = data['portal_type']
        skipped = False

        # ``skip_types`` is ``False`` when the option was emptied; guard the
        # membership test so that case does not raise TypeError.
        if skip_types and portal_type in skip_types:
            print('Skipping omitted type {type}'.format(type=portal_type))
            skipped = True

        if only_types and portal_type not in only_types:
            print("Skipping {type} at {path}, not in only_types.".format(
                type=portal_type,
                path=filepath,
            ))
            skipped = True

        if import_paths:
            relative = filepath[len(args.export_directory):].lstrip('/') + '/'
            wanted = any(
                filepath.startswith(
                    '{}/{}'.format(args.export_directory, import_path))
                # Don't skip folders on the way to import_paths
                or import_path.startswith(relative)
                for import_path in import_paths
            )
            if not wanted:
                print("Skipping {path}, not in import_paths".format(path=filepath))
                skipped = True

        if skip_paths:
            for skip_path in skip_paths:
                if filepath.lower().startswith(
                        '{}/{}'.format(args.export_directory, skip_path)):
                    print("Skipping {path}, in skip_paths".format(path=filepath))
                    skipped = True

        return skipped

    def _commit_batch(self):
        """Commit the transaction and record the paths it made durable."""
        transaction.commit()
        successfully_imported_paths.extend(imported_but_not_committed)
        with open('./.successfullyimportedpaths', 'a') as fout:
            fout.write('\n'.join(imported_but_not_committed) + '\n')
            fout.flush()
        del imported_but_not_committed[:]

    def import_object(self, filepath, container=None):
        """Create or update one content object from an exported JSON file.

        :param filepath: path of the JSON file inside the export directory.
            A trailing ``__folder__`` component denotes the data of the
            enclosing folder itself.
        :param container: object that will hold the content. ``None`` means
            the parent could not be created; the item is then skipped.
        :returns: the created or updated object, or ``None`` when the item
            was skipped or could not be created.
        :raises: re-raises creation and transition errors when
            ``--stop-if-exception`` is set.
        """
        if resumable and filepath in self.successfully_imported_paths:
            print("Skipping {}; Already successfully imported and resuming is enabled".format(filepath))
            return

        try:
            with open(filepath, 'r') as import_file:
                data = mjson.loads(import_file.read())
        except Exception:
            print("Skipping {}; Unable to read JSON data".format(filepath))
            return

        if filepath.endswith('__folder__'):
            filepath = '/'.join(filepath.split('/')[:-1])

        if self._should_skip(data, filepath):
            if os.path.isdir(filepath) and len(os.listdir(filepath)):
                logger.warning(
                    '{path} contains additional content that will be skipped.'.format(path=filepath)
                )
            return

        original_path = filepath[len(args.export_directory):]
        if retain_paths:
            importtype = get_import_type(data, original_path, 'retain_paths')
        else:
            importtype = get_import_type(data, original_path)
        path = importtype.get_path()

        if container is None:
            logger.warning('Skipped {} because of creation error'.format(filepath))
            return

        _id = path.split('/')[-1]
        create = True
        if _id in container.objectIds():
            if args.overwrite:
                existing = container[_id]
                if IFolder.providedBy(existing):
                    if len(existing.objectIds()):
                        print("OVERWRITE: Deleting non-empty container {path}".format(path=path))
                else:
                    print("OVERWRITE: Deleting content item at {path}".format(path=path))
                api.content.delete(container[_id])
            else:
                # Existing object and no overwrite: update it in place.
                create = False

        creation_data = importtype.get_data()
        pc_data = importtype.get_post_creation_data()
        creation_data['container'] = container

        # Restrict the container to this type where the original did, so the
        # object can be added (or stays addable).
        aspect = ISelectableConstrainTypes(container, None)
        if aspect:
            if (aspect.getConstrainTypesMode() != 1 or
                    [creation_data['type']] != aspect.getImmediatelyAddableTypes()):
                aspect.setConstrainTypesMode(1)
                aspect.setImmediatelyAddableTypes([creation_data['type']])

        if create:
            if ignore_uuids and '_plone.uuid' in creation_data:
                del creation_data['_plone.uuid']
            # An export without a UUID must not abort the import with a
            # KeyError; the duplicate check is simply skipped in that case.
            uuid = creation_data.get('_plone.uuid')
            obj = None
            if not args.overwrite and (_id in container.objectIds()):
                # Defensive: ``create`` is only true here when the id is free
                # or was just deleted, so this is not expected to trigger.
                print(
                    'Skipping {path}, already exists. Use --overwrite to create anyway.'.format(path=path)
                )
                return
            elif (not ignore_uuids and uuid is not None and
                    api.content.get(UID=uuid) is not None):
                logger.warning(
                    'Skipping {path}, content with its UUID already exists.'
                    'Use --ignore-uuids to create anyway.'.format(path=path)
                )
                return
            else:
                try:
                    obj = api.content.create(safe_id=True, **creation_data)
                    print('Created {path}'.format(path=path))
                    imported_but_not_committed.append(path)
                    self.imported_count += 1
                    if self.imported_count % 50 == 0:
                        print('%i processed, committing' % self.imported_count)
                        self._commit_batch()
                except api.exc.InvalidParameterError:
                    logger.error('Error creating content {}'.format(filepath), exc_info=True)
                    if stop_if_exception:
                        if pdb_if_exception:
                            import pdb
                            pdb.set_trace()
                        raise
                    return
            # TODO check default folder pages came over as folder with rich text tile
            # TODO any folder pages without default page should have content listing tile
        else:
            obj = container[_id]
            for key, value in creation_data.items():
                # ``container`` is a creation argument, not a field: storing
                # the parent as an attribute of its child is not intended.
                if key not in ('id', 'type', 'container'):
                    setattr(obj, key, value)

        if obj is not None:
            if path != original_path:
                # The object moved; keep the exported location reachable.
                storage = getUtility(IRedirectionStorage)
                rpath = os.path.join('/'.join(site.getPhysicalPath()),
                                     original_path.strip('/'))
                storage.add(rpath, "/".join(obj.getPhysicalPath()))

            obj.contentLayout = importtype.layout
            importtype.post_creation(obj, post_creation_data=pc_data)

            if not args.skip_transitioning and data['state']:
                # transition item only if it needs it
                state = api.content.get_state(obj=obj)
                if state != data['state']:
                    try:
                        print('Transitioning %s to %s' % (obj.id, data['state']))
                        api.content.transition(obj, to_state=data['state'])
                    except Exception:
                        logger.error("Error transitioning %s to %s, maybe workflows"
                                     " don't match up" % (obj.id, data['state']))
                        if stop_if_exception:
                            if pdb_if_exception:
                                import pdb
                                pdb.set_trace()
                            raise

            # set workflow / review history
            if 'review_history' in data:
                review_history = data['review_history']
                wtool = api.portal.get_tool(name='portal_workflow')
                # loop over all workflow chains (usually only 1)
                for workflow_id in wtool.getChainFor(obj):
                    obj.workflow_history[workflow_id] = review_history
            else:
                logger.warning('No review history on {obj}'.format(obj=obj))

            fix_html_images(obj)
            obj.reindexObject()
            # Dates are applied last, after the full reindex above.
            self.fix_dates(obj, data)

        return obj

    def fix_dates(self, obj, data):
        """Apply the exported dates listed in ``date_functions`` to ``obj``.

        For every entry whose date is present in ``data['data']`` and whose
        setter exists on ``obj``, the setter is called and the object is
        reindexed for that key. Failures are logged and do not propagate.
        """
        for key, index_function_name in self.date_functions:
            indexed_date = data['data'].get(key, None)
            indexer = getattr(obj, index_function_name, None)
            if not (indexed_date and indexer):
                continue
            formatted_attribute = key.replace('_', ' ')
            try:
                indexer(indexed_date)
                obj.reindexObject([key])
                info_message = '    set {attribute} to {date}'.format(
                    attribute=formatted_attribute,
                    date=indexed_date,
                )
                logger.info(info_message)
            except Exception:
                warn_message = 'Could not set {attribute} on {obj}'.format(
                    attribute=formatted_attribute,
                    obj=obj,
                )
                logger.warning(warn_message)

    def import_folder(self, path, container):
        """Import the directory ``path`` and everything below it.

        Unless ``path`` is the export directory itself, a matching folder is
        looked up in ``container``, or created from the directory's
        ``__folder__`` file, or created as a plain folder when that file is
        missing. Sub-directories are imported first, then the files.

        :param path: directory inside the export directory.
        :param container: object that receives the folder for ``path``.
        :raises: re-raises errors from importing a file when
            ``--stop-if-exception`` is set.
        """
        this_folder = os.path.join(path, '__folder__')
        # Compare by value: identity of two equal strings is not guaranteed.
        if path != args.export_directory:
            folder = None
            folder_id = path.split('/')[-1]
            if container is not None and folder_id in container.objectIds():
                if args.overwrite:
                    api.content.delete(container[folder_id])
                else:
                    folder = container[folder_id]
            # ``is None`` rather than truthiness, so that an existing folder
            # is reused even when it evaluates as false (e.g. when empty).
            if folder is None:
                if os.path.exists(this_folder):
                    folder = self.import_object(this_folder, container)
                else:
                    folder = self.create_plain_folder(folder_id, container)
            container = folder

        folders = []
        objects = []
        for filename in os.listdir(path):
            if filename in ('.DS_Store', '__folder__'):
                continue
            filepath = os.path.join(path, filename)
            if os.path.isdir(filepath):
                folders.append(filepath)
            else:
                objects.append(filepath)

        for folder_path in folders:
            self.import_folder(folder_path, container)

        for object_path in objects:
            try:
                self.import_object(object_path, container)
            except Exception:
                # Report the file that actually failed, not the last entry
                # seen while listing the directory.
                logger.error(
                    'Error importing {path}'.format(path=object_path),
                    exc_info=True,
                )
                if stop_if_exception:
                    if pdb_if_exception:
                        import pdb
                        pdb.set_trace()
                    raise
        # app._p_jar.invalidateCache()  # noqa
        # app._p_jar.sync()  # noqa

    def create_plain_folder(self, id, container):
        """Create a ``Folder`` for a directory that has no ``__folder__`` file.

        The folder gets the capitalised id as title and the folder-query
        content layout, and is published unless transitioning is disabled.

        :param id: id of the new folder.
        :param container: object that receives the folder.
        :returns: the new folder, or ``None`` when ``container`` is ``None``
            (the parent could not be created).
        """
        if container is None:
            logger.warning(
                'Skipped plain Folder {} because of creation error'.format(id))
            return None
        logger.info("Creating plain Folder (no __folder__ file found), %s" % id)
        folder = api.content.create(
            type='Folder',
            id=id,
            title=id.capitalize(),
            container=container)
        bdata = ILayoutAware(folder)
        bdata.contentLayout = '++contentlayout++castle/folder-query.html'
        if not args.skip_transitioning:
            api.content.transition(folder, to_state='published')
        return folder
# Script entry point: optionally load the list of already imported paths,
# run the import, commit, and clean up the resumable file on request.
if __name__ == '__main__':
    print('------------------------------')
    print('Start importing')
    print('------------------------------')
    if resumable:
        print('------------------------------')
        print('Resuming enabled, checking for ./.successfullyimportedpaths and loading')
        # create if it doesn't exist:
        with open('.successfullyimportedpaths', 'a+'):
            pass
        with open('.successfullyimportedpaths', 'r') as fin:
            for line in fin:
                successfully_imported_paths.append(line.replace('\n', ''))
        print('{} paths loaded'.format(len(successfully_imported_paths)))
        print('------------------------------')
    if args.overwrite:
        print('------------------------------')
        print('Importing with overwrite enabled')
        print('------------------------------')
    importer = CastleImporter()
    importer.do_import()
    print('Created {count} Content Items'.format(count=importer.imported_count))
    transaction.commit()
    # Paths created since the last periodic commit are durable now as well;
    # record them so that a later resumed run does not process them again.
    if imported_but_not_committed and not delete_resumable_file:
        successfully_imported_paths.extend(imported_but_not_committed)
        with open('./.successfullyimportedpaths', 'a') as fout:
            fout.write('\n'.join(imported_but_not_committed) + '\n')
        del imported_but_not_committed[:]
    print('Import completed')
    if delete_resumable_file:
        print('Deleting .successfullyimportedpaths')
        # The file only exists after a resumed start or a periodic commit;
        # removing a missing file must not fail an otherwise finished run.
        if os.path.exists('.successfullyimportedpaths'):
            os.remove('.successfullyimportedpaths')