create_sibling.py
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""High-level interface for creation of publication target via SSH
"""
__docformat__ = 'restructuredtext'
from six import text_type
from distutils.version import LooseVersion
from glob import glob
import logging
from os.path import join as opj, relpath, normpath, dirname, curdir
import datalad
from datalad import ssh_manager
from datalad.cmd import CommandError
from datalad.consts import WEB_HTML_DIR, WEB_META_LOG
from datalad.consts import TIMESTAMP_FMT
from datalad.dochelpers import exc_str
from datalad.distribution.siblings import Siblings
from datalad.distribution.siblings import _DelayedSuper
from datalad.distribution.add_sibling import _check_deps
from datalad.distribution.dataset import EnsureDataset, Dataset, \
datasetmethod, require_dataset
from datalad.interface.annotate_paths import AnnotatePaths
from datalad.interface.base import Interface
from datalad.interface.base import build_doc
from datalad.interface.utils import eval_results
from datalad.interface.common_opts import recursion_limit, recursion_flag
from datalad.interface.common_opts import as_common_datasrc
from datalad.interface.common_opts import publish_by_default
from datalad.interface.common_opts import publish_depends
from datalad.interface.common_opts import inherit_opt
from datalad.interface.common_opts import annex_wanted_opt
from datalad.interface.common_opts import annex_group_opt
from datalad.interface.common_opts import annex_groupwanted_opt
from datalad.support.annexrepo import AnnexRepo
from datalad.support.constraints import EnsureStr, EnsureNone, EnsureBool
from datalad.support.constraints import EnsureChoice
from datalad.support.exceptions import InsufficientArgumentsError
from datalad.support.exceptions import MissingExternalDependency
from datalad.support.network import RI
from datalad.support.network import is_ssh
from datalad.support.sshconnector import sh_quote
from datalad.support.param import Parameter
from datalad.utils import make_tempfile
from datalad.utils import _path_
from datalad.utils import slash_join
from datalad.utils import assure_list
lgr = logging.getLogger('datalad.distribution.create_sibling')


def _create_dataset_sibling(
name,
ds,
hierarchy_basepath,
ssh,
replicate_local_structure,
ssh_url,
target_dir,
target_url,
target_pushurl,
existing,
shared,
group,
publish_depends,
publish_by_default,
as_common_datasrc,
annex_wanted,
annex_group,
annex_groupwanted,
inherit
):
"""Everyone is very smart here and could figure out the combinatorial
affluence among provided tiny (just slightly over a dozen) number of options
and only a few pages of code
"""
localds_path = ds.path
ds_name = relpath(localds_path, start=hierarchy_basepath)
if not replicate_local_structure:
ds_name = '' if ds_name == curdir \
else '-{}'.format(ds_name.replace("/", "-"))
remoteds_path = target_dir.replace(
"%RELNAME",
ds_name)
else:
# TODO: opj depends on local platform, not the remote one.
# check how to deal with it. Does windows ssh server accept
# posix paths? vice versa? Should planned SSH class provide
# tools for this issue?
# see gh-1188
remoteds_path = normpath(opj(target_dir, ds_name))
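    # Illustrative expansion (assumed example values): with
    # target_dir='/data/%RELNAME' and a subdataset at 'sub/ds1' relative to the
    # base dataset, remoteds_path becomes '/data/-sub-ds1'; without the
    # placeholder and target_dir='/data', it becomes '/data/sub/ds1'.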
# construct a would-be ssh url based on the current dataset's path
ssh_url.path = remoteds_path
ds_sshurl = ssh_url.as_str()
# configure dataset's git-access urls
ds_target_url = target_url.replace('%RELNAME', ds_name) \
if target_url else ds_sshurl
# push, configure only if needed
ds_target_pushurl = None
if ds_target_url != ds_sshurl:
# not guaranteed that we can push via the primary URL
ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
if target_pushurl else ds_sshurl
lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
localds_path, remoteds_path, ssh_url.hostname))
# Must be set to True only if exists and existing='reconfigure'
# otherwise we might skip actions if we say existing='reconfigure'
# but it did not even exist before
only_reconfigure = False
if remoteds_path != '.':
# check if target exists
# TODO: Is this condition valid for != '.' only?
path_exists = True
try:
out, err = ssh("ls {}".format(sh_quote(remoteds_path)))
except CommandError as e:
if "No such file or directory" in e.stderr and \
remoteds_path in e.stderr:
path_exists = False
else:
raise # It's an unexpected failure here
if path_exists:
_msg = "Target path %s already exists." % remoteds_path
# path might be existing but be an empty directory, which should be
# ok to remove
try:
lgr.debug(
"Trying to rmdir %s on remote since might be an empty dir",
remoteds_path
)
# should be safe since should not remove anything unless an empty dir
ssh("rmdir {}".format(sh_quote(remoteds_path)))
path_exists = False
except CommandError as e:
                # If rmdir fails -- either the dir is not empty or we lack permissions
# TODO: fixup encode/decode dance again :-/ we should have got
# unicode/str here by now. I guess it is the same as
# https://github.com/ReproNim/niceman/issues/83
# where I have reused this Runner thing
try:
# ds_name is unicode which makes _msg unicode so we must be
# unicode-ready
err_str = text_type(e.stderr)
except UnicodeDecodeError:
err_str = e.stderr.decode(errors='replace')
_msg += " And it fails to rmdir (%s)." % (err_str.strip(),)
if path_exists:
if existing == 'error':
raise RuntimeError(_msg)
elif existing == 'skip':
lgr.info(_msg + " Skipping")
return
elif existing == 'replace':
lgr.info(_msg + " Replacing")
# enable write permissions to allow removing dir
ssh("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
# remove target at path
ssh("rm -rf {}".format(sh_quote(remoteds_path)))
# if we succeeded in removing it
path_exists = False
# Since it is gone now, git-annex also should forget about it
remotes = ds.repo.get_remotes()
if name in remotes:
# so we had this remote already, we should announce it dead
# XXX what if there was some kind of mismatch and this name
# isn't matching the actual remote UUID? should have we
# checked more carefully?
lgr.info(
"Announcing existing remote %s dead to annex and removing",
name
)
if isinstance(ds.repo, AnnexRepo):
ds.repo.set_remote_dead(name)
ds.repo.remove_remote(name)
elif existing == 'reconfigure':
lgr.info(_msg + " Will only reconfigure")
only_reconfigure = True
else:
raise ValueError(
"Do not know how to handle existing={}".format(
repr(existing)))
if not path_exists:
ssh("mkdir -p {}".format(sh_quote(remoteds_path)))
if inherit and shared is None:
# here we must analyze current_ds's super, not the super_ds
delayed_super = _DelayedSuper(ds)
# inherit from the setting on remote end
shared = CreateSibling._get_ds_remote_shared_setting(
delayed_super, name, ssh)
if group:
        # Whether the repository existed before or a new directory was just
        # created for it, set its group to the desired one, if provided, via chgrp
ssh("chgrp -R {} {}".format(
sh_quote(text_type(group)),
sh_quote(remoteds_path)))
# don't (re-)initialize dataset if existing == reconfigure
if not only_reconfigure:
# init git and possibly annex repo
if not CreateSibling.init_remote_repo(
remoteds_path, ssh, shared, ds,
description=target_url):
return
if target_url and not is_ssh(target_url):
# we are not coming in via SSH, hence cannot assume proper
# setup for webserver access -> fix
ssh('git -C {} update-server-info'.format(sh_quote(remoteds_path)))
else:
# TODO -- we might still want to reconfigure 'shared' setting!
pass
# at this point we have a remote sibling in some shape or form
# -> add as remote
lgr.debug("Adding the siblings")
# TODO generator, yield the now swallowed results
Siblings.__call__(
'configure',
dataset=ds,
name=name,
url=ds_target_url,
pushurl=ds_target_pushurl,
recursive=False,
fetch=True,
as_common_datasrc=as_common_datasrc,
publish_by_default=publish_by_default,
publish_depends=publish_depends,
annex_wanted=annex_wanted,
annex_group=annex_group,
annex_groupwanted=annex_groupwanted,
inherit=inherit
)
# check git version on remote end
lgr.info("Adjusting remote git configuration")
if ssh.get_git_version() and ssh.get_git_version() >= LooseVersion("2.4"):
# allow for pushing to checked out branch
try:
ssh("git -C {} config receive.denyCurrentBranch updateInstead".format(
sh_quote(remoteds_path)))
except CommandError as e:
lgr.error("git config failed at remote location %s.\n"
"You will not be able to push to checked out "
"branch. Error: %s", remoteds_path, exc_str(e))
else:
lgr.error("Git version >= 2.4 needed to configure remote."
" Version detected on server: %s\nSkipping configuration"
" of receive.denyCurrentBranch - you will not be able to"
" publish updates to this repository. Upgrade your git"
" and run with --existing=reconfigure",
ssh.get_git_version())
# enable metadata refresh on dataset updates to publication server
lgr.info("Enabling git post-update hook ...")
try:
CreateSibling.create_postupdate_hook(
remoteds_path, ssh, ds)
except CommandError as e:
lgr.error("Failed to add json creation command to post update "
"hook.\nError: %s" % exc_str(e))
    return remoteds_path


@build_doc
class CreateSibling(Interface):
"""Create a dataset sibling on a UNIX-like SSH-accessible machine
Given a local dataset, and SSH login information this command creates
a remote dataset repository and configures it as a dataset sibling to
be used as a publication target (see `publish` command).
Various properties of the remote sibling can be configured (e.g. name
location on the server, read and write access URLs, and access
permissions.
Optionally, a basic web-viewer for DataLad datasets can be installed
at the remote location.
This command supports recursive processing of dataset hierarchies, creating
a remote sibling for each dataset in the hierarchy. By default, remote
siblings are created in hierarchical structure that reflects the
organization on the local file system. However, a simple templating
mechanism is provided to produce a flat list of datasets (see
--target-dir).
"""
# XXX prevent common args from being added to the docstring
_no_eval_results = True
_params_ = dict(
# TODO: Figure out, whether (and when) to use `sshurl` as push url
dataset=Parameter(
args=("--dataset", "-d",),
doc="""specify the dataset to create the publication target for. If
no dataset is given, an attempt is made to identify the dataset
based on the current working directory""",
constraints=EnsureDataset() | EnsureNone()),
sshurl=Parameter(
args=("sshurl",),
metavar='SSHURL',
nargs='?',
doc="""Login information for the target server. This can be given
as a URL (ssh://host/path) or SSH-style (user@host:path).
            Unless overridden, this also serves as the future dataset's access
            URL and path on the server.""",
constraints=EnsureStr()),
name=Parameter(
args=('-s', '--name',),
metavar='NAME',
doc="""sibling name to create for this publication target.
If `recursive` is set, the same name will be used to label all
the subdatasets' siblings. When creating a target dataset fails,
no sibling is added""",
constraints=EnsureStr() | EnsureNone(),
nargs="?"),
target_dir=Parameter(
args=('--target-dir',),
metavar='PATH',
doc="""path to the directory *on the server* where the dataset
shall be created. By default the SSH access URL is used to
identify this directory. If a relative path is provided here,
it is interpreted as being relative to the user's home
directory on the server.\n
Additional features are relevant for recursive processing of
datasets with subdatasets. By default, the local
dataset structure is replicated on the server. However, it is
possible to provide a template for generating different target
directory names for all (sub)datasets. Templates can contain
            certain placeholders that are substituted for each (sub)dataset.
            For example: "/mydirectory/dataset%%RELNAME".\nSupported
            placeholders:\n
            %%RELNAME - the name of the dataset, with any slashes replaced by
dashes\n""",
constraints=EnsureStr() | EnsureNone()),
target_url=Parameter(
args=('--target-url',),
metavar='URL',
doc=""""public" access URL of the to-be-created target dataset(s)
(default: `sshurl`). Accessibility of this URL determines the
access permissions of potential consumers of the dataset.
As with `target_dir`, templates (same set of placeholders)
are supported. Also, if specified, it is provided as the annex
description\n""",
constraints=EnsureStr() | EnsureNone()),
target_pushurl=Parameter(
args=('--target-pushurl',),
metavar='URL',
doc="""In case the `target_url` cannot be used to publish to the
dataset, this option specifies an alternative URL for this
purpose. As with `target_url`, templates (same set of
placeholders) are supported.\n""",
constraints=EnsureStr() | EnsureNone()),
recursive=recursion_flag,
recursion_limit=recursion_limit,
existing=Parameter(
args=("--existing",),
constraints=EnsureChoice('skip', 'replace', 'error', 'reconfigure'),
metavar='MODE',
doc="""action to perform, if a sibling is already configured under the
given name and/or a target directory already exists.
In this case, a dataset can be skipped ('skip'), an existing target
directory be forcefully re-initialized, and the sibling (re-)configured
('replace', implies 'reconfigure'), the sibling configuration be updated
only ('reconfigure'), or to error ('error').""",),
inherit=inherit_opt,
shared=Parameter(
args=("--shared",),
metavar='false|true|umask|group|all|world|everybody|0xxx',
doc="""if given, configures the access permissions on the server
for multi-users (this could include access by a webserver!).
Possible values for this option are identical to those of
`git init --shared` and are described in its documentation.""",
constraints=EnsureStr() | EnsureBool() | EnsureNone()),
group=Parameter(
args=("--group",),
metavar="GROUP",
doc="""Filesystem group for the repository. Specifying the group is
particularly important when [CMD: --shared=group CMD][PY:
shared="group" PY]""",
constraints=EnsureStr() | EnsureNone()
),
ui=Parameter(
args=("--ui",),
metavar='false|true|html_filename',
doc="""publish a web interface for the dataset with an
optional user-specified name for the html at publication
target. defaults to `index.html` at dataset root""",
constraints=EnsureBool() | EnsureStr()),
as_common_datasrc=as_common_datasrc,
publish_depends=publish_depends,
publish_by_default=publish_by_default,
annex_wanted=annex_wanted_opt,
annex_group=annex_group_opt,
annex_groupwanted=annex_groupwanted_opt,
since=Parameter(
args=("--since",),
constraints=EnsureStr() | EnsureNone(),
doc="""limit processing to datasets that have been changed since a given
state (by tag, branch, commit, etc). This can be used to create siblings
for recently added subdatasets."""),
    )

    @staticmethod
@datasetmethod(name='create_sibling')
@eval_results
def __call__(sshurl, name=None, target_dir=None,
target_url=None, target_pushurl=None,
dataset=None,
recursive=False,
recursion_limit=None,
existing='error',
shared=None,
group=None,
ui=False,
as_common_datasrc=None,
publish_by_default=None,
publish_depends=None,
annex_wanted=None, annex_group=None, annex_groupwanted=None,
inherit=False,
since=None):
#
# nothing without a base dataset
#
ds = require_dataset(dataset, check_installed=True,
purpose='creating a sibling')
refds_path = ds.path
#
# all checks that are possible before we start parsing the dataset
#
# possibly use sshurl to get the name in case if not specified
if not sshurl:
if not inherit:
raise InsufficientArgumentsError(
"needs at least an SSH URL, if no inherit option"
)
if name is None:
raise ValueError(
"Neither SSH URL, nor the name of sibling to inherit from "
"was specified"
)
# It might well be that we already have this remote setup
try:
sshurl = CreateSibling._get_remote_url(ds, name)
except Exception as exc:
lgr.debug('%s does not know about url for %s: %s', ds, name, exc_str(exc))
elif inherit:
raise ValueError(
"For now, for clarity not allowing specifying a custom sshurl "
"while inheriting settings"
)
# may be could be safely dropped -- still WiP
if not sshurl:
# TODO: may be more back up before _prep?
super_ds = ds.get_superdataset()
if not super_ds:
raise ValueError(
"Could not determine super dataset for %s to inherit URL"
% ds
)
super_url = CreateSibling._get_remote_url(super_ds, name)
# for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
# needs storing datalad.*.target_dir to have %RELNAME in there)
sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))
# check the login URL
sshri = RI(sshurl)
if not is_ssh(sshri):
raise ValueError(
"Unsupported SSH URL: '{0}', "
"use ssh://host/path or host:path syntax".format(sshurl))
if not name:
# use the hostname as default remote name
name = sshri.hostname
lgr.debug(
"No sibling name given, use URL hostname '%s' as sibling name",
name)
if since == '':
# consider creating siblings only since the point of
# the last update
# XXX here we assume one to one mapping of names from local branches
# to the remote
active_branch = ds.repo.get_active_branch()
since = '%s/%s' % (name, active_branch)
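            # e.g. name='myserver' with active branch 'master' yields 'myserver/master'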
#
# parse the base dataset to find all subdatasets that need processing
#
to_process = []
for ap in AnnotatePaths.__call__(
dataset=refds_path,
# only a single path!
path=refds_path,
recursive=recursive,
recursion_limit=recursion_limit,
action='create_sibling',
# both next should not happen anyways
unavailable_path_status='impossible',
nondataset_path_status='error',
modified=since,
return_type='generator',
on_failure='ignore'):
if ap.get('status', None):
# this is done
yield ap
continue
if ap.get('type', None) != 'dataset' or ap.get('state', None) == 'absent':
# this can happen when there is `since`, but we have no
# use for anything but datasets here
continue
checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
if ap.get('state', None) != 'absent' \
else []
if publish_depends:
# make sure dependencies are valid
# TODO: inherit -- we might want to automagically create
# those dependents as well???
unknown_deps = set(assure_list(publish_depends)).difference(checkds_remotes)
if unknown_deps:
ap['status'] = 'error'
ap['message'] = (
'unknown sibling(s) specified as publication dependency: %s',
unknown_deps)
yield ap
continue
if name in checkds_remotes and existing in ('error', 'skip'):
ap['status'] = 'error' if existing == 'error' else 'notneeded'
ap['message'] = (
"sibling '%s' already configured (specify alternative name, or force "
"reconfiguration via --existing",
name)
yield ap
continue
to_process.append(ap)
if not to_process:
# we ruled out all possibilities
# TODO wait for gh-1218 and make better return values
lgr.info("No datasets qualify for sibling creation. "
"Consider different settings for --existing "
"or --since if this is unexpected")
return
if target_dir is None:
if sshri.path:
target_dir = sshri.path
else:
target_dir = '.'
# TODO: centralize and generalize template symbol handling
replicate_local_structure = "%RELNAME" not in target_dir
# request ssh connection:
lgr.info("Connecting ...")
        assert(sshurl is not None)  # delayed sanity verification
ssh = ssh_manager.get_connection(sshurl)
if not ssh.get_annex_version():
raise MissingExternalDependency(
'git-annex',
msg='on the remote system')
#
# all checks done and we have a connection, now do something
#
        # loop over all datasets, ordered from top to bottom, to make the test
        # below valid (existing directories would cause the machinery to halt).
        # But post-update hooks need to run in depth-first fashion, so we only
        # collect the repositories here and run the hooks afterwards (see gh #790)
yielded = set()
remote_repos_to_run_hook_for = []
for currentds_ap in \
sorted(to_process, key=lambda x: x['path'].count('/')):
current_ds = Dataset(currentds_ap['path'])
path = _create_dataset_sibling(
name,
current_ds,
ds.path,
ssh,
replicate_local_structure,
sshri,
target_dir,
target_url,
target_pushurl,
existing,
shared,
group,
publish_depends,
publish_by_default,
as_common_datasrc,
annex_wanted,
annex_group,
annex_groupwanted,
inherit
)
if not path:
# nothing new was created
# TODO is 'notneeded' appropriate in this case?
currentds_ap['status'] = 'notneeded'
# TODO explain status in 'message'
yield currentds_ap
yielded.add(currentds_ap['path'])
continue
remote_repos_to_run_hook_for.append((path, currentds_ap))
# publish web-interface to root dataset on publication server
if current_ds.path == ds.path and ui:
lgr.info("Uploading web interface to %s" % path)
try:
CreateSibling.upload_web_interface(path, ssh, shared, ui)
except CommandError as e:
currentds_ap['status'] = 'error'
currentds_ap['message'] = (
"failed to push web interface to the remote datalad repository (%s)",
exc_str(e))
yield currentds_ap
yielded.add(currentds_ap['path'])
continue
# in reverse order would be depth first
lgr.info("Running post-update hooks in all created siblings")
# TODO: add progressbar
for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
# Trigger the hook
lgr.debug("Running hook for %s", path)
try:
ssh("cd {} && hooks/post-update".format(
sh_quote(_path_(path, ".git"))))
except CommandError as e:
currentds_ap['status'] = 'error'
currentds_ap['message'] = (
"failed to run post-update hook under remote path %s (%s)",
path, exc_str(e))
yield currentds_ap
yielded.add(currentds_ap['path'])
continue
            if currentds_ap['path'] not in yielded:
# if we were silent until now everything is just splendid
currentds_ap['status'] = 'ok'
                yield currentds_ap

    @staticmethod
def _get_ds_remote_shared_setting(ds, name, ssh):
"""Figure out setting of sharedrepository for dataset's `name` remote"""
shared = None
try:
current_super_url = CreateSibling._get_remote_url(
ds, name)
current_super_ri = RI(current_super_url)
out, err = ssh('git -C {} config --get core.sharedrepository'.format(
# TODO -- we might need to expanduser taking .user into account
# but then it must be done also on remote side
sh_quote(current_super_ri.path))
)
shared = out.strip()
if err:
lgr.warning("Got stderr while calling ssh: %s", err)
except CommandError as e:
lgr.debug(
"Could not figure out remote shared setting of %s for %s due "
"to %s",
ds, name, exc_str(e)
)
# could well be ok if e.g. not shared
# TODO: more detailed analysis may be?
        return shared

    @staticmethod
def _get_remote_url(ds, name):
"""A little helper to get url from pushurl or from url if not defined"""
# take pushurl if present, if not -- just a url
url = ds.config.get('remote.%s.pushurl' % name) or \
ds.config.get('remote.%s.url' % name)
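        # e.g. (hypothetical values) with remote.myserver.pushurl unset and
        # remote.myserver.url='ssh://example.com/srv/ds', this returns the latter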
if not url:
raise ValueError(
"%s had neither pushurl or url defined for %s" % (ds, name)
)
        return url

    @staticmethod
def init_remote_repo(path, ssh, shared, dataset, description=None):
cmd = "git -C {} init{}".format(
sh_quote(path),
" --shared='{}'".format(sh_quote(shared)) if shared else '')
try:
ssh(cmd)
except CommandError as e:
lgr.error("Initialization of remote git repository failed at %s."
"\nError: %s\nSkipping ..." % (path, exc_str(e)))
return False
if isinstance(dataset.repo, AnnexRepo):
# init remote git annex repo (part fix of #463)
try:
ssh(
"git -C {} annex init {}".format(
sh_quote(path),
sh_quote(description)
if description else '')
)
except CommandError as e:
lgr.error("Initialization of remote git annex repository failed at %s."
"\nError: %s\nSkipping ..." % (path, exc_str(e)))
return False
        return True

    @staticmethod
def create_postupdate_hook(path, ssh, dataset):
# location of post-update hook file, logs folder on remote target
hooks_remote_dir = opj(path, '.git', 'hooks')
# make sure hooks directory exists (see #1251)
ssh('mkdir -p {}'.format(sh_quote(hooks_remote_dir)))
hook_remote_target = opj(hooks_remote_dir, 'post-update')
# create json command for current dataset
log_filename = 'datalad-publish-hook-$(date +%s).log' % TIMESTAMP_FMT
hook_content = r'''#!/bin/bash
git update-server-info
#
# DataLad
#
# (Re)generate meta-data for DataLad Web UI and possibly init new submodules
dsdir="$(dirname $0)/../.."
logfile="$dsdir/{WEB_META_LOG}/{log_filename}"
if [ ! -e "$dsdir/.git" ]; then
echo Assumption of being under .git has failed >&2
exit 1
fi
mkdir -p "$dsdir/{WEB_META_LOG}" # assure logs directory exists
( which datalad > /dev/null \
&& ( cd "$dsdir"; GIT_DIR="$PWD/.git" datalad ls -a --json file .; ) \
|| echo "E: no datalad found - skipping generation of indexes for web frontend"; \
) &> "$logfile"
# Some submodules might have been added and thus we better init them
( cd "$dsdir"; git submodule update --init || : ; ) >> "$logfile" 2>&1
'''.format(WEB_META_LOG=WEB_META_LOG, **locals())
with make_tempfile(content=hook_content) as tempf:
# create post_update hook script
# upload hook to dataset
ssh.copy(tempf, hook_remote_target)
# and make it executable
            ssh('chmod +x {}'.format(sh_quote(hook_remote_target)))

    @staticmethod
def upload_web_interface(path, ssh, shared, ui):
# path to web interface resources on local
webui_local = opj(dirname(datalad.__file__), 'resources', 'website')
# local html to dataset
html_local = opj(webui_local, "index.html")
# name and location of web-interface html on target
html_targetname = {True: ui, False: "index.html"}[isinstance(ui, str)]
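        # i.e. use `ui` itself as the filename when a string was given,
        # otherwise fall back to "index.html"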
html_target = opj(path, html_targetname)
# upload ui html to target
ssh.copy(html_local, html_target)
# upload assets to the dataset
webresources_local = opj(webui_local, 'assets')
webresources_remote = opj(path, WEB_HTML_DIR)
ssh('mkdir -p {}'.format(sh_quote(webresources_remote)))
ssh.copy(webresources_local, webresources_remote, recursive=True)
# minimize and upload js assets
for js_file in glob(opj(webresources_local, 'js', '*.js')):
with open(js_file) as asset:
try:
from jsmin import jsmin
# jsmin = lambda x: x # no minimization
minified = jsmin(asset.read()) # minify asset
except ImportError:
lgr.warning(
"Will not minify web interface javascript, no jsmin available")
minified = asset.read() # no minify available
with make_tempfile(content=minified) as tempf: # write minified to tempfile
js_name = js_file.split('/')[-1]
ssh.copy(tempf, opj(webresources_remote, 'assets', 'js', js_name)) # and upload js
# explicitly make web+metadata dir of dataset world-readable, if shared set to 'all'
mode = None
if shared in (True, 'true', 'all', 'world', 'everybody'):
mode = 'a+rX'
elif shared == 'group':
mode = 'g+rX'
elif str(shared).startswith('0'):
mode = shared
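        # e.g. shared='all' -> 'a+rX', shared='group' -> 'g+rX', shared='0775' -> '0775'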
if mode:
ssh('chmod {} -R {} {}'.format(
mode,
sh_quote(dirname(webresources_remote)),
sh_quote(opj(path, 'index.html'))))