-
Notifications
You must be signed in to change notification settings - Fork 111
/
create_sibling_gitlab.py
598 lines (551 loc) · 23.3 KB
/
create_sibling_gitlab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""High-level interface for creating a publication target on a GitLab instance
"""
__docformat__ = 'restructuredtext'
import logging
from ..interface.base import (
build_doc,
Interface,
)
from ..interface.common_opts import (
recursion_flag,
recursion_limit,
publish_depends,
)
from ..interface.utils import eval_results
from ..support.param import Parameter
from ..support.constraints import (
EnsureChoice,
EnsureNone,
EnsureStr,
)
from ..utils import (
assure_list,
)
from ..distribution.dataset import (
datasetmethod,
EnsureDataset,
require_dataset,
resolve_path,
)
from ..dochelpers import exc_str
# bound methods
from ..distribution.siblings import Siblings
from ..local.subdatasets import Subdatasets
# module-level logger shared by the command and its helpers
lgr = logging.getLogger('datalad.distributed.create_sibling_gitlab')
# project layouts accepted for --layout (and the matching
# 'datalad.gitlab-SITENAME-layout' configuration); values outside this
# tuple are rejected when a dataset is processed
known_layout_labels = ('hierarchy', 'collection', 'flat')
# access methods accepted for --access (and the matching
# 'datalad.gitlab-SITENAME-access' configuration)
known_access_labels = ('http', 'ssh', 'ssh+http')
@build_doc
class CreateSiblingGitlab(Interface):
    """Create dataset sibling at a GitLab site

    A Git repository can be created at any location/path a given user has
    appropriate permissions for. API access and authentication are implemented
    via python-gitlab, and all its features are supported. A particular GitLab
    site must be configured in a named section of a python-gitlab.cfg file
    (see https://python-gitlab.readthedocs.io/en/stable/cli.html#configuration
    for details), such as::

      [mygit]
      url = https://git.example.com
      api_version = 4
      private_token = abcdefghijklmnopqrst

    Subsequently, this site is identified by its name ('mygit' in the example
    above).

    (Recursive) sibling creation for all, or a selected subset of subdatasets
    is supported. Three different project layouts for nested datasets are
    supported (see --layout):

    "hierarchy"
      Each dataset is placed into its own group, and the actual GitLab
      project for a dataset is put in a project named "_repo_" inside
      this group. Using this layout, arbitrarily deep hierarchies of
      nested datasets can be represented, while the hierarchical structure
      is reflected in the project path. This is the default layout, if
      no project path is specified.
    "flat"
      All datasets are placed in the same group. The name of a project
      is its relative path within the root dataset, with all path separator
      characters replaced by '--'.
    "collection"
      This is a hybrid layout, where the root dataset is placed in a "_repo_"
      project inside a group, and all nested subdatasets are represented
      inside the group using a "flat" layout.

    GitLab cannot host dataset content. However, in combination with
    other data sources (and siblings), publishing a dataset to GitLab can
    facilitate distribution and exchange, while still allowing any dataset
    consumer to obtain actual data content from alternative sources.

    *Configuration*

    All configuration switches and options for GitLab sibling creation can
    be provided as arguments to the command. However, it is also possible to
    specify a particular setup in a dataset's configuration. This is
    particularly important when managing large collections of datasets.
    Configuration options are:

    "datalad.gitlab-default-site"
        Name of the default GitLab site (see --site)
    "datalad.gitlab-SITENAME-siblingname"
        Name of the sibling configured for the local dataset that points
        to the GitLab instance SITENAME (see --name)
    "datalad.gitlab-SITENAME-layout"
        Project layout used at the GitLab instance SITENAME (see --layout)
    "datalad.gitlab-SITENAME-access"
        Access method used for the GitLab instance SITENAME (see --access)
    "datalad.gitlab-SITENAME-project"
        Project location/path used for a dataset at GitLab instance
        SITENAME (see --project). Configuring this is useful for deriving
        project paths for subdatasets, relative to superdataset.
    """
    _params_ = dict(
        path=Parameter(
            args=('path',),
            metavar='PATH',
            nargs='*',
            doc="""selectively create siblings for any datasets underneath a given
            path. By default only the root dataset is considered."""),
        dataset=Parameter(
            args=("--dataset", "-d",),
            doc="""reference or root dataset. If no path constraints are given,
            a sibling for this dataset will be created. In this and all other
            cases, the reference dataset is also consulted for the GitLab
            configuration, and desired project layout. If no dataset is given,
            an attempt is made to identify the dataset based on the current
            working directory""",
            constraints=EnsureDataset() | EnsureNone()),
        site=Parameter(
            args=('--site',),
            metavar='SITENAME',
            doc="""name of the GitLab site to create a sibling at. Must match an
            existing python-gitlab configuration section with location and
            authentication settings (see
            https://python-gitlab.readthedocs.io/en/stable/cli.html#configuration).
            By default the dataset configuration is consulted.
            """,
            constraints=EnsureNone() | EnsureStr()),
        project=Parameter(
            args=('--project',),
            metavar='NAME/LOCATION',
            doc="""project path at the GitLab site. If a subdataset of the
            reference dataset is processed, its project path is automatically
            determined by the `layout` configuration, by default.
            """,
            constraints=EnsureNone() | EnsureStr()),
        layout=Parameter(
            args=('--layout',),
            constraints=EnsureChoice(None, *known_layout_labels),
            doc="""layout of projects at the GitLab site, if a collection, or
            a hierarchy of datasets and subdatasets is to be created.
            By default the dataset configuration is consulted.
            """),
        recursive=recursion_flag,
        recursion_limit=recursion_limit,
        name=Parameter(
            args=('-s', '--name',),
            metavar='NAME',
            doc="""name to represent the GitLab sibling remote in the local
            dataset installation. If not specified a name is looked up in the
            dataset configuration, or defaults to the `site` name""",
            constraints=EnsureStr() | EnsureNone()),
        existing=Parameter(
            args=("--existing",),
            constraints=EnsureChoice('skip', 'error', 'reconfigure'),
            doc="""desired behavior when already existing or configured
            siblings are discovered. 'skip': ignore; 'error': fail, if access
            URLs differ; 'reconfigure': use the existing repository and
            reconfigure the local dataset to use it as a sibling""",),
        access=Parameter(
            args=("--access",),
            constraints=EnsureChoice(None, *known_access_labels),
            doc="""access method used for data transfer to and from the sibling.
            'ssh': read and write access use the SSH protocol; 'http': read and
            write access use HTTP requests; 'ssh+http': read access is done via
            HTTP and write access performed with SSH. Dataset configuration is
            consulted for a default, 'http' is used otherwise.""",),
        description=Parameter(
            args=("--description",),
            doc="""brief description for the GitLab project (displayed on the
            site)""",
            constraints=EnsureStr() | EnsureNone()),
        publish_depends=publish_depends,
        dryrun=Parameter(
            args=("--dryrun",),
            action="store_true",
            doc="""If this flag is set, no communication with GitLab is
            performed, and no repositories will be created. Instead
            would-be repository names and configurations are reported for all
            relevant datasets
            """),
    )

    @staticmethod
    @datasetmethod(name='create_sibling_gitlab')
    @eval_results
    def __call__(
            path=None,
            site=None,
            project=None,
            layout=None,
            dataset=None,
            recursive=False,
            recursion_limit=None,
            name=None,
            existing='error',
            access=None,
            publish_depends=None,
            description=None,
            dryrun=False):
        # Generator of result records: one or more per processed dataset,
        # as produced by _proc_dataset(). Parameter semantics are documented
        # in `_params_` above.
        path = resolve_path(assure_list(path), ds=dataset) \
            if path else None

        # a single explicit project path cannot be shared by several datasets
        if project and (recursive or (path and len(path) > 1)):
            raise ValueError(
                'Providing a GitLab project name/location cannot be combined '
                'with recursive operation or multiple paths, as each dataset '
                'needs to be mapped onto its own individual project.')
        # what to operate on
        ds = require_dataset(
            dataset, check_installed=True, purpose='create GitLab sibling(s)')

        # cache for objects of gitlab sites (we could face different ones
        # in a single hierarchy, cache them to avoid duplicate initialization
        # while still being able to process each dataset individually
        siteobjs = dict()

        # settings that are identical for every dataset processed by this
        # call; passed by keyword to avoid repeating a long positional list
        common = dict(
            site=site,
            project=project,
            remotename=name,
            layout=layout,
            existing=existing,
            access=access,
            dryrun=dryrun,
            siteobjs=siteobjs,
            depends=publish_depends,
            description=description,
        )

        # which datasets to process?
        if path is None:
            # no path constraint: the reference dataset itself is a target
            for r in _proc_dataset(ds, ds, **common):
                yield r
        if path or recursive:
            # also include any matching subdatasets
            for subds in ds.subdatasets(
                    path=path,
                    # we can only operate on present datasets
                    fulfilled=True,
                    recursive=recursive,
                    recursion_limit=recursion_limit,
                    contains=None,
                    bottomup=False,
                    result_xfm='datasets',
                    return_type='generator'):
                for r in _proc_dataset(ds, subds, **common):
                    yield r
def _proc_dataset(refds, ds, site, project, remotename, layout, existing,
                  access, dryrun, siteobjs, depends, description):
    """Create or (re)configure the GitLab sibling of a single dataset

    This is a generator of result record dictionaries. Unless `dryrun` is
    set, it looks up (and, if missing, creates) the target project at the
    GitLab site and then configures a matching sibling in `ds`, relaying
    the results of that ``siblings('configure', ...)`` call.

    Parameters
    ----------
    refds : Dataset
      Reference (root) dataset. Its configuration serves as a fallback
      for settings not found in `ds`, and project paths of subdatasets are
      derived relative to it.
    ds : Dataset
      The dataset to create the sibling for (may be identical to `refds`).
    site : str or None
      Name of the GitLab site. If None, 'datalad.gitlab-default-site' is
      looked up in `refds`, then in `ds`.
    project : str or None
      Project path at the site. If None, it is taken from, or derived
      from, the 'datalad.gitlab-SITENAME-project' configuration.
    remotename : str or None
      Name of the sibling. If None, the
      'datalad.gitlab-SITENAME-siblingname' configuration is consulted,
      with the site name as the final fallback.
    layout : str or None
      One of `known_layout_labels`. If None, configuration is consulted,
      falling back on 'hierarchy'.
    existing : {'skip', 'error', 'reconfigure'}
      Behavior when a sibling or project already exists.
    access : str or None
      One of `known_access_labels`. If None, configuration is consulted,
      falling back on 'http'.
    dryrun : bool
      If true, report the would-be configuration and stop before any
      communication with GitLab.
    siteobjs : dict
      Cache mapping site names to `GitLabSite` instances. It is updated
      in place, so that a caller passing the same dict for several
      datasets initializes each site only once.
    depends : object
      Passed on as `publish_depends` to the sibling configuration.
    description : str or None
      Project description, used when a new project is created.

    Raises
    ------
    ValueError
      If no site can be determined, or an unknown layout or access label
      is given or configured.
    """
    # basic result setup
    res_kwargs = dict(
        action='create_sibling_gitlab',
        refds=refds.path,
        path=ds.path,
        type='dataset',
        logger=lgr,
    )
    if description:
        res_kwargs['description'] = description

    if site is None:
        # no site given: the default configured in the reference dataset
        # takes precedence, the processed dataset's own configuration is
        # the fallback
        site = refds.config.get(
            'datalad.gitlab-default-site',
            ds.config.get('datalad.gitlab-default-site', None))
    if site is None:
        # this means the most top-level dataset has no idea about
        # gitlab, and no site was specified as an argument
        # fail rather than give an error result, as this is very
        # unlikely to be intentional
        raise ValueError(
            'No GitLab site was specified (--site) or configured '
            'in {} (datalad.gitlab-default-site)'.format(ds))
    res_kwargs['site'] = site

    # determine target remote name, unless given
    if remotename is None:
        remotename_var = 'datalad.gitlab-{}-siblingname'.format(site)
        remotename = ds.config.get(
            remotename_var,
            # use config from parent, if needed
            refds.config.get(
                remotename_var,
                # fall back on site name, if nothing else can be used
                site))
    res_kwargs['sibling'] = remotename

    # check against existing remotes
    dremotes = {
        r['name']: r
        for r in ds.siblings(
            action='query',
            # fastest possible
            get_annex_info=False,
            recursive=False,
            result_renderer='disabled')
    }
    if existing == 'skip' and remotename in dremotes:
        # we have a conflict of target remote and the
        # set of existing remotes
        yield dict(
            res_kwargs,
            status='notneeded',
        )
        return
    # TODO for existing == error, check against would be gitlab URL
    # cannot be done in, needs an idea of the project path config
    # and an API call to gitlab

    if layout is None:
        # figure out the layout of projects on the site
        # use the reference dataset as default, and fall back
        # on 'hierarchy' as the most generic method of representing
        # the filesystem in a group/subgroup structure
        layout_var = 'datalad.gitlab-{}-layout'.format(site)
        layout = ds.config.get(
            layout_var, refds.config.get(
                layout_var, 'hierarchy'))
    if layout not in known_layout_labels:
        raise ValueError(
            "Unknown site layout '{}' given or configured, "
            "known ones are: {}".format(layout, known_layout_labels))

    if access is None:
        access_var = 'datalad.gitlab-{}-access'.format(site)
        access = ds.config.get(
            access_var, refds.config.get(
                access_var, 'http'))
    if access not in known_access_labels:
        raise ValueError(
            "Unknown site access '{}' given or configured, "
            "known ones are: {}".format(access, known_access_labels))

    project_var = 'datalad.gitlab-{}-project'.format(site)
    process_root = refds == ds
    if project is None:
        # look for a specific config in the dataset
        project = ds.config.get(project_var, None)

    if project and process_root and layout == 'collection':
        # the root of a collection
        project = '{}/_repo_'.format(project)
    elif project is None and not process_root:
        # check if we can build one from the refds config
        ref_project = refds.config.get(project_var, None)
        if ref_project:
            # layout-specific derivation of a path from
            # the reference dataset configuration
            rproject = ds.pathobj.relative_to(refds.pathobj).as_posix()
            if layout == 'hierarchy':
                project = '{}/{}/_repo_'.format(ref_project, rproject)
            elif layout == 'collection':
                project = '{}/{}'.format(
                    ref_project,
                    rproject.replace('/', '--'))
            else:
                project = '{}--{}'.format(
                    ref_project,
                    rproject.replace('/', '--'))

    if project is None:
        yield dict(
            res_kwargs,
            status='error',
            message='No project path specified, and no configuration '
                    'to derive one',
        )
        return

    res_kwargs['project'] = project

    if dryrun:
        # this is as far as we can get without talking to GitLab
        yield dict(
            res_kwargs,
            status='ok',
            dryrun=True,
        )
        return

    # and now talk to GitLab for real
    site_api = siteobjs.get(site)
    if site_api is None:
        # remember the site object, so that other datasets processed with
        # the same cache do not have to initialize it again
        site_api = siteobjs[site] = GitLabSite(site)

    site_project = site_api.get_project(project)
    if site_project is None:
        try:
            site_project = site_api.create_project(project, description)
        except Exception as e:
            yield dict(
                res_kwargs,
                status='error',
                message=('Failed to create GitLab project: %s', exc_str(e))
            )
            return
        else:
            # report success
            yield dict(
                res_kwargs,
                # relay all attributes
                project_attributes=site_project,
                status='ok',
            )
    else:
        # there already is a project
        if existing == 'error':
            # be nice and only actually error if there is a real mismatch
            if remotename not in dremotes:
                yield dict(
                    res_kwargs,
                    project_attributes=site_project,
                    status='error',
                    message=(
                        "There is already a project at '%s' on site '%s', "
                        "but no sibling with name '%s' is configured, "
                        "maybe use --existing=reconfigure",
                        project, site, remotename,
                    )
                )
                return
            # the sibling's (fetch) URL must be compared against the URL
            # this command would configure for the requested access method,
            # i.e. the same selection as in the configure call below:
            # 'ssh+http' fetches via HTTP, only 'ssh' fetches via SSH
            if access in ('http', 'ssh+http'):
                url_label, url_key = 'HTTP', 'http_url_to_repo'
            else:
                url_label, url_key = 'SSH', 'ssh_url_to_repo'
            configured_url = dremotes[remotename].get('url', None)
            # use False as a default so that there is a
            # mismatch, even if both are missing
            if configured_url != site_project.get(url_key, False):
                yield dict(
                    res_kwargs,
                    project_attributes=site_project,
                    status='error',
                    message=(
                        "There is already a project at '%s' on site '%s', "
                        "but %s access URL '%s' does not match '%s', "
                        "maybe use --existing=reconfigure",
                        project, site, url_label,
                        configured_url,
                        site_project.get(url_key, None)
                    )
                )
                return
        yield dict(
            res_kwargs,
            project_attributes=site_project,
            status='notneeded',
            message=(
                "There is already a project at '%s' on site '%s'",
                project, site,
            )
        )

    # first make sure that annex doesn't touch this one
    # but respect any existing config
    ignore_var = 'remote.{}.annex-ignore'.format(remotename)
    if ignore_var not in ds.config:
        ds.config.add(ignore_var, 'true', where='local')

    for res in ds.siblings(
            'configure',
            name=remotename,
            url=site_project['http_url_to_repo']
            if access in ('http', 'ssh+http')
            else site_project['ssh_url_to_repo'],
            pushurl=site_project['ssh_url_to_repo']
            if access in ('ssh', 'ssh+http')
            else None,
            recursive=False,
            publish_depends=depends,
            result_renderer='disabled',
            return_type='generator'):
        yield res
class GitLabSite(object):
    """Access to a single GitLab site via python-gitlab

    The connection is set up from the python-gitlab configuration section
    named like the site.
    """

    def __init__(self, site):
        """
        Parameters
        ----------
        site : str
          Name of the python-gitlab configuration section to use.

        Raises
        ------
        ValueError
          If python-gitlab reports a `GitlabDataError` for the configuration.
        """
        # import locally, and keep a handle on the module for access to
        # its exception classes in the other methods
        import gitlab
        self.gitlab = gitlab
        try:
            connection = gitlab.Gitlab.from_config(site)
        except gitlab.config.GitlabDataError as exc:
            msg = '{}, please configure access to this GitLab instance'.format(
                str(exc))
            raise ValueError(msg)
        self.site = connection

    def get_project(self, path):
        """Return the attributes of the project at `path`, or None

        None is returned when the lookup raises `GitlabGetError`.
        """
        try:
            found = self.site.projects.get(path)
            return found.attributes
        except self.gitlab.GitlabGetError:
            lgr.debug("Project with path '%s' does not yet exist at site '%s'",
                      path, self.site.url)
            return None

    def create_project(self, path, description=None):
        """Create a project at `path` and return its attributes

        The last component of `path` becomes the project name, the
        namespace for the leading components is determined by
        `_obtain_namespace()`. A `description` is only submitted if it is
        not empty.
        """
        components = path.split('/')
        parent_ns = self._obtain_namespace(components)
        # check for options:
        # https://gitlab.com/help/api/projects.md#create-project
        props = {
            'name': components[-1],
            'namespace_id': parent_ns,
        }
        if description:
            props['description'] = description
        created = self.site.projects.create(props)
        return created.attributes

    def _obtain_namespace(self, path_l):
        """Return the ID of the namespace (group) a project is placed in

        Parameters
        ----------
        path_l : list of str
          Components of the project path, the last one being the project
          name.

        Returns
        -------
        ID of the direct parent group, which is created if it does not
        exist yet; or None, if the path has a single component, or if its
        only parent component equals the username of the authenticated
        user.

        Raises
        ------
        ValueError
          If neither the parent group, nor its own parent group exist.
        RuntimeError
          If creating the parent group fails.
        """
        if len(path_l) == 1:
            # no nesting whatsoever
            return None

        group_path = '/'.join(path_l[:-1])
        try:
            group_id = self.site.groups.get(group_path).get_id()
            lgr.debug("Found existing parent group '%s' with ID %s",
                      group_path, group_id)
        except self.gitlab.GitlabGetError:
            # the direct parent does not exist, look for its own parent
            # to create it in
            needs_grandparent = len(path_l) > 2
            try:
                grandparent = self.site.groups.get(
                    '/'.join(path_l[:-2])) if needs_grandparent else None
            except self.gitlab.GitlabGetError:
                raise ValueError(
                    "No parent group {} for project {} found, "
                    "and a group {} also does not exist. At most one "
                    "parent group would be created.".format(
                        group_path,
                        '/'.join(path_l),
                        '/'.join(path_l[:-2]),
                    ))
            # create the group for the target project
            new_group_name = path_l[-2]
            try:
                # prevent failure due to specification of a users personal
                # group, always exists, cannot and must not be created
                self.site.auth()
                if len(path_l) == 2:
                    username = self.site.user.attributes.get('username', None)
                    if path_l[0] == username:
                        # attempt to create a personal project in the users
                        # top-level personal group-- this is the same as
                        # having no parent namespace, don't attempt to
                        # create the group
                        return None
                group_spec = dict(
                    name=new_group_name,
                    path=new_group_name,
                    parent_id=grandparent.get_id() if grandparent else None)
                group_id = self.site.groups.create(group_spec).get_id()
            except self.gitlab.GitlabCreateError as exc:
                if grandparent:
                    location = repr(grandparent.attributes['full_path'])
                else:
                    location = 'the account root'
                raise RuntimeError(
                    "Failed to create parent group '{}' under {}: {}".format(
                        new_group_name,
                        location,
                        str(exc)),
                )
        return group_id