-
Notifications
You must be signed in to change notification settings - Fork 5
/
model.py
551 lines (445 loc) · 14.3 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
"""
Contains the CLI data model
"""
import abc
import enum
import itertools
import re
import typing
import unicodedata
from abc import abstractmethod
import spacy
from dataclasses import InitVar, dataclass, field
from ruamel.yaml import YAML, yaml_object
from spacy import tokens
from word2number import w2n
from acclimatise import cli_types
from acclimatise.name_generation import generate_name, segment_string
from acclimatise.nlp import wordsegment
from acclimatise.yaml import yaml
def useless_name(name: typing.List[str]):
    """
    Decide whether a candidate name (a sequence of word tokens) is too short or uninformative to be used as a
    variable name.

    A name is rejected when it has no tokens, when its tokens join to at most one character, when the joined text
    is entirely numeric, or when every token is successfully parsed by ``w2n.word_to_num``.
    """
    combined = "".join(name)
    if len(name) < 1 or len(combined) <= 1 or combined.isnumeric():
        return True

    # Names made up purely of spelled-out numbers are no more useful than digits
    try:
        parsed = [w2n.word_to_num(token) is not None for token in name]
    except Exception:
        # Any failure while parsing a token is treated as "this is not a purely numeric name"
        return False
    return all(parsed)
@yaml_object(yaml)
@dataclass
class Command:
    """
    Class representing an entire command or subcommand, e.g. `bwa mem` or `grep`
    """

    def __post_init__(self):
        """
        Move the special-purpose flags (help, version, usage) out of ``named`` and into their dedicated fields.
        A field that the caller already populated is left untouched, and ``named`` is not scanned for it.
        """
        if self.help_flag is None:
            self.help_flag = self._extract_special_flag(self._is_help_flag)

        if self.version_flag is None:
            self.version_flag = self._extract_special_flag(
                lambda flag: "--version" in flag.synonyms
            )

        if self.usage_flag is None:
            self.usage_flag = self._extract_special_flag(
                lambda flag: "--usage" in flag.synonyms
            )

    @staticmethod
    def _is_help_flag(flag: "Flag") -> bool:
        """
        True if the flag looks like a help flag: `--help`, `-help`, or an `-h` that takes no argument
        """
        return (
            "--help" in flag.synonyms
            or "-help" in flag.synonyms
            or ("-h" in flag.synonyms and isinstance(flag.args, EmptyFlagArg))
        )

    def _extract_special_flag(
        self, is_special: typing.Callable[["Flag"], bool]
    ) -> typing.Optional["Flag"]:
        """
        Remove every flag accepted by ``is_special`` from ``self.named`` (in place) and return the last one that
        was removed, or None if nothing matched
        """
        extracted = None
        # Iterate over a snapshot: removing from the very list being iterated makes the iterator skip the
        # element that follows each removal, so a second matching flag could be silently left behind
        for flag in list(self.named):
            if is_special(flag):
                extracted = flag
                self.named.remove(flag)
        return extracted

    @property
    def as_filename(self) -> str:
        """
        Returns a sample filename that might be used to store this command (without a suffix)
        """
        return "_".join(self.command).replace("-", "_")

    @property
    def depth(self) -> int:
        """
        Returns the "depth" of this command, aka how many ancestors it has. An orphan command has depth 0, a subcommand
        has depth 1, a sub-sub-command has depth 2 etc
        """
        cmd = self
        depth = 0
        # Walk up the parent links until we reach a command without a parent
        while cmd.parent is not None:
            cmd = cmd.parent
            depth += 1
        return depth

    def command_tree(self) -> typing.Generator["Command", None, None]:
        """
        Returns a generator over the entire command tree. e.g. if this command has 2 subcommands, each with 2
        subcommands, this will return a generator with 7 Commands. This command is yielded first, followed by each
        subcommand's own tree in turn.
        """
        yield self
        for command in self.subcommands:
            yield from command.command_tree()

    positional: typing.List["Positional"]
    """
    All positional arguments supported by this command
    """

    named: typing.List["Flag"]
    """
    All named arguments (flags) supported by this command
    """

    command: typing.List[str]
    """
    The command line used to invoke this command, e.g. ["bwa", "mem"]
    """

    parent: typing.Optional["Command"] = None
    """
    The parent command, if this is a subcommand
    """

    subcommands: typing.List["Command"] = field(default_factory=list)
    """
    A list of subcommands of this command, e.g. "bwa" has the subcommand "bwa mem"
    """

    help_flag: typing.Optional["Flag"] = None
    """
    If identified, this is the flag that returns help text
    """

    usage_flag: typing.Optional["Flag"] = None
    """
    If identified, this is the flag that returns usage examples
    """

    version_flag: typing.Optional["Flag"] = None
    """
    If identified, this is the flag that returns the version of the executable
    """

    help_text: typing.Optional[str] = None
    """
    Optionally, the entire help text that was used to generate this Command
    """

    generated_using: typing.Optional[str] = None
    """
    Optionally, the flag that was used to generate this command. Often this will be the same as the help_flag
    """
@yaml_object(yaml)
@dataclass(unsafe_hash=True)
class CliArgument:
    """
    Common base for every kind of CLI argument, whether named (a flag) or positional.

    The methods below are marked abstract, but because this class does not derive from ``abc.ABC`` the marker is
    not enforced at instantiation time; it documents what subclasses are expected to provide.
    """

    description: str
    """
    Free-text explanation of what this argument does
    """

    @abstractmethod
    def argument_name(self) -> typing.List[str]:
        """
        Return the words making up this argument's name. The base implementation returns an empty list.
        """
        return []

    @abstractmethod
    def full_name(self) -> str:
        """
        Return a human-readable name for this argument
        """

    @abstractmethod
    def get_type(self) -> cli_types.CliType:
        """
        Return a type object describing the kind of data held by this argument, e.g. a CliList for an array type
        """
@yaml_object(yaml)
@dataclass
class Positional(CliArgument):
    """
    A positional command-line argument. This probably means that it is required, and has no arguments like flags do
    """

    def full_name(self) -> str:
        """
        Getting the full name for a positional argument is easy - it's just the parameter name
        """
        return self.name

    position: int
    """
    The position in the command line that this argument must occupy
    """

    name: str
    """
    The name of this argument
    """

    description: str
    """
    A description of the function of this argument
    """

    optional: bool = False
    """
    If true, this argument is not required
    """

    def get_type(self) -> cli_types.CliType:
        """
        Infer the type of this argument, trying the argument name first and then the description. Falls back to
        a string type if neither source yields a type.
        """
        # Previously the second attempt re-used the name (full_name() is just self.name), so the description
        # was never consulted
        for hint in (self.name, self.description):
            if hint is None:
                continue
            inferred = infer_type(hint)
            if inferred is not None:
                return inferred
        return cli_types.CliString()
@yaml_object(yaml)
@dataclass(unsafe_hash=True)
class Flag(CliArgument):
    """
    Represents one single flag, with all synonyms for it, and all arguments, e.g. `-h, --help`
    """

    synonyms: typing.List[str]
    """
    A list of different ways to invoke this same option, e.g. ``-v`` and ``--verbose``
    """

    description: typing.Optional[str]
    """
    A description of the function of this flag
    """

    args: "FlagArg"
    """
    Describes the arguments to this flag, e.g. ``-n 1`` has a single numeric argument
    """

    optional: bool = True
    """
    If true, this flag is not required (the default)
    """

    def argument_text(self) -> typing.List[str]:
        """
        Returns the words making up this flag's argument(s), or an empty list if the flag has no argument object
        """
        # args can be None, e.g. when from_synonyms() is given no synonyms
        if self.args is None:
            return []
        return self.args.text()

    def variable_name(
        self, description_name: typing.List[str] = []
    ) -> typing.List[str]:
        """
        Returns a list of words that should be used in a variable name for this argument
        """
        # The super method returns the best name from the flag name or the description. If neither is sufficient, use
        # the argument to generate a name
        # NOTE(review): CliArgument as defined in this file has no variable_name method, so this call relies on
        # the parent providing one - confirm
        # The shared default list is only forwarded here, never mutated
        best = super().variable_name(description_name)
        if useless_name(best):
            nfa = list(self._name_from_arg)
            if not useless_name(nfa):
                return nfa

        return best

    @property
    def _name_from_arg(self) -> typing.Iterable[str]:
        """
        Generate candidate variable-name words for this flag by segmenting the name of its argument. Returns an
        empty list when the flag has no argument, or its argument has no ``name`` attribute
        """
        if self.args is not None and hasattr(self.args, "name"):
            return segment_string(self.args.name)
        else:
            return []

    def get_type(self) -> cli_types.CliType:
        """
        Infer the type of this flag. Tries the argument, then the flag name, then the description, in that
        order, and falls back to a string type if none of them yields a type.
        """
        # args may be None (see argument_text), in which case we skip straight to the flag name
        if self.args is not None:
            arg_type = self.args.get_type()
            if arg_type is not None:
                return arg_type

        flag_type = infer_type(self.full_name())
        if flag_type is not None:
            return flag_type

        # The description is optional, and infer_type() cannot pattern-match None
        if self.description is not None:
            description_type = infer_type(self.description)
            if description_type is not None:
                return description_type

        return cli_types.CliString()

    def full_name(self) -> str:
        """
        Getting the full name for a named flag is slightly harder, we need to find the longest synonym
        """
        return self.longest_synonym

    @staticmethod
    def from_synonyms(
        synonyms: typing.Iterable["FlagSynonym"], description: typing.Optional[str]
    ):
        """
        Creates a usable Flag object by combining the synonyms provided. The flag's argument is taken from the
        synonym reporting the largest number of arguments (the earliest such synonym wins a tie). If no synonyms
        are provided, the resulting flag has ``args=None`` and an empty synonym list.
        """
        synonym_str = []
        args = None
        arg_count = float("-inf")

        for synonym in synonyms:
            synonym_str.append(synonym.name)
            count = synonym.argtype.num_args()
            if count > arg_count:
                arg_count = count
                args = synonym.argtype

        return Flag(synonyms=synonym_str, args=args, description=description)

    @property
    def longest_synonym(self) -> str:
        """
        Returns the longest synonym this flag has. e.g. for `-h, --help`, it will return `--help`
        """
        return max(self.synonyms, key=len)

    @property
    def shortest_synonym(self) -> str:
        """
        Returns the shortest synonym this flag has. e.g. for `-h, --help`, it will return `-h`
        """
        return min(self.synonyms, key=len)
@yaml_object(yaml)
@dataclass
class FlagSynonym:
    """
    Internal helper pairing one spelling of a flag with the argument(s) that spelling takes
    """

    name: str
    """
    The complete flag string as written on the command line, e.g. "-n" or "--lines"
    """

    argtype: "FlagArg"
    """
    How many arguments this flag takes, and of what kind
    """

    @property
    def capital(self):
        """
        The flag name with leading dashes stripped, split on "-" and "_", and each piece capitalised and joined,
        e.g. "--max-len" becomes "MaxLen"
        """
        without_dashes = self.name.lstrip("-")
        pieces = re.split("[-_]", without_dashes)
        return "".join(map(str.capitalize, pieces))
# Case-insensitive keyword patterns, one per kind of value an argument might hold
int_re = re.compile(r"(int(eger)?)|size|length|max|min", re.IGNORECASE)
str_re = re.compile(r"str(ing)?", re.IGNORECASE)
float_re = re.compile(r"float|decimal", re.IGNORECASE)
bool_re = re.compile(r"bool(ean)?", re.IGNORECASE)
file_re = re.compile(r"file|path", re.IGNORECASE)
dir_re = re.compile(r"folder|directory", re.IGNORECASE)
def infer_type(string) -> typing.Optional[cli_types.CliType]:
    """
    Look at the start of a string (an argument name, description etc) for a keyword hinting at the type of the
    argument. This is generally called by the get_type() methods.

    Although the annotation allows None, every path through this function returns a type object: a string type is
    the fallback when no keyword is recognised.
    """
    # Checked in priority order; the first pattern that matches at the start of the string decides the type
    candidates = (
        (bool_re, cli_types.CliBoolean),
        (float_re, cli_types.CliFloat),
        (int_re, cli_types.CliInteger),
        (file_re, cli_types.CliFile),
        (dir_re, cli_types.CliDir),
        (str_re, cli_types.CliString),
    )
    for pattern, type_factory in candidates:
        if pattern.match(string):
            return type_factory()

    # No hint found
    return cli_types.CliString()
@yaml_object(yaml)
@dataclass
class FlagArg(abc.ABC):
    """
    Abstract data model for whatever follows a flag on the command line: nothing at all, a single argument, one
    choice out of a fixed list, or an arbitrary number of inputs
    """

    def text(self) -> typing.List[str]:
        """
        Returns the words of this argument, e.g. for name generation purposes. The base implementation has no
        text and returns an empty list
        """
        return []

    @abc.abstractmethod
    def get_type(self) -> cli_types.CliType:
        """
        Return a type object describing the kind of data held by this argument, e.g. a CliList for an array type
        """

    @abc.abstractmethod
    def num_args(self) -> int:
        """
        Calculate the multiplicity of this argument
        """
@yaml_object(yaml)
@dataclass
class EmptyFlagArg(FlagArg):
    """
    The argument model for a flag that takes no argument, e.g. `--quiet`, which is simply present or absent
    """

    def get_type(self):
        # A flag without an argument is modelled as a boolean
        return cli_types.CliBoolean()

    def num_args(self) -> int:
        # There is no argument
        return 0
@yaml_object(yaml)
@dataclass
class OptionalFlagArg(FlagArg):
    """
    The argument model for a flag taking several arguments, some of which may be omitted, e.g.
    -I FLOAT[,FLOAT[,INT[,INT]]]
    """

    names: list
    """
    The name of each argument, in order
    """

    separator: str
    """
    The string that separates one argument from the next
    """

    def text(self) -> typing.List[str]:
        # Segment each argument name into words and concatenate the results in order
        words = []
        for name in self.names:
            words.extend(wordsegment.segment(name))
        return words

    def num_args(self) -> int:
        # One argument per name
        return len(self.names)

    def get_type(self):
        # A tuple type whose element types are inferred from the individual argument names
        element_types = list(map(infer_type, self.names))
        return cli_types.CliTuple(element_types)
@yaml_object(yaml)
@dataclass
class SimpleFlagArg(FlagArg):
    """
    The argument model for a flag with exactly one argument, e.g. `-e PATTERN`, where PATTERN is the argument
    """

    name: str
    """
    The argument's name
    """

    def text(self) -> typing.List[str]:
        # The argument name, segmented into words
        return [*wordsegment.segment(self.name)]

    def num_args(self) -> int:
        # Exactly one argument
        return 1

    def get_type(self):
        # The type is inferred from the argument name
        inferred = infer_type(self.name)
        return inferred
@yaml_object(yaml)
@dataclass
class RepeatFlagArg(FlagArg):
    """
    The argument model for a flag that accepts 1 or more arguments, e.g. `--samout SAMOUTS [SAMOUTS ...]`
    """

    name: str
    """
    The argument's name
    """

    def text(self) -> typing.List[str]:
        # The argument name, segmented into words
        return [*wordsegment.segment(self.name)]

    def num_args(self) -> int:
        # Reported as a single argument
        return 1

    def get_type(self):
        # A list whose element type is inferred from the argument name
        return cli_types.CliList(infer_type(self.name))
@yaml_object(yaml)
@dataclass
class ChoiceFlagArg(FlagArg):
    """
    When a flag accepts one option from a list of options, e.g. `-s {yes,no,reverse}`
    """

    choices: typing.Set[str]
    """
    Set of possible choices that could be used for this argument
    """

    def _ordered_choices(self) -> typing.List[str]:
        """
        Returns the choices as a list in a reproducible order. The iteration order of a set of strings can differ
        from one interpreter run to the next (string hashes are randomised), so sets are sorted; any other
        collection already has a defined order, which is preserved.
        """
        if isinstance(self.choices, (set, frozenset)):
            return sorted(self.choices)
        return list(self.choices)

    def text(self) -> typing.List[str]:
        """
        Returns the words of every choice, segmented and concatenated, in a reproducible order
        """
        words = []
        for choice in self._ordered_choices():
            words.extend(wordsegment.segment(choice))
        return words

    def get_type(self):
        """
        Returns an enum type with one member per choice. The enum's name is built by capitalising and joining
        the choices, so a reproducible order is needed for both the name and the members to be stable between runs
        """
        ordered = self._ordered_choices()
        e = enum.Enum(
            "".join([choice.capitalize() for choice in ordered]),
            ordered,
        )
        return cli_types.CliEnum(e)

    def num_args(self) -> int:
        # Exactly one of the choices is supplied
        return 1