This repository has been archived by the owner on Nov 29, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 30
/
__init__.py
692 lines (548 loc) · 24.9 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
'''Color your output to terminal'''
import re
import sys
import time
__version__ = '0.10.6'

# Table of color types. Every entry carries:
#   'reset' - the SGR sequence that returns the type to its default,
#   're'    - a pattern recognising any SGR sequence of that type,
#   'code'  - (styles only) the SGR sequence that switches the style on.
# The insertion order is significant: code that iterates this table relies on
# it for a deterministic ordering of styles and resets.
COLOR_TYPES = {
    'fg': {
        'reset': b'\x1b[39m',
        're': re.compile(br'\x1b\[(?:3[0-79]|9[0-7]|38;[0-9;]+)m'),
    },
    'bg': {
        'reset': b'\x1b[49m',
        're': re.compile(br'\x1b\[(?:4[0-79]|10[0-7]|48;[0-9;]+)m'),
    },
    'blink': {
        'code': b'\x1b[5m',
        'reset': b'\x1b[25m',
        're': re.compile(br'\x1b\[2?5m'),
    },
    'bold': {
        'code': b'\x1b[1m',
        # Normal intensity
        'reset': b'\x1b[22m',
        # Any intensity type
        're': re.compile(br'\x1b\[(?:1|2?2)m'),
    },
    'invert': {
        'code': b'\x1b[7m',
        'reset': b'\x1b[27m',
        're': re.compile(br'\x1b\[2?7m'),
    },
    'italic': {
        'code': b'\x1b[3m',
        'reset': b'\x1b[23m',
        're': re.compile(br'\x1b\[2?3m'),
    },
    'strike': {
        'code': b'\x1b[9m',
        'reset': b'\x1b[29m',
        're': re.compile(br'\x1b\[2?9m'),
    },
    'underline': {
        'code': b'\x1b[4m',
        'reset': b'\x1b[24m',
        're': re.compile(br'\x1b\[2?4m'),
    },
}

# A palette color reference: target (`b` or `f`), a dot, then the color name
PALETTE_COLOR_RE = re.compile(r'\b([bf])\.([a-z0-9-_]+)\b')

# Select Graphic Rendition sequences. `SGR_RE` accepts any parameter and
# intermediate bytes; `SGR_COLOR_RE` only digits and semicolons (colors).
SGR_RE = re.compile(br'\x1b\x5b[\x30-\x3f]*[\x20-\x2f]*\x6d')
SGR_COLOR_RE = re.compile(br'\x1b\x5b[0-9;]*\x6d')
class Color:
    '''A color and its ANSI escape codes.

    Assigning `color` (directly, or indirectly by assigning `rgb`) recomputes
    these derived attributes:
        color_code (bytes): Every SGR sequence needed to turn the color on.
        color_reset (bytes): The default reset sequences of the types in use.
        color_types (list): `(color type, code)` tuples, one per type in use.
    '''

    # pylint: disable=too-many-instance-attributes
    def __init__(self, color, palette=None, rgb=None):
        '''Constructor.

        Args:
            color (str): A string which must contain:
                * one foreground color (hex color `f#` or palette color `f.`),
                * one background color (hex color `b#` or palette color `b.`),
                * at least one style (blink, bold, invert, italic, strike,
                  underline), or
                * a combination of the above, separated by spaces.
                Example: `"b#123123 f.status-1 bold"`
            palette (chromaterm.Palette): Palette to resolve palette colors.
            rgb (bool): Whether the color is meant for RGB-enabled terminals or
                not. Any falsy value (`False` or `None`) downscales the RGB
                colors to xterm-256; this class performs no detection of
                terminal support itself.

        Raises:
            TypeError: If `color` is not a string. If `rgb` is not a boolean.
            ValueError: If the format of `color` is invalid. If palette color is
                used with `palette=None`.
        '''
        # `rgb` and `palette` must be in place before `color`, whose setter
        # reads both of them
        self.palette = palette
        self.rgb = rgb
        self.color = color

    @property
    def color(self):
        '''String that represents the color.'''
        return self._color

    @color.setter
    def color(self, value):
        if not isinstance(value, str):
            raise TypeError('color must be a string')

        # `color` keeps the user's spelling (palette names included) while
        # `value` becomes the resolved form that is parsed below
        color = value = value.lower().strip()
        styles = tuple(k for k, v in COLOR_TYPES.items() if v.get('code'))
        color_re = r'(([bf]#[0-9a-f]{6}|' + '|'.join(styles) + r')(\s+|$))+'
        color_code = color_reset = b''
        color_types = []

        if PALETTE_COLOR_RE.search(value):
            if not self.palette:
                raise ValueError(
                    'palette color name present, but no palette specified')

            value = self.palette.resolve(value)

        if not re.fullmatch(color_re, value):
            raise ValueError(f'invalid color format {repr(value)}')

        # Colors
        for target, hex_code in re.findall(r'([bf])#([0-9a-f]{6})', value):
            if target == 'f':
                target, color_type = b'\x1b[38;', 'fg'
            else:
                target, color_type = b'\x1b[48;', 'bg'

            if color_type in [x[0] for x in color_types]:
                raise ValueError('color accepts one foreground and one '
                                 'background colors')

            # Break down hex color to RGB integers
            rgb_int = [int(hex_code[i:i + 2], 16) for i in (0, 2, 4)]

            if self.rgb:
                target += b'2;'
                color_id = b'%d;%d;%d' % tuple(rgb_int)
            else:
                target += b'5;'
                color_id = b'%d' % self.rgb_to_xterm256(*rgb_int)

            # Codes are appended while resets are prepended, so the resets end
            # up in the reverse order of the codes
            color_code = color_code + target + color_id + b'm'
            color_reset = COLOR_TYPES[color_type]['reset'] + color_reset
            color_types.append((color_type, target + color_id + b'm'))

        # Styles; `dict.fromkeys` drops repeats while keeping first-seen order
        for style in dict.fromkeys(re.findall('|'.join(styles), value)):
            color_code = color_code + COLOR_TYPES[style]['code']
            color_reset = COLOR_TYPES[style]['reset'] + color_reset
            color_types.append((style, COLOR_TYPES[style]['code']))

        # Only commit once everything has validated
        self._color = ' '.join(dict.fromkeys(color.split()))
        self.color_code = color_code
        self.color_reset = color_reset
        self.color_types = color_types

    @property
    def rgb(self):
        '''Flag for RGB-support. When changed, updates `self.color`.'''
        return self._rgb

    @rgb.setter
    def rgb(self, value):
        if value is not None and not isinstance(value, bool):
            raise TypeError('rgb must be a boolean')

        self._rgb = value

        # Update the color if present; it won't be during __init__
        if hasattr(self, '_color'):
            self.color = self._color

    @staticmethod
    def decode_sgr(source_color_code, is_reset=False):
        '''Decodes an SGR, splitting it into a list of colors, each being a list
        containing color code (bytes), is reset (bool), and color type (str, a
        key of `COLOR_TYPES`, or `None` when the code has no known type).

        Input that is not exactly one color SGR sequence (`ESC [`, digits and
        semicolons, `m`) is returned untouched as a single, typeless entry.

        Args:
            source_color_code (bytes): Bytes to be split into individual colors.
            is_reset (bool): Consider all identified colors as resets.
        '''
        # Includes non-color characters; don't touch it. The whole input has to
        # be the sequence, otherwise stray bytes would reach `int()` below.
        if not SGR_COLOR_RE.fullmatch(source_color_code):
            return [[source_color_code, False, None]]

        def make_sgr(code_id):
            '''Wraps a code identifier into a complete SGR sequence.'''
            return b'\x1b[' + code_id + b'm'

        colors = []
        # Drop the two-byte `ESC [` prefix and the trailing `m`
        codes = source_color_code[2:-1].split(b';')
        skip = 0

        for index, code in enumerate(codes):
            # Code processed by an index look-ahead; skip it
            if skip:
                skip -= 1
                continue

            # Full reset (an empty parameter counts as 0)
            if code == b'' or int(code) == 0:
                colors.append([make_sgr(b'0'), True, None])
            # Multi-code SGR
            elif code in (b'38', b'48'):
                color_type = 'fg' if code == b'38' else 'bg'

                # xterm-256
                if len(codes) > index + 2 and codes[index + 1] == b'5':
                    skip = 2
                    code = b';'.join(codes[index:index + 3])
                # RGB
                elif len(codes) > index + 4 and codes[index + 1] == b'2':
                    skip = 4
                    code = b';'.join(codes[index:index + 5])
                # Does not conform to format; do not touch code
                else:
                    return [[source_color_code, False, None]]

                colors.append([make_sgr(code), is_reset, color_type])
            # Single-code SGR
            else:
                # Round-trip through `int` to normalise leading zeros
                color = [make_sgr(b'%d' % int(code)), False, None]

                for name, color_type in COLOR_TYPES.items():
                    if color_type['re'].search(color[0]):
                        color[1] = is_reset or color[0] == color_type['reset']
                        color[2] = name

                        # Types don't overlap; only one can match
                        break

                colors.append(color)

        return colors

    @staticmethod
    def rgb_to_xterm256(_r, _g, _b):
        '''Downscale from 24-bit RGB to xterm-256.

        Compares the closest entry of the 6x6x6 color cube (codes 16 and up)
        with the closest entry of the grayscale ramp (codes 232 and up) and
        returns the code of whichever is nearer to the input.

        Args:
            _r (int): Red component.
            _g (int): Green component.
            _b (int): Blue component.
        '''
        def index(value, steps):
            '''Returns index of the step closest to value.'''
            return steps.index(min(steps, key=lambda x: abs(x - value)))

        def distance(new_r, new_g, new_b):
            '''Magnify the differences (like stdev, but avg/sqrt not needed).'''
            return (new_r - _r)**2 + (new_g - _g)**2 + (new_b - _b)**2

        # Steps between 2 shades https://www.ditig.com/256-colors-cheat-sheet,
        # the index of the closest step, and the distance to the input color
        rgb_steps = (0, 95, 135, 175, 215, 255)
        rgb_index = [index(x, rgb_steps) for x in (_r, _g, _b)]
        rgb_distance = distance(*[rgb_steps[x] for x in rgb_index])

        gray_steps = tuple(range(8, 239, 10))
        gray_index = index((_r + _g + _b) // 3, gray_steps)
        gray_distance = distance(*[gray_steps[gray_index]] * 3)

        # The cube wins ties
        if gray_distance < rgb_distance:
            return 232 + gray_index

        return 16 + (36 * rgb_index[0]) + (6 * rgb_index[1]) + rgb_index[2]

    @staticmethod
    def strip_colors(data):
        '''Returns data after stripping the existing colors and a list of inserts
        containing the stripped colors. The format of the insert is that of
        `Config.get_inserts`.

        Args:
            data (bytes): Bytes from which the colors should be stripped.
        '''
        inserts = []
        match = SGR_RE.search(data)

        while match:
            start, end = match.span()

            # Existing colors are marked as resets to indicate that they can be
            # updated if ChromaTerm already matched the same data
            for color in Color.decode_sgr(match.group(), is_reset=True):
                color.insert(0, start)
                # Prepending keeps the list in descending position order
                inserts.insert(0, color)

            # Next color's start index ignores length of this color
            data = data[:start] + data[end:]
            match = SGR_RE.search(data)

        return data, inserts
class Palette:
    '''Named colors: a lookup from a color name to its RGB hex code.'''

    def __init__(self):
        '''Constructor. The palette starts out empty.'''
        self.colors = {}

    def add_color(self, name, value):
        '''Registers a named color in the palette.

        Both arguments are lower-cased and stripped of surrounding whitespace
        before they are validated and stored.

        Args:
            name (str): The color name to be referenced. Accepts `[a-z0-9-_]+`.
            value (str): A hex color, like `#123abc`.

        Raises:
            ValueError: If `name` is reserved, already exists, or uses invalid
                characters. If `value` uses invalid characters.
            TypeError: If `name` or `value` are not strings.
        '''
        if not isinstance(name, str):
            raise TypeError('color name must be a string')
        if not isinstance(value, str):
            raise TypeError('color value must be a string')

        key = name.lower().strip()
        hex_code = value.lower().strip()

        # Names of color types (fg, bold, ...) cannot be reused
        if key in COLOR_TYPES:
            raise ValueError('color name is reserved')
        if key in self.colors:
            raise ValueError('a color with the same name already exists')
        if re.fullmatch(r'[a-z0-9-_]+', key) is None:
            raise ValueError('name accepts alphanumerics, dashes, and '
                             'underscores only')
        if re.fullmatch(r'#[0-9a-f]{6}', hex_code) is None:
            raise ValueError('palette color must be in `#123abc` format')

        self.colors[key] = hex_code

    def resolve(self, color):
        '''Returns `color` with every palette reference swapped for its hex
        value (e.g. `b.color_name` becomes `b#123abc`).

        Args:
            color (str): the string that describes a color.

        Raises:
            TypeError: If `color` is not a string.
            ValueError: If a color is not found in the palette.
        '''
        if not isinstance(color, str):
            raise TypeError('color must be a string')

        resolved = color.lower().strip()
        references = list(PALETTE_COLOR_RE.finditer(resolved))

        # Splice from right to left so the spans of the references that are
        # still pending remain valid
        for reference in reversed(references):
            target, name = reference.groups()

            if name not in self.colors:
                raise ValueError(f'color {repr(name)} not in palette')

            begin, finish = reference.span()
            replacement = f'{target}{self.colors[name]}'
            resolved = resolved[:begin] + replacement + resolved[finish:]

        return resolved
class Rule:
    '''A rule containing a regex and colors corresponding to the regex's groups.'''

    # pylint: disable=import-outside-toplevel,too-many-arguments,too-many-instance-attributes
    def __init__(self,
                 regex,
                 color=None,
                 description=None,
                 exclusive=False,
                 pcre=False):
        '''Constructor.

        Args:
            regex (str): Regular expression for getting matches in data.
            color (chromaterm.Color, dict): Color used to highlight the entire
                match. Can be a dictionary of {group: color} format.
            description (str): String to help identify the rule.
            exclusive (bool): Whether this rule's matches are off-limits to
                other rules. Only stored here; `Config.get_matches` acts on it.
            pcre (bool): Whether to use PCRE2 or default to Python's RE.

        Raises:
            TypeError: If `description` or `regex` is not a string, if
                `exclusive` or `pcre` is not a boolean, or if a color is
                neither a `Color` nor `None`.
            ValueError: If a group in `color` does not exist in `regex`.
        '''
        if description is not None and not isinstance(description, str):
            raise TypeError('description must be a string')

        if not isinstance(exclusive, bool):
            raise TypeError('exclusive must be a boolean')

        self.colors = {}
        self.description = description
        self.exclusive = exclusive
        # `pcre` must precede `regex`; the regex setter reads it to pick the
        # engine
        self.pcre = pcre
        self.regex = regex

        if not isinstance(color, dict):
            color = {0: color}

        for group, value in color.items():
            self.set_color(value, group)

    @property
    def color(self):
        '''Color used for highlight the full match (group 0) of regex.'''
        return self.colors.get(0)

    @color.setter
    def color(self, value):
        self.set_color(value)

    @property
    def pcre(self):
        '''True when the PCRE engine is used. False means Python's RE.'''
        return self._pcre

    @pcre.setter
    def pcre(self, value):
        if not isinstance(value, bool):
            raise TypeError('pcre must be a boolean')

        self._pcre = value

        # Recompile the regex if present; it won't be during __init__
        if hasattr(self, '_regex'):
            self.regex = self._regex

    @property
    def regex(self):
        '''The regex pattern.'''
        return self._regex

    @regex.setter
    def regex(self, value):
        if not isinstance(value, str):
            raise TypeError('regex must be a string')

        # The pattern is matched against bytes, so compile its encoded form
        if self.pcre:
            import chromaterm.pcre
            regex_object = chromaterm.pcre.Pattern(value.encode())
        else:
            regex_object = re.compile(value.encode())

        # Commit both together, only after a successful compile, so a bad
        # pattern leaves the rule exactly as it was
        self._regex = value
        self._regex_object = regex_object

    def get_matches(self, data):
        '''Returns a list of tuples, each containing a start index, an end index,
        and the `Color` object for that match.

        Args:
            data (bytes): Bytes to match regex against.
        '''
        matches = []

        for match in self._regex_object.finditer(data):
            for group in self.colors:
                start, end = match.span(group)

                # Ignore zero-length matches, like unmatched optional groups
                # New lines in data can only come from exclusive rules
                if start != end and b'\n' not in data[start:end]:
                    matches.append((start, end, self.colors[group]))

        return matches

    def set_color(self, color, group=0):
        '''Sets a color to be used when highlighting. The group can be used to
        limit the parts of the match which are highlighted. Group 0 (the default)
        will highlight the entire match. If a color already exists for the group,
        it is overwritten. If `color` is None, the color of `group` is cleared.

        Args:
            color (chromaterm.Color): A color for highlighting the matched input.
            group (int, str): The regex group to be be highlighted with the color.

        Raises:
            TypeError: If `color` is not an instance of `Color` or None. If
                `group` is not an integer or string.
            ValueError: If `group` does not exist in the regular expression
                (unknown name, negative index, or index beyond the last group).
        '''
        if not isinstance(group, (int, str)):
            raise TypeError('group must be an integer or a string')

        if isinstance(group, str):
            # Resolve the named group to its index; named groups are never 0,
            # so a falsy lookup means the name is unknown
            group_index = self._regex_object.groupindex.get(group)

            if not group_index:
                raise ValueError(f'named group {repr(group)} not in regex')

            group = group_index

        if color is None:
            self.colors.pop(group, None)
            return

        if not isinstance(color, Color):
            raise TypeError('color must be a chromaterm.Color')

        # Negative indices are rejected too; they would otherwise be stored
        # and only fail later, inside `get_matches`
        if not 0 <= group <= self._regex_object.groups:
            raise ValueError(f'regex has {self._regex_object.groups} group(s);'
                             f' {group} is invalid')

        self.colors[group] = color

        # Sort by group number to ensure deterministic highlighting
        self.colors = {k: self.colors[k] for k in sorted(self.colors)}
class Config:
    '''An aggregation of multiple rules which highlights by performing the regex
    matching of the rules before any colors are added.'''

    def __init__(self, benchmark=False):
        '''Constructor.

        Args:
            benchmark (bool): Measure usage (duration, match count) of the rules.
        '''
        # Reset code currently in effect per color type. It starts at the
        # defaults and is updated by `highlight` from the data it processes.
        self._reset_codes = {k: v['reset'] for k, v in COLOR_TYPES.items()}
        self.benchmark = benchmark
        # Maps a rule to its accumulated `(duration, match count)`
        self.benchmark_results = {}
        self.rules = []

    @staticmethod
    def get_insert_index(start, end, inserts):
        '''Returns a tuple containing the start and end indices for where they
        should be inserted into the inserts list in order to maintain the
        position-based descending (reverse) order.

        Args:
            start (int): The start position of a match.
            end (int): The end position of a match.
            inserts (list): A list of inserts, where the first item of each insert
                is the position.
        '''
        start_index = end_index = None
        # Stays at -1 for an empty list, which makes `index + 1` below 0
        index = -1

        # Arrange the inserts in reverse order (index magic over data)
        for index, (position, _, _, _) in enumerate(inserts):
            if start_index is None and start >= position:
                start_index = index

            # In the case of overlapping matches, other colors exist between
            # the start and the end, so the end index needs to be located
            # independently
            if end_index is None and end > position:
                end_index = index

            if start_index is not None and end_index is not None:
                return start_index, end_index

        # If an index wasn't found, then it belongs at the end of the list
        if start_index is None:
            start_index = index + 1

        if end_index is None:
            end_index = index + 1

        return start_index, end_index

    def get_inserts(self, data, inserts):
        '''Returns a list containing the inserts for the color codes relative to
        data. An insert is a list containing a position (index relative to data),
        the code to be inserted, a boolean indicating if its a reset code or not,
        and The color type which corresponds to COLOR_TYPES or `None` if it's a
        full SGR reset.

        The list of inserts is ordered in descending order based on the position
        of each insert relative to the data. This makes them easy to insert into
        data without calculating index offsets.

        The `inserts` argument is modified in place and is also the list that
        is returned.

        Args:
            data (bytes): Bytes from which the inserts are gathered.
            inserts (list): Any pre-existing inserts to be added to it.
        '''
        # A lot of the code here is difficult to comprehend directly, because the
        # intent might not be immediately visible. You may find it easier to take
        # a test-driven approach by looking at the test_config_highlight_* tests
        for start, end, color in self.get_matches(data):
            start_index, end_index = self.get_insert_index(start, end, inserts)

            # Each color type requires tracking of its respective type
            for color_type, color_code in color.color_types:
                # Find the last color before the end of this match (if any) and
                # use it as the reset code for this color
                for insert in inserts[end_index:]:
                    if insert[3] == color_type:
                        reset = insert[1]
                        break

                    # No type (a full reset); use the default for this type
                    if insert[2] and insert[3] is None:
                        reset = COLOR_TYPES[color_type]['reset']
                        break
                else:
                    # Nothing earlier in the data; fall back to the reset code
                    # that is currently in effect for this type
                    reset = self._reset_codes[color_type]

                start_insert = [start, color_code, False, color_type]
                end_insert = [end, reset, True, color_type]

                # Replace every color reset of the current color type with our
                # color code to prevent them from interrupting this color
                for insert in inserts[end_index:start_index]:
                    if insert[2] and insert[3] in (color_type, None):
                        # A full reset is moved forward to our reset (replaced)
                        if insert[3] is None:
                            end_insert[1:4] = insert[1:4]

                        insert[1:4] = color_code, False, color_type

                # Relative to data, the inserts are added in reverse order LI-FO
                inserts.insert(start_index, start_insert)
                inserts.insert(end_index, end_insert)

                # Advance to ensure the slices above search appropriately if
                # multiple color types exist
                start_index += 1
                end_index += 1

        return inserts

    def get_matches(self, data):
        '''Returns a list of tuples, each of which containing a start index, an
        end index, and the `Color` object for that match. The tuples of the
        latter rules are towards the end of the list.

        When `self.benchmark` is set, the time spent in, and the number of
        matches of, each rule are accumulated into `self.benchmark_results`.

        Args:
            data (bytes): Bytes against which each rule is matched.
        '''
        matches = []

        for rule in self.rules:
            if self.benchmark:
                duration, count = self.benchmark_results.get(rule, (0, 0))

                checkpoint = time.perf_counter()
                rule_matches = rule.get_matches(data)
                duration += time.perf_counter() - checkpoint

                count += len(rule_matches)
                self.benchmark_results[rule] = (duration, count)
            else:
                rule_matches = rule.get_matches(data)

            # If overlap is not allowed, replace any matches with \n to prevent
            # overlap while maintaining correct indices on other rules' matches
            if rule.exclusive:
                for start, end, _ in rule_matches:
                    data = data[:start] + b'\n' * (end - start) + data[end:]

            matches += rule_matches

        return matches

    def highlight(self, data):
        '''Returns a highlighted bytes of `data`. The matches from the rules
        are gathered prior to inserting any color codes, making it so the rules
        can match without the color codes interfering.

        Args:
            data (bytes): Bytes to highlight.
        '''
        data, inserts = Color.strip_colors(data)
        inserts = self.get_inserts(data, inserts)
        # Types whose tracked reset code has not been refreshed yet. Inserts
        # come in descending position order, so the first one seen per type is
        # the last one in the data.
        resets_to_update = list(self._reset_codes)

        for position, color_code, is_reset, color_type in inserts:
            data = data[:position] + color_code + data[position:]

            if resets_to_update:
                # A full reset; default the remaining resets
                if color_type is None and is_reset:
                    for key in resets_to_update:
                        self._reset_codes[key] = COLOR_TYPES[key]['reset']

                    resets_to_update = []
                elif color_type in resets_to_update:
                    self._reset_codes[color_type] = color_code
                    resets_to_update.remove(color_type)

        return data

    def print_benchmark_results(self, descending=True, file=sys.stderr):
        '''Prints the benchmark results, sorted by time spent.

        Each line shows the rule's share of the total time, its time in
        seconds, its match count, and its description (or, if it has none, the
        first 30 characters of its regex). Nothing is printed when there are
        no results.

        Args:
            descending (bool): Ordering of the results.
            file (object): The file to which the results are printed.
        '''
        if not self.benchmark_results:
            return

        total = sum(duration for duration, _ in self.benchmark_results.values())
        print('Benchmark results (time spent, match count):', file=file)

        # Sorted on the `(duration, count)` tuple, so count breaks ties
        ordered = sorted(self.benchmark_results.items(),
                         key=lambda item: item[1],
                         reverse=descending)

        for rule, (duration, count) in ordered:
            # Results can exist while every duration is zero; report a 0%
            # share instead of dividing by zero
            share = duration / total if total else 0.0

            print(
                f'{share:^7.2%} {duration:.3f}s {count:<7} '
                f'{rule.description or repr(rule.regex[:30])}',
                file=file,
            )