/
logging.py
465 lines (393 loc) · 22.7 KB
/
logging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
from __future__ import (
absolute_import,
unicode_literals,
)
import logging
import logging.handlers
import socket
from typing import (
Any,
Dict,
Iterable,
List,
Mapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
)
import six
from pysoa.common.compatibility import ContextVar
from pysoa.common.types import Context
# Public API of this module; everything else here is an implementation detail.
__all__ = (
    'PySOALogContextFilter',
    'RecursivelyCensoredDictWrapper',
    'SyslogHandler',
)
class PySOALogContextFilter(logging.Filter):
    """
    A logging filter that stamps every record with the current PySOA request
    context (correlation ID and request ID), the service name, and the action
    name, so that log formatters can reference those attributes. Contexts and
    action names are kept on context-local stacks that are pushed and popped
    around each request/action.
    """

    # Context-local stacks so that concurrently-handled requests/actions do not
    # clobber each other's values; `None` means nothing has been pushed yet.
    _context_stack = ContextVar('logging_context_stack', default=None)  # type: ContextVar[Optional[List[Context]]]
    _action_stack = ContextVar('logging_action_stack', default=None)  # type: ContextVar[Optional[List[six.text_type]]]

    # Process-wide service name applied to all records (set via set_service_name)
    _service_name = None

    def __init__(self):  # type: () -> None
        # An empty name makes this filter apply to records from every logger
        super(PySOALogContextFilter, self).__init__('')

    def filter(self, record):  # type: (logging.LogRecord) -> bool
        """Attach the context attributes to the record. Never rejects a record."""
        current = self.get_logging_request_context()
        correlation_id = (current.get('correlation_id') if current else None) or '--'
        request_id = (current.get('request_id') if current else None) or '--'
        setattr(record, 'correlation_id', correlation_id)
        setattr(record, 'request_id', request_id)
        setattr(record, 'service_name', self._service_name or 'unknown')
        setattr(record, 'action_name', self.get_logging_action_name() or '(n/a)')
        return True

    @classmethod
    def set_logging_request_context(cls, **context):  # type: (Any) -> None
        """Push a new request context dict onto the context-local stack."""
        stack = cls._context_stack.get()
        if stack:
            stack.append(context)
        else:
            cls._context_stack.set([context])

    @classmethod
    def clear_logging_request_context(cls):  # type: () -> None
        """Pop the most recently pushed request context, if any."""
        stack = cls._context_stack.get()
        if stack:
            del stack[-1]

    @classmethod
    def get_logging_request_context(cls):  # type: () -> Optional[Context]
        """Return the most recently pushed request context, or `None`."""
        stack = cls._context_stack.get()
        return stack[-1] if stack else None

    @classmethod
    def set_logging_action_name(cls, action_name):  # type: (six.text_type) -> None
        """Push a new action name onto the context-local stack."""
        stack = cls._action_stack.get()
        if stack:
            stack.append(action_name)
        else:
            cls._action_stack.set([action_name])

    @classmethod
    def clear_logging_action_name(cls):  # type: () -> None
        """Pop the most recently pushed action name, if any."""
        stack = cls._action_stack.get()
        if stack:
            del stack[-1]

    @classmethod
    def get_logging_action_name(cls):  # type: () -> Optional[six.text_type]
        """Return the most recently pushed action name, or `None`."""
        stack = cls._action_stack.get()
        return stack[-1] if stack else None

    @classmethod
    def set_service_name(cls, service_name):  # type: (six.text_type) -> None
        """Record the service name stamped onto all log records, process-wide."""
        cls._service_name = service_name
_VT = TypeVar('_VT')
class RecursivelyCensoredDictWrapper(object):
    """
    Wraps a dict so that it can be logged safely: the first time it is repr-ed,
    a copy of the dict is made in which the values of sensitive keys (passwords,
    tokens, card numbers, etc.) are replaced with a placeholder string, and both
    the censored copy and its repr are cached for all future calls.
    """

    # Some performance info about this set
    #
    # In [1]: %timeit 'password' in SENSITIVE_FIELDS
    # The slowest run took 16.69 times longer than the fastest. This could mean that an intermediate result is being
    # cached.
    # 10000000 loops, best of 3: 129 ns per loop
    #
    # In [2]: %timeit 'nope' in SENSITIVE_FIELDS
    # 10000000 loops, best of 3: 131 ns per loop
    #
    # In [3]: %timeit random_string()
    # 100000 loops, best of 3: 10.4 us per loop
    #
    # In [4]: %timeit random_string() in SENSITIVE_FIELDS
    # 100000 loops, best of 3: 10.7 us per loop
    #
    # Suffice it to say, a lookup against this set should always take less than 250 ns.
    SENSITIVE_FIELDS = frozenset({
        # Authentication and related
        'password', 'pass', 'passwd', 'passphrase', 'pass_phrase', 'pass-phrase', 'passPhrase',
        'passwords', 'passwds', 'passphrases', 'pass_phrases', 'pass-phrases', 'passPhrases',
        'private', 'private_key', 'private-key', 'privateKey',
        'privates', 'private_keys', 'private-keys', 'privateKeys',
        'secret', 'secret_key', 'secret-key', 'secretKey',
        'secrets', 'secret_keys', 'secret-keys', 'secretKeys',
        'security_code', 'security-code', 'securityCode',
        'security_codes', 'security-codes', 'securityCodes',
        'auth', 'token', 'auth_token', 'auth-token', 'authToken',
        'authorization', 'authorization_token', 'authorization-token', 'authorizationToken',
        'authentication', 'authentication_token', 'authentication-token', 'authenticationToken',

        # Credit cards, banking, and related
        'credit_card', 'credit-card', 'creditCard', 'credit_card_number', 'credit-card-number', 'creditCardNumber',
        'card_number', 'card-number', 'cardNumber', 'cc_number', 'cc-number', 'ccNumber', 'ccn',
        'credit_cards', 'credit-cards', 'creditCards', 'credit_card_numbers', 'credit-card-numbers',
        'creditCardNumbers',
        'card_numbers', 'card-numbers', 'cardNumbers', 'cc_numbers', 'cc-numbers', 'ccNumbers', 'ccns',
        'cid', 'csc', 'csn', 'cvc', 'cvc2', 'cvd', 'cve', 'cvn2', 'cvv', 'cvv2', 'icvv',
        'cids', 'cscs', 'csns', 'cvcs', 'cvc2s', 'cvds', 'cves', 'cvn2s', 'cvvs', 'cvv2s', 'icvvs',
        'card_id', 'card-id', 'cardId', 'card_identification', 'card-identification', 'cardIdentification',
        'card_ids', 'card-ids', 'cardIds', 'card_identifications', 'card-identifications', 'cardIdentifications',
        'card_identification_code', 'card-identification-code', 'cardIdentificationCode',
        'card_identification_codes', 'card-identification-codes', 'cardIdentificationCodes',
        'card_identification_number', 'card-identification-number', 'cardIdentificationNumber',
        'card_identification_numbers', 'card-identification-numbers', 'cardIdentificationNumbers',
        'card_identification_value', 'card-identification-value', 'cardIdentificationValue',
        'card_identification_values', 'card-identification-values', 'cardIdentificationValues',
        'card_security', 'card-security', 'cardSecurity',
        'card_securities', 'card-securities', 'cardSecurities',
        'card_security_code', 'card-security-code', 'cardSecurityCode',
        'card_security_codes', 'card-security-codes', 'cardSecurityCodes',
        'card_security_number', 'card-security-number', 'cardSecurityNumber',
        'card_security_numbers', 'card-security-numbers', 'cardSecurityNumbers',
        'card_security_value', 'card-security-value', 'cardSecurityValue',
        'card_security_values', 'card-security-values', 'cardSecurityValues',
        'card_validation', 'card-validation', 'cardValidation',
        'card_validations', 'card-validations', 'cardValidations',
        'card_validation_code', 'card-validation-code', 'cardValidationCode',
        'card_validation_codes', 'card-validation-codes', 'cardValidationCodes',
        'card_validation_number', 'card-validation-number', 'cardValidationNumber',
        'card_validation_numbers', 'card-validation-numbers', 'cardValidationNumbers',
        'card_validation_value', 'card-validation-value', 'cardValidationValue',
        'card_validation_values', 'card-validation-values', 'cardValidationValues',
        'card_verification', 'card-verification', 'cardVerification',
        'card_verifications', 'card-verifications', 'cardVerifications',
        'card_verification_code', 'card-verification-code', 'cardVerificationCode',
        'card_verification_codes', 'card-verification-codes', 'cardVerificationCodes',
        'card_verification_number', 'card-verification-number', 'cardVerificationNumber',
        'card_verification_numbers', 'card-verification-numbers', 'cardVerificationNumbers',
        'card_verification_value', 'card-verification-value', 'cardVerificationValue',
        'card_verification_values', 'card-verification-values', 'cardVerificationValues',
        'account_number', 'account-number', 'accountNumber',
        'account_numbers', 'account-numbers', 'accountNumbers',
        'bank_account', 'bank-account', 'bankAccount',
        'bank_accounts', 'bank-accounts', 'bankAccounts',
        'bank_account_number', 'bank-account-number', 'bankAccountNumber',
        'bank_account_numbers', 'bank-account-numbers', 'bankAccountNumbers',
        'pin', 'pin_code', 'pin-code', 'pinCode', 'pin_number', 'pin-number', 'pinNumber',
        'pins', 'pin_codes', 'pin-codes', 'pinCodes', 'pin_numbers', 'pin-numbers', 'pinNumbers',
        'personal_id_number', 'personal-id-number', 'personalIdNumber',
        'personal_id_numbers', 'personal-id-numbers', 'personalIdNumbers',
        'personal_identification_number', 'personal-identification-number', 'personalIdentificationNumber',
        'personal_identification_numbers', 'personal-identification-numbers', 'personalIdentificationNumbers',
    })

    # Only string and integer values are ever replaced; containers are recursed
    # into, and other types (bools, floats, None, objects) pass through as-is.
    CENSOR_TYPES = cast(Tuple[Type, ...], six.string_types) + cast(Tuple[Type, ...], six.integer_types)

    # The placeholder written in place of a sensitive value
    CENSORED_STRING = '**********'

    def __init__(self, wrapped_dict):  # type: (Mapping[six.text_type, Any]) -> None
        """
        Wraps a dict to censor its contents. The first time `repr` is called, it copies the dict, recursively
        censors sensitive fields, caches the result, and returns the censored dict repr-ed. All future calls use
        the cache.

        :param wrapped_dict: The `dict` that should be censored

        :raises ValueError: If `wrapped_dict` is not actually a `dict`
        """
        if not isinstance(wrapped_dict, dict):
            raise ValueError('wrapped_dict must be a dict')

        self._wrapped_dict = wrapped_dict  # type: Mapping[six.text_type, Any]
        self._dict_cache = None  # type: Optional[Mapping[six.text_type, Any]]
        self._repr_cache = None  # type: Optional[six.text_type]

    def _get_repr_cache(self):  # type: () -> str
        # `is None` instead of truthiness: an empty wrapped dict censors to a
        # falsy `{}`, which previously defeated the cache and re-censored the
        # dict on every call.
        if self._dict_cache is None:
            self._dict_cache = self._copy_and_censor_dict(self._wrapped_dict)

        return repr(self._dict_cache)

    def __repr__(self):  # type: () -> str
        # `is None` for the same reason as in _get_repr_cache: never treat a
        # computed-but-falsy cache entry as "not yet computed"
        if self._repr_cache is None:
            self._repr_cache = self._get_repr_cache()
        return self._repr_cache

    def __str__(self):  # type: () -> str
        return self.__repr__()

    def __bytes__(self):  # type: () -> six.binary_type
        # If this method is called, we must be in Python 3, which means __str__ must be returning a string
        return cast(six.text_type, self.__str__()).encode('utf-8')

    def __unicode__(self):  # type: () -> six.text_type
        # If this method is called, we must be in Python 2, which means __str__ must be returning bytes
        return cast(six.binary_type, self.__str__()).decode('utf-8')

    @classmethod
    def _copy_and_censor_unknown_value(cls, v, should_censor_values):
        # type: (_VT, bool) -> Union[_VT, six.text_type]
        """Censor or recurse into a single value, depending on its type."""
        if isinstance(v, dict):
            return cls._copy_and_censor_dict(v)  # type: ignore

        if isinstance(v, (list, tuple, set, frozenset)):
            return cls._copy_and_censor_iterable(v, should_censor_values)  # type: ignore

        # bools are ints in Python, so explicitly exclude them from censoring
        if should_censor_values and v and isinstance(v, cls.CENSOR_TYPES) and not isinstance(v, bool):
            return cls.CENSORED_STRING

        return v

    @classmethod
    def _copy_and_censor_dict(cls, d):  # type: (Mapping[six.text_type, Any]) -> Mapping[six.text_type, Any]
        """Return a recursively-censored copy of the dict, keyed identically."""
        # This should only be _marginally_ slower than copy.deepcopy
        return {k: cls._copy_and_censor_unknown_value(v, k in cls.SENSITIVE_FIELDS) for k, v in six.iteritems(d)}

    @classmethod
    def _copy_and_censor_iterable(cls, i, should_censor_values):
        # type: (Iterable[_VT], bool) -> Iterable[Union[_VT, six.text_type]]
        """Return a censored copy of the iterable, preserving its concrete type."""
        return type(i)(cls._copy_and_censor_unknown_value(v, should_censor_values) for v in i)  # type: ignore
IP_MTU_DISCOVER = 10  # Position of the IP Path MTU Discovery flag in request packets
IP_MTU_DISCOVER_DO = 2  # "Don't fragment" value of the IP Path MTU Discovery flag in request packets

# Fallback used when MTU probing fails (presumably the classic 576-byte IPv4 worst case — see the constant name)
WORST_CASE_MTU_IP = 576

# Candidate MTU sizes probed largest-first by _discover_minimum_mtu_to_target
MTU_ATTEMPTS = 65535, 2000, 1500, 1280

# 20-byte IP header + 8-byte UDP header (the optional 40-byte IP header options are not used here)
DATAGRAM_HEADER_LENGTH_IN_BYTES = 28
def _discover_minimum_mtu_to_target(address, port):  # type: (six.text_type, int) -> int
    """
    Empirically discover the path MTU to the target by sending progressively smaller UDP datagrams with the
    "don't fragment" flag set until one succeeds.

    :param address: The host name or IP address to probe
    :param port: The UDP port to which the probe packets are addressed

    :return: The first size from `MTU_ATTEMPTS` that could be sent unfragmented, or `WORST_CASE_MTU_IP` if none
             could be sent or an unexpected error occurred
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:  # ensure the probe socket is always closed (it was previously leaked on every call)
        s.connect((address, port))
        s.setsockopt(socket.IPPROTO_IP, IP_MTU_DISCOVER, IP_MTU_DISCOVER_DO)

        for attempt in MTU_ATTEMPTS:
            try:
                s.send(b'#' * (attempt - DATAGRAM_HEADER_LENGTH_IN_BYTES))
                return attempt
            except socket.error as e:
                # We can't rely on the error code, because it varies from platform to platform, but the message always
                # contains "too long" if we're getting an "MTU exceeded" ICMP response. If this isn't an "MTU exceeded"
                # response (or there is no message at all — strerror can be None), then something went wrong, and we
                # can't determine the MTU, so fall back to worst case.
                if 'too long' not in (e.strerror or ''):
                    break
                # Now we try again with a lower MTU, which might still be too high

        return WORST_CASE_MTU_IP
    finally:
        s.close()
_Addr = Union[six.text_type, Tuple[six.text_type, int]]
class SyslogHandler(logging.handlers.SysLogHandler):
    """
    A more advanced Syslog logging handler that attempts to understand the MTU of the underlying connection and then
    tailor Syslog packets to match that MTU, either by truncating or splitting logging packets. This contrasts to the
    superclass, which will simply drop packets that exceed the MTU (optionally logging an error about the failure).

    Notes:

    The maximum UDP packet header in bytes is 28 bytes (20 bytes IP header + 8 bytes UDP header). Note that the
    optional 40-byte IP header options could make this 68 bytes, but this is rarely used, and this handler does not
    use it.

    The Syslog priority and facility are encoded into a single 32-bit (4-byte) value.
    """

    # How over-length records are handled (see `emit`)
    OVERFLOW_BEHAVIOR_FRAGMENT = 0  # split the record across multiple packets
    OVERFLOW_BEHAVIOR_TRUNCATE = 1  # send one packet, cleanly truncated

    # Discovered MTUs keyed by target host, shared by all instances in the process
    _MINIMUM_MTU_CACHE = {}  # type: Dict[six.text_type, int]

    def __init__(
        self,
        address=('localhost', logging.handlers.SYSLOG_UDP_PORT),  # type: _Addr
        facility=logging.handlers.SysLogHandler.LOG_USER,  # type: int
        socket_type=None,  # type: Optional[int]
        overflow=OVERFLOW_BEHAVIOR_FRAGMENT,  # type: int
    ):
        """
        Constructs the handler, discovering the path MTU for UDP targets so that over-length packets can be
        truncated or fragmented instead of dropped.

        :param address: A (host, port) tuple for a network target, or a string path for a Unix socket target
        :param facility: The Syslog facility with which all records will be sent
        :param socket_type: The socket type; passed through to the superclass
        :param overflow: The overflow behavior for over-length records; only honored for non-Unix UDP targets
                         (everything else always truncates)
        """
        super(SyslogHandler, self).__init__(address, facility, socket_type)  # type: ignore

        # This is a hack, but necessary due to https://github.com/python/typeshed/issues/3244, which is causing errors
        self.address = getattr(self, 'address')  # type: Tuple[six.text_type, int]
        self.socket = getattr(self, 'socket')  # type: socket.socket
        self.socktype = getattr(self, 'socktype')  # type: int
        self.unixsocket = getattr(self, 'unixsocket')  # type: bool

        if not self.unixsocket and self.socktype == socket.SOCK_DGRAM:
            if address[0] not in self._MINIMUM_MTU_CACHE:
                # The MTU is unlikely to change while the process is running, and checking it is expensive
                self._MINIMUM_MTU_CACHE[address[0]] = _discover_minimum_mtu_to_target(address[0], 9999)

            self.maximum_length = self._MINIMUM_MTU_CACHE[address[0]] - DATAGRAM_HEADER_LENGTH_IN_BYTES
            self.overflow = overflow
        else:
            self.maximum_length = 1048576  # Let's not send more than a megabyte to Syslog, even over TCP/Unix ... crazy
            self.overflow = self.OVERFLOW_BEHAVIOR_TRUNCATE

    def emit(self, record):  # type: (logging.LogRecord) -> None
        """
        Emits a record. The record is sent carefully, according to the following rules, to ensure that data is not
        lost by exceeding the MTU of the connection.

        - If the byte-encoded record length plus prefix length plus suffix length plus priority length is less than the
          maximum allowed length, then a single packet is sent, containing the priority, prefix, full record, and
          suffix, in that order.
        - If it's greater than or equal to the maximum allowed length and the overflow behavior is set to "truncate,"
          the record is cleanly truncated (being careful not to split in the middle of a multi-byte character), and
          then a single packet is sent, containing the priority, prefix, truncated record, and suffix, in that order.
        - If it's greater than or equal to the maximum allowed length and the overflow behavior is set to "fragment,"
          the record preamble (things like file name, logger name, correlation ID, etc.) is extracted from the start
          of the record to calculate a new chunk length. The remainder of the record (which should just be the true
          message and any exception info) is then chunked (being careful not to split in the middle of a multi-byte
          character) into lengths less than or equal to the chunk length, and then the record is sent as multiple
          packets, each packet containing the priority, prefix, record preamble, message chunk, and suffix, in that
          order.

        :param record: The record to emit; on any failure, `handleError` is invoked instead of raising
        """
        # noinspection PyBroadException
        try:
            formatted_message = self.format(record)  # type: six.text_type
            encoded_message = formatted_message.encode('utf-8')  # type: six.binary_type

            # `ident` and `append_nul` only exist on some Python versions' SysLogHandler, hence the getattr defaults
            prefix = suffix = b''
            ident = getattr(self, 'ident', None)  # type: Optional[Union[six.text_type, six.binary_type]]
            if ident:
                prefix = ident.encode('utf-8') if isinstance(ident, six.text_type) else ident
            if getattr(self, 'append_nul', True):
                suffix = '\000'.encode('utf-8')

            # The Syslog "<PRI>" header, e.g. "<14>"
            priority = '<{:d}>'.format(
                self.encodePriority(self.facility, self.mapPriority(record.levelname))  # type: ignore
            ).encode('utf-8')

            message_length = len(encoded_message)
            # The usable budget for the record itself, after the fixed per-packet parts are accounted for
            message_length_limit = self.maximum_length - len(prefix) - len(suffix) - len(priority)

            if message_length < message_length_limit:
                # The whole record fits in one packet
                parts = [priority + prefix + encoded_message + suffix]  # type: List[six.binary_type]
            elif self.overflow == self.OVERFLOW_BEHAVIOR_TRUNCATE:
                truncated_message, _ = self._cleanly_slice_encoded_string(encoded_message, message_length_limit)
                parts = [priority + prefix + truncated_message + suffix]
            else:
                # This can't work perfectly, but it's pretty unusual for a message to go before machine-parseable parts
                # in the formatted record. So we split the record on the message part. Everything before the split
                # becomes the preamble and gets repeated every packet. Everything after the split gets chunked. There's
                # no reason to match on more than the first 40 characters of the message--the chances of that matching
                # the wrong part of the record are astronomical.
                try:
                    index = formatted_message.index(record.getMessage()[:40])
                    start_of_message, to_chunk = formatted_message[:index], formatted_message[index:]
                except (TypeError, ValueError):
                    # We can't locate the message in the formatted record? That's unfortunate. Let's make something up.
                    start_of_message, to_chunk = '{} '.format(formatted_message[:30]), formatted_message[30:]

                start_of_message_bytes = start_of_message.encode('utf-8')
                to_chunk_bytes = to_chunk.encode('utf-8')
                # 12 is the length of "... (cont'd)" in bytes
                chunk_length_limit = message_length_limit - len(start_of_message_bytes) - 12

                i = 1
                parts = []
                remaining_message = to_chunk_bytes
                while remaining_message:
                    message_id = b''
                    subtractor = 0
                    if i > 1:
                        # If this is not the first message, we determine message # so that we can subtract that length
                        message_id = '{}'.format(i).encode('utf-8')
                        # 14 is the length of "(cont'd #) ..." in bytes
                        subtractor = 14 + len(message_id)
                    chunk, remaining_message = self._cleanly_slice_encoded_string(
                        remaining_message,
                        chunk_length_limit - subtractor,
                    )
                    if i > 1:
                        # If this is not the first message, we prepend the chunk to indicate continuation
                        chunk = b"(cont'd #" + message_id + b') ...' + chunk
                    i += 1
                    if remaining_message:
                        # If this is not the last message, we append the chunk to indicate continuation
                        chunk = chunk + b"... (cont'd)"
                    parts.append(priority + prefix + start_of_message_bytes + chunk + suffix)

            self._send(parts)
        except Exception:
            self.handleError(record)

    def _send(self, parts):  # type: (List[six.binary_type]) -> None
        """
        Sends each fully-assembled packet to the target, reconnecting a Unix socket once on send failure.

        :param parts: The list of encoded packets to send
        """
        for message in parts:
            if self.unixsocket:
                try:
                    self.socket.send(message)
                except OSError:
                    # The local Syslog daemon may have gone away and come back; reconnect and retry once
                    self.socket.close()
                    self._connect_unixsocket(self.address)  # type: ignore
                    self.socket.send(message)
            elif self.socktype == socket.SOCK_DGRAM:
                self.socket.sendto(message, self.address)
            else:
                self.socket.sendall(message)

    @staticmethod
    def _cleanly_slice_encoded_string(encoded_string, length_limit):
        # type: (six.binary_type, int) -> Tuple[six.binary_type, six.binary_type]
        """
        Takes a byte string (a UTF-8 encoded string) and splits it into two pieces such that the first slice is no
        longer than argument `length_limit`, then returns a tuple containing the first slice and remainder of the
        byte string, respectively. The first slice may actually be shorter than `length_limit`, because this ensures
        that the string does not get split in the middle of a multi-byte character.

        This works because the first byte in a multi-byte unicode character encodes how many bytes compose that
        character, so we can determine empirically if we are splitting in the middle of the character and correct for
        that.

        You can read more about how this works here: https://en.wikipedia.org/wiki/UTF-8#Description

        :param encoded_string: The encoded string to split in two
        :param length_limit: The maximum length allowed for the first slice of the string

        :return: A tuple of (slice, remaining)
        """
        sliced = encoded_string[:length_limit]  # type: six.binary_type
        remaining = encoded_string[length_limit:]  # type: six.binary_type
        try:
            sliced.decode('utf-8')
        except UnicodeDecodeError as e:
            # This means that we sliced in the middle of a multi-byte character, which of course is bad. So we back up
            # to the beginning of the multi-byte character and slice there, instead
            sliced, remaining = sliced[:e.start], sliced[e.start:] + remaining
        return sliced, remaining