-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
utils.py
2378 lines (2011 loc) · 89.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2012-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import base64
import re
import time
import logging
import datetime
import hashlib
import binascii
import functools
import weakref
import random
import os
import socket
import cgi
import warnings
import dateutil.parser
from dateutil.tz import tzutc
import botocore
import botocore.awsrequest
import botocore.httpsession
from botocore.compat import (
json, quote, zip_longest, urlsplit, urlunsplit,
six, urlparse, total_seconds, get_tzinfo_options, get_md5,
MD5_AVAILABLE
)
from botocore.vendored.six.moves.urllib.request import getproxies, proxy_bypass
from botocore.exceptions import (
InvalidExpressionError, ConfigNotFound, InvalidDNSNameError, ClientError,
MetadataRetrievalError, EndpointConnectionError, ReadTimeoutError,
ConnectionClosedError, ConnectTimeoutError, SSOTokenLoadError,
PendingAuthorizationExpiredError, UnsupportedS3ArnError,
UnsupportedS3AccesspointConfigurationError, InvalidRegionError,
UnsupportedOutpostResourceError,
UnsupportedS3ControlConfigurationError, UnsupportedS3ControlArnError,
InvalidHostLabelError, InvalidIMDSEndpointError, HTTPClientError,
UnsupportedS3ConfigurationError,
)
from urllib3.exceptions import LocationParseError
logger = logging.getLogger(__name__)
DEFAULT_METADATA_SERVICE_TIMEOUT = 1
METADATA_BASE_URL = 'http://169.254.169.254/'
METADATA_BASE_URL_IPv6 = 'http://[fe80:ec2::254%eth0]/'
# These are chars that do not need to be urlencoded.
# Based on rfc2986, section 2.3
SAFE_CHARS = '-._~'
LABEL_RE = re.compile(r'[a-z0-9][a-z0-9\-]*[a-z0-9]')
RETRYABLE_HTTP_ERRORS = (
ReadTimeoutError, EndpointConnectionError, ConnectionClosedError,
ConnectTimeoutError,
)
S3_ACCELERATE_WHITELIST = ['dualstack']
# Vendoring IPv6 validation regex patterns from urllib3
# https://github.com/urllib3/urllib3/blob/7e856c0/src/urllib3/util/url.py
IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
_variations = [
# 6( h16 ":" ) ls32
"(?:%(hex)s:){6}%(ls32)s",
# "::" 5( h16 ":" ) ls32
"::(?:%(hex)s:){5}%(ls32)s",
# [ h16 ] "::" 4( h16 ":" ) ls32
"(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
"(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
"(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
"(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
# [ *4( h16 ":" ) h16 ] "::" ls32
"(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
# [ *5( h16 ":" ) h16 ] "::" h16
"(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
# [ *6( h16 ":" ) h16 ] "::"
"(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]
UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~"
IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
def ensure_boolean(val):
"""Ensures a boolean value if a string or boolean is provided
For strings, the value for True/False is case insensitive
"""
if isinstance(val, bool):
return val
else:
return val.lower() == 'true'
def is_json_value_header(shape):
"""Determines if the provided shape is the special header type jsonvalue.
:type shape: botocore.shape
:param shape: Shape to be inspected for the jsonvalue trait.
:return: True if this type is a jsonvalue, False otherwise
:rtype: Bool
"""
return (hasattr(shape, 'serialization') and
shape.serialization.get('jsonvalue', False) and
shape.serialization.get('location') == 'header' and
shape.type_name == 'string')
def get_service_module_name(service_model):
"""Returns the module name for a service
This is the value used in both the documentation and client class name
"""
name = service_model.metadata.get(
'serviceAbbreviation',
service_model.metadata.get(
'serviceFullName', service_model.service_name))
name = name.replace('Amazon', '')
name = name.replace('AWS', '')
name = re.sub(r'\W+', '', name)
return name
def normalize_url_path(path):
if not path:
return '/'
return remove_dot_segments(path)
def normalize_boolean(val):
"""Returns None if val is None, otherwise ensure value
converted to boolean"""
if val is None:
return val
else:
return ensure_boolean(val)
def remove_dot_segments(url):
# RFC 3986, section 5.2.4 "Remove Dot Segments"
# Also, AWS services require consecutive slashes to be removed,
# so that's done here as well
if not url:
return ''
input_url = url.split('/')
output_list = []
for x in input_url:
if x and x != '.':
if x == '..':
if output_list:
output_list.pop()
else:
output_list.append(x)
if url[0] == '/':
first = '/'
else:
first = ''
if url[-1] == '/' and output_list:
last = '/'
else:
last = ''
return first + '/'.join(output_list) + last
def validate_jmespath_for_set(expression):
# Validates a limited jmespath expression to determine if we can set a
# value based on it. Only works with dotted paths.
if not expression or expression == '.':
raise InvalidExpressionError(expression=expression)
for invalid in ['[', ']', '*']:
if invalid in expression:
raise InvalidExpressionError(expression=expression)
def set_value_from_jmespath(source, expression, value, is_first=True):
# This takes a (limited) jmespath-like expression & can set a value based
# on it.
# Limitations:
# * Only handles dotted lookups
# * No offsets/wildcards/slices/etc.
if is_first:
validate_jmespath_for_set(expression)
bits = expression.split('.', 1)
current_key, remainder = bits[0], bits[1] if len(bits) > 1 else ''
if not current_key:
raise InvalidExpressionError(expression=expression)
if remainder:
if current_key not in source:
# We've got something in the expression that's not present in the
# source (new key). If there's any more bits, we'll set the key
# with an empty dictionary.
source[current_key] = {}
return set_value_from_jmespath(
source[current_key],
remainder,
value,
is_first=False
)
# If we're down to a single key, set it.
source[current_key] = value
class _RetriesExceededError(Exception):
"""Internal exception used when the number of retries are exceeded."""
pass
class BadIMDSRequestError(Exception):
def __init__(self, request):
self.request = request
class IMDSFetcher(object):
_RETRIES_EXCEEDED_ERROR_CLS = _RetriesExceededError
_TOKEN_PATH = 'latest/api/token'
_TOKEN_TTL = '21600'
def __init__(self, timeout=DEFAULT_METADATA_SERVICE_TIMEOUT,
num_attempts=1, base_url=METADATA_BASE_URL,
env=None, user_agent=None, config=None):
self._timeout = timeout
self._num_attempts = num_attempts
self._base_url = self._select_base_url(base_url, config)
if env is None:
env = os.environ.copy()
self._disabled = env.get('AWS_EC2_METADATA_DISABLED', 'false').lower()
self._disabled = self._disabled == 'true'
self._user_agent = user_agent
self._session = botocore.httpsession.URLLib3Session(
timeout=self._timeout,
proxies=get_environ_proxies(self._base_url),
)
def get_base_url(self):
return self._base_url
def _select_base_url(self, base_url, config):
if config is None:
config = {}
requires_ipv6 = ensure_boolean(config.get('imds_use_ipv6', False))
custom_metadata_endpoint = config.get('ec2_metadata_service_endpoint')
if requires_ipv6 and custom_metadata_endpoint:
logger.warn("Custom endpoint and IMDS_USE_IPV6 are both set. Using custom endpoint.")
chosen_base_url = None
if base_url != METADATA_BASE_URL:
chosen_base_url = base_url
elif custom_metadata_endpoint:
chosen_base_url = custom_metadata_endpoint
elif requires_ipv6:
chosen_base_url = METADATA_BASE_URL_IPv6
else:
chosen_base_url = METADATA_BASE_URL
logger.debug("IMDS ENDPOINT: %s" % chosen_base_url)
if not is_valid_uri(chosen_base_url):
raise InvalidIMDSEndpointError(endpoint=chosen_base_url)
return chosen_base_url
def _fetch_metadata_token(self):
self._assert_enabled()
url = self._base_url + self._TOKEN_PATH
headers = {
'x-aws-ec2-metadata-token-ttl-seconds': self._TOKEN_TTL,
}
self._add_user_agent(headers)
request = botocore.awsrequest.AWSRequest(
method='PUT', url=url, headers=headers)
for i in range(self._num_attempts):
try:
response = self._session.send(request.prepare())
if response.status_code == 200:
return response.text
elif response.status_code in (404, 403, 405):
return None
elif response.status_code in (400,):
raise BadIMDSRequestError(request)
except ReadTimeoutError:
return None
except RETRYABLE_HTTP_ERRORS as e:
logger.debug(
"Caught retryable HTTP exception while making metadata "
"service request to %s: %s", url, e, exc_info=True)
except HTTPClientError as e:
if isinstance(e.kwargs.get('error'), LocationParseError):
raise InvalidIMDSEndpointError(endpoint=url, error=e)
else:
raise
return None
def _get_request(self, url_path, retry_func, token=None):
"""Make a get request to the Instance Metadata Service.
:type url_path: str
:param url_path: The path component of the URL to make a get request.
This arg is appended to the base_url that was provided in the
initializer.
:type retry_func: callable
:param retry_func: A function that takes the response as an argument
and determines if it needs to retry. By default empty and non
200 OK responses are retried.
:type token: str
:param token: Metadata token to send along with GET requests to IMDS.
"""
self._assert_enabled()
if retry_func is None:
retry_func = self._default_retry
url = self._base_url + url_path
headers = {}
if token is not None:
headers['x-aws-ec2-metadata-token'] = token
self._add_user_agent(headers)
for i in range(self._num_attempts):
try:
request = botocore.awsrequest.AWSRequest(
method='GET', url=url, headers=headers)
response = self._session.send(request.prepare())
if not retry_func(response):
return response
except RETRYABLE_HTTP_ERRORS as e:
logger.debug(
"Caught retryable HTTP exception while making metadata "
"service request to %s: %s", url, e, exc_info=True)
raise self._RETRIES_EXCEEDED_ERROR_CLS()
def _add_user_agent(self, headers):
if self._user_agent is not None:
headers['User-Agent'] = self._user_agent
def _assert_enabled(self):
if self._disabled:
logger.debug("Access to EC2 metadata has been disabled.")
raise self._RETRIES_EXCEEDED_ERROR_CLS()
def _default_retry(self, response):
return (
self._is_non_ok_response(response) or
self._is_empty(response)
)
def _is_non_ok_response(self, response):
if response.status_code != 200:
self._log_imds_response(response, 'non-200', log_body=True)
return True
return False
def _is_empty(self, response):
if not response.content:
self._log_imds_response(response, 'no body', log_body=True)
return True
return False
def _log_imds_response(self, response, reason_to_log, log_body=False):
statement = (
"Metadata service returned %s response "
"with status code of %s for url: %s"
)
logger_args = [
reason_to_log, response.status_code, response.url
]
if log_body:
statement += ", content body: %s"
logger_args.append(response.content)
logger.debug(statement, *logger_args)
class InstanceMetadataFetcher(IMDSFetcher):
_URL_PATH = 'latest/meta-data/iam/security-credentials/'
_REQUIRED_CREDENTIAL_FIELDS = [
'AccessKeyId', 'SecretAccessKey', 'Token', 'Expiration'
]
def retrieve_iam_role_credentials(self):
try:
token = self._fetch_metadata_token()
role_name = self._get_iam_role(token)
credentials = self._get_credentials(role_name, token)
if self._contains_all_credential_fields(credentials):
return {
'role_name': role_name,
'access_key': credentials['AccessKeyId'],
'secret_key': credentials['SecretAccessKey'],
'token': credentials['Token'],
'expiry_time': credentials['Expiration'],
}
else:
# IMDS can return a 200 response that has a JSON formatted
# error message (i.e. if ec2 is not trusted entity for the
# attached role). We do not necessarily want to retry for
# these and we also do not necessarily want to raise a key
# error. So at least log the problematic response and return
# an empty dictionary to signal that it was not able to
# retrieve credentials. These error will contain both a
# Code and Message key.
if 'Code' in credentials and 'Message' in credentials:
logger.debug('Error response received when retrieving'
'credentials: %s.', credentials)
return {}
except self._RETRIES_EXCEEDED_ERROR_CLS:
logger.debug("Max number of attempts exceeded (%s) when "
"attempting to retrieve data from metadata service.",
self._num_attempts)
except BadIMDSRequestError as e:
logger.debug("Bad IMDS request: %s", e.request)
return {}
def _get_iam_role(self, token=None):
return self._get_request(
url_path=self._URL_PATH,
retry_func=self._needs_retry_for_role_name,
token=token,
).text
def _get_credentials(self, role_name, token=None):
r = self._get_request(
url_path=self._URL_PATH + role_name,
retry_func=self._needs_retry_for_credentials,
token=token,
)
return json.loads(r.text)
def _is_invalid_json(self, response):
try:
json.loads(response.text)
return False
except ValueError:
self._log_imds_response(response, 'invalid json')
return True
def _needs_retry_for_role_name(self, response):
return (
self._is_non_ok_response(response) or
self._is_empty(response)
)
def _needs_retry_for_credentials(self, response):
return (
self._is_non_ok_response(response) or
self._is_empty(response) or
self._is_invalid_json(response)
)
def _contains_all_credential_fields(self, credentials):
for field in self._REQUIRED_CREDENTIAL_FIELDS:
if field not in credentials:
logger.debug(
'Retrieved credentials is missing required field: %s',
field)
return False
return True
def merge_dicts(dict1, dict2, append_lists=False):
"""Given two dict, merge the second dict into the first.
The dicts can have arbitrary nesting.
:param append_lists: If true, instead of clobbering a list with the new
value, append all of the new values onto the original list.
"""
for key in dict2:
if isinstance(dict2[key], dict):
if key in dict1 and key in dict2:
merge_dicts(dict1[key], dict2[key])
else:
dict1[key] = dict2[key]
# If the value is a list and the ``append_lists`` flag is set,
# append the new values onto the original list
elif isinstance(dict2[key], list) and append_lists:
# The value in dict1 must be a list in order to append new
# values onto it.
if key in dict1 and isinstance(dict1[key], list):
dict1[key].extend(dict2[key])
else:
dict1[key] = dict2[key]
else:
# At scalar types, we iterate and merge the
# current dict that we're on.
dict1[key] = dict2[key]
def lowercase_dict(original):
"""Copies the given dictionary ensuring all keys are lowercase strings. """
copy = {}
for key in original:
copy[key.lower()] = original[key]
return copy
def parse_key_val_file(filename, _open=open):
try:
with _open(filename) as f:
contents = f.read()
return parse_key_val_file_contents(contents)
except OSError:
raise ConfigNotFound(path=filename)
def parse_key_val_file_contents(contents):
# This was originally extracted from the EC2 credential provider, which was
# fairly lenient in its parsing. We only try to parse key/val pairs if
# there's a '=' in the line.
final = {}
for line in contents.splitlines():
if '=' not in line:
continue
key, val = line.split('=', 1)
key = key.strip()
val = val.strip()
final[key] = val
return final
def percent_encode_sequence(mapping, safe=SAFE_CHARS):
"""Urlencode a dict or list into a string.
This is similar to urllib.urlencode except that:
* It uses quote, and not quote_plus
* It has a default list of safe chars that don't need
to be encoded, which matches what AWS services expect.
If any value in the input ``mapping`` is a list type,
then each list element wil be serialized. This is the equivalent
to ``urlencode``'s ``doseq=True`` argument.
This function should be preferred over the stdlib
``urlencode()`` function.
:param mapping: Either a dict to urlencode or a list of
``(key, value)`` pairs.
"""
encoded_pairs = []
if hasattr(mapping, 'items'):
pairs = mapping.items()
else:
pairs = mapping
for key, value in pairs:
if isinstance(value, list):
for element in value:
encoded_pairs.append('%s=%s' % (percent_encode(key),
percent_encode(element)))
else:
encoded_pairs.append('%s=%s' % (percent_encode(key),
percent_encode(value)))
return '&'.join(encoded_pairs)
def percent_encode(input_str, safe=SAFE_CHARS):
"""Urlencodes a string.
Whereas percent_encode_sequence handles taking a dict/sequence and
producing a percent encoded string, this function deals only with
taking a string (not a dict/sequence) and percent encoding it.
If given the binary type, will simply URL encode it. If given the
text type, will produce the binary type by UTF-8 encoding the
text. If given something else, will convert it to the text type
first.
"""
# If its not a binary or text string, make it a text string.
if not isinstance(input_str, (six.binary_type, six.text_type)):
input_str = six.text_type(input_str)
# If it's not bytes, make it bytes by UTF-8 encoding it.
if not isinstance(input_str, six.binary_type):
input_str = input_str.encode('utf-8')
return quote(input_str, safe=safe)
def _parse_timestamp_with_tzinfo(value, tzinfo):
"""Parse timestamp with pluggable tzinfo options."""
if isinstance(value, (int, float)):
# Possibly an epoch time.
return datetime.datetime.fromtimestamp(value, tzinfo())
else:
try:
return datetime.datetime.fromtimestamp(float(value), tzinfo())
except (TypeError, ValueError):
pass
try:
# In certain cases, a timestamp marked with GMT can be parsed into a
# different time zone, so here we provide a context which will
# enforce that GMT == UTC.
return dateutil.parser.parse(value, tzinfos={'GMT': tzutc()})
except (TypeError, ValueError) as e:
raise ValueError('Invalid timestamp "%s": %s' % (value, e))
def parse_timestamp(value):
"""Parse a timestamp into a datetime object.
Supported formats:
* iso8601
* rfc822
* epoch (value is an integer)
This will return a ``datetime.datetime`` object.
"""
for tzinfo in get_tzinfo_options():
try:
return _parse_timestamp_with_tzinfo(value, tzinfo)
except OSError as e:
logger.debug('Unable to parse timestamp with "%s" timezone info.',
tzinfo.__name__, exc_info=e)
raise RuntimeError('Unable to calculate correct timezone offset for '
'"%s"' % value)
def parse_to_aware_datetime(value):
"""Converted the passed in value to a datetime object with tzinfo.
This function can be used to normalize all timestamp inputs. This
function accepts a number of different types of inputs, but
will always return a datetime.datetime object with time zone
information.
The input param ``value`` can be one of several types:
* A datetime object (both naive and aware)
* An integer representing the epoch time (can also be a string
of the integer, i.e '0', instead of 0). The epoch time is
considered to be UTC.
* An iso8601 formatted timestamp. This does not need to be
a complete timestamp, it can contain just the date portion
without the time component.
The returned value will be a datetime object that will have tzinfo.
If no timezone info was provided in the input value, then UTC is
assumed, not local time.
"""
# This is a general purpose method that handles several cases of
# converting the provided value to a string timestamp suitable to be
# serialized to an http request. It can handle:
# 1) A datetime.datetime object.
if isinstance(value, datetime.datetime):
datetime_obj = value
else:
# 2) A string object that's formatted as a timestamp.
# We document this as being an iso8601 timestamp, although
# parse_timestamp is a bit more flexible.
datetime_obj = parse_timestamp(value)
if datetime_obj.tzinfo is None:
# I think a case would be made that if no time zone is provided,
# we should use the local time. However, to restore backwards
# compat, the previous behavior was to assume UTC, which is
# what we're going to do here.
datetime_obj = datetime_obj.replace(tzinfo=tzutc())
else:
datetime_obj = datetime_obj.astimezone(tzutc())
return datetime_obj
def datetime2timestamp(dt, default_timezone=None):
"""Calculate the timestamp based on the given datetime instance.
:type dt: datetime
:param dt: A datetime object to be converted into timestamp
:type default_timezone: tzinfo
:param default_timezone: If it is provided as None, we treat it as tzutc().
But it is only used when dt is a naive datetime.
:returns: The timestamp
"""
epoch = datetime.datetime(1970, 1, 1)
if dt.tzinfo is None:
if default_timezone is None:
default_timezone = tzutc()
dt = dt.replace(tzinfo=default_timezone)
d = dt.replace(tzinfo=None) - dt.utcoffset() - epoch
if hasattr(d, "total_seconds"):
return d.total_seconds() # Works in Python 2.7+
return (d.microseconds + (d.seconds + d.days * 24 * 3600) * 10**6) / 10**6
def calculate_sha256(body, as_hex=False):
"""Calculate a sha256 checksum.
This method will calculate the sha256 checksum of a file like
object. Note that this method will iterate through the entire
file contents. The caller is responsible for ensuring the proper
starting position of the file and ``seek()``'ing the file back
to its starting location if other consumers need to read from
the file like object.
:param body: Any file like object. The file must be opened
in binary mode such that a ``.read()`` call returns bytes.
:param as_hex: If True, then the hex digest is returned.
If False, then the digest (as binary bytes) is returned.
:returns: The sha256 checksum
"""
checksum = hashlib.sha256()
for chunk in iter(lambda: body.read(1024 * 1024), b''):
checksum.update(chunk)
if as_hex:
return checksum.hexdigest()
else:
return checksum.digest()
def calculate_tree_hash(body):
"""Calculate a tree hash checksum.
For more information see:
http://docs.aws.amazon.com/amazonglacier/latest/dev/checksum-calculations.html
:param body: Any file like object. This has the same constraints as
the ``body`` param in calculate_sha256
:rtype: str
:returns: The hex version of the calculated tree hash
"""
chunks = []
required_chunk_size = 1024 * 1024
sha256 = hashlib.sha256
for chunk in iter(lambda: body.read(required_chunk_size), b''):
chunks.append(sha256(chunk).digest())
if not chunks:
return sha256(b'').hexdigest()
while len(chunks) > 1:
new_chunks = []
for first, second in _in_pairs(chunks):
if second is not None:
new_chunks.append(sha256(first + second).digest())
else:
# We're at the end of the list and there's no pair left.
new_chunks.append(first)
chunks = new_chunks
return binascii.hexlify(chunks[0]).decode('ascii')
def _in_pairs(iterable):
# Creates iterator that iterates over the list in pairs:
# for a, b in _in_pairs([0, 1, 2, 3, 4]):
# print(a, b)
#
# will print:
# 0, 1
# 2, 3
# 4, None
shared_iter = iter(iterable)
# Note that zip_longest is a compat import that uses
# the itertools izip_longest. This creates an iterator,
# this call below does _not_ immediately create the list
# of pairs.
return zip_longest(shared_iter, shared_iter)
class CachedProperty(object):
"""A read only property that caches the initially computed value.
This descriptor will only call the provided ``fget`` function once.
Subsequent access to this property will return the cached value.
"""
def __init__(self, fget):
self._fget = fget
def __get__(self, obj, cls):
if obj is None:
return self
else:
computed_value = self._fget(obj)
obj.__dict__[self._fget.__name__] = computed_value
return computed_value
class ArgumentGenerator(object):
"""Generate sample input based on a shape model.
This class contains a ``generate_skeleton`` method that will take
an input/output shape (created from ``botocore.model``) and generate
a sample dictionary corresponding to the input/output shape.
The specific values used are place holder values. For strings either an
empty string or the member name can be used, for numbers 0 or 0.0 is used.
The intended usage of this class is to generate the *shape* of the input
structure.
This can be useful for operations that have complex input shapes.
This allows a user to just fill in the necessary data instead of
worrying about the specific structure of the input arguments.
Example usage::
s = botocore.session.get_session()
ddb = s.get_service_model('dynamodb')
arg_gen = ArgumentGenerator()
sample_input = arg_gen.generate_skeleton(
ddb.operation_model('CreateTable').input_shape)
print("Sample input for dynamodb.CreateTable: %s" % sample_input)
"""
def __init__(self, use_member_names=False):
self._use_member_names = use_member_names
def generate_skeleton(self, shape):
"""Generate a sample input.
:type shape: ``botocore.model.Shape``
:param shape: The input shape.
:return: The generated skeleton input corresponding to the
provided input shape.
"""
stack = []
return self._generate_skeleton(shape, stack)
def _generate_skeleton(self, shape, stack, name=''):
stack.append(shape.name)
try:
if shape.type_name == 'structure':
return self._generate_type_structure(shape, stack)
elif shape.type_name == 'list':
return self._generate_type_list(shape, stack)
elif shape.type_name == 'map':
return self._generate_type_map(shape, stack)
elif shape.type_name == 'string':
if self._use_member_names:
return name
if shape.enum:
return random.choice(shape.enum)
return ''
elif shape.type_name in ['integer', 'long']:
return 0
elif shape.type_name in ['float', 'double']:
return 0.0
elif shape.type_name == 'boolean':
return True
elif shape.type_name == 'timestamp':
return datetime.datetime(1970, 1, 1, 0, 0, 0)
finally:
stack.pop()
def _generate_type_structure(self, shape, stack):
skeleton = {}
if stack.count(shape.name) > 1:
return skeleton
for member_name, member_shape in shape.members.items():
skeleton[member_name] = self._generate_skeleton(
member_shape, stack, name=member_name)
return skeleton
def _generate_type_list(self, shape, stack):
# For list elements we've arbitrarily decided to
# return two elements for the skeleton list.
name = ''
if self._use_member_names:
name = shape.member.name
return [
self._generate_skeleton(shape.member, stack, name),
]
def _generate_type_map(self, shape, stack):
key_shape = shape.key
value_shape = shape.value
assert key_shape.type_name == 'string'
return {'KeyName': self._generate_skeleton(value_shape, stack)}
def is_valid_ipv6_endpoint_url(endpoint_url):
netloc = urlparse(endpoint_url).netloc
return IPV6_ADDRZ_RE.match(netloc) is not None
def is_valid_endpoint_url(endpoint_url):
"""Verify the endpoint_url is valid.
:type endpoint_url: string
:param endpoint_url: An endpoint_url. Must have at least a scheme
and a hostname.
:return: True if the endpoint url is valid. False otherwise.
"""
parts = urlsplit(endpoint_url)
hostname = parts.hostname
if hostname is None:
return False
if len(hostname) > 255:
return False
if hostname[-1] == ".":
hostname = hostname[:-1]
allowed = re.compile(
r"^((?!-)[A-Z\d-]{1,63}(?<!-)\.)*((?!-)[A-Z\d-]{1,63}(?<!-))$",
re.IGNORECASE)
return allowed.match(hostname)
def is_valid_uri(endpoint_url):
return is_valid_endpoint_url(endpoint_url) or is_valid_ipv6_endpoint_url(endpoint_url)
def validate_region_name(region_name):
"""Provided region_name must be a valid host label."""
if region_name is None:
return
valid_host_label = re.compile(r'^(?![0-9]+$)(?!-)[a-zA-Z0-9-]{,63}(?<!-)$')
valid = valid_host_label.match(region_name)
if not valid:
raise InvalidRegionError(region_name=region_name)
def check_dns_name(bucket_name):
"""
Check to see if the ``bucket_name`` complies with the
restricted DNS naming conventions necessary to allow
access via virtual-hosting style.
Even though "." characters are perfectly valid in this DNS
naming scheme, we are going to punt on any name containing a
"." character because these will cause SSL cert validation
problems if we try to use virtual-hosting style addressing.
"""
if '.' in bucket_name:
return False
n = len(bucket_name)
if n < 3 or n > 63:
# Wrong length
return False
match = LABEL_RE.match(bucket_name)
if match is None or match.end() != len(bucket_name):
return False
return True
def fix_s3_host(request, signature_version, region_name,
default_endpoint_url=None, **kwargs):
"""
This handler looks at S3 requests just before they are signed.
If there is a bucket name on the path (true for everything except
ListAllBuckets) it checks to see if that bucket name conforms to
the DNS naming conventions. If it does, it alters the request to
use ``virtual hosting`` style addressing rather than ``path-style``
addressing.
"""
if request.context.get('use_global_endpoint', False):
default_endpoint_url = 's3.amazonaws.com'
try:
switch_to_virtual_host_style(
request, signature_version, default_endpoint_url)
except InvalidDNSNameError as e:
bucket_name = e.kwargs['bucket_name']
logger.debug('Not changing URI, bucket is not DNS compatible: %s',
bucket_name)
def switch_to_virtual_host_style(request, signature_version,
default_endpoint_url=None, **kwargs):
"""
This is a handler to force virtual host style s3 addressing no matter
the signature version (which is taken in consideration for the default
case). If the bucket is not DNS compatible an InvalidDNSName is thrown.
:param request: A AWSRequest object that is about to be sent.
:param signature_version: The signature version to sign with
:param default_endpoint_url: The endpoint to use when switching to a
virtual style. If None is supplied, the virtual host will be
constructed from the url of the request.
"""
if request.auth_path is not None: