diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1689fb04..849f6170 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ Changelog
 
 - option to add event handlers which accept no arguments
 
+### Fixed
+
+- started enforcing local storage to always use the UTF-8 encoding
+
 [1.0.0](../../releases/tag/v1.0.0) - 2022-03-13
 -----------------------------------------------
 
diff --git a/docs/res/format_docs.py b/docs/res/format_docs.py
index 3e6ef4ea..6eac0e9d 100755
--- a/docs/res/format_docs.py
+++ b/docs/res/format_docs.py
@@ -36,7 +36,7 @@
 subs.append((fr'`({custom_type})\.([A-Z_]+)`', lambda match: f'[{match.group(0)}](#{match.group(1).lower()}-{match.group(2).lower()})'))
 
 # Load the api_reference.md generated by Sphinx
-with open('api_reference.md', 'r+') as api_reference:
+with open('api_reference.md', 'r+', encoding='utf-8') as api_reference:
     api_reference_content = api_reference.read()
 
 # Do the above defined replacements
diff --git a/scripts/check_version_in_changelog.py b/scripts/check_version_in_changelog.py
index 10a104a8..f72bee87 100644
--- a/scripts/check_version_in_changelog.py
+++ b/scripts/check_version_in_changelog.py
@@ -13,7 +13,7 @@
     if not CHANGELOG_PATH.is_file():
         raise RuntimeError('Unable to find CHANGELOG.md file')
 
-    with open(CHANGELOG_PATH) as changelog_file:
+    with open(CHANGELOG_PATH, encoding='utf-8') as changelog_file:
         for line in changelog_file:
             # The heading for the changelog entry for the given version can start with either the version number, or the version number in a link
             if re.match(fr'\[?{current_package_version}([\] ]|$)', line):
diff --git a/scripts/utils.py b/scripts/utils.py
index 2492fa6c..edcf1f60 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -8,7 +8,7 @@
 # Load the current version number from src/package_name/_version.py
 # It is on a line in the format __version__ = 1.2.3
 def get_current_package_version() -> str:
-    with open(VERSION_FILE_PATH, 'r') as version_file:
+    with open(VERSION_FILE_PATH, 'r', encoding='utf-8') as version_file:
         for line in version_file:
             if line.startswith('__version__'):
                 delim = '"' if '"' in line else "'"
@@ -21,7 +21,7 @@ def get_current_package_version() -> str:
 # Write the given version number from src/package_name/_version.py
 # It replaces the version number on the line with the format __version__ = 1.2.3
 def set_current_package_version(version: str) -> None:
-    with open(VERSION_FILE_PATH, 'r+') as version_file:
+    with open(VERSION_FILE_PATH, 'r+', encoding='utf-8') as version_file:
         updated_version_file_lines = []
         version_string_found = False
         for line in version_file:
diff --git a/setup.py b/setup.py
index 1e2ac3a4..7c7271f1 100644
--- a/setup.py
+++ b/setup.py
@@ -76,6 +76,7 @@
         'flake8-comprehensions ~= 3.10.1',
         'flake8-datetimez ~= 20.10.0',
         'flake8-docstrings ~= 1.7.0',
+        'flake8-encodings ~= 0.5.0',
         'flake8-isort ~= 6.0.0',
         'flake8-noqa ~= 1.3.0',
         'flake8-pytest-style ~= 1.7.2',
diff --git a/src/apify/_memory_storage/resource_clients/base_resource_client.py b/src/apify/_memory_storage/resource_clients/base_resource_client.py
index 5704890f..f3943fae 100644
--- a/src/apify/_memory_storage/resource_clients/base_resource_client.py
+++ b/src/apify/_memory_storage/resource_clients/base_resource_client.py
@@ -101,7 +101,7 @@ def _find_or_create_client_by_id_or_name(
                 metadata_path = os.path.join(entry.path, '__metadata__.json')
                 if not os.access(metadata_path, os.F_OK):
                     continue
-                with open(metadata_path) as metadata_file:
+                with open(metadata_path, encoding='utf-8') as metadata_file:
                     metadata = json.load(metadata_file)
                 if id and id == metadata.get('id'):
                     storage_path = entry.path
diff --git a/src/apify/_memory_storage/resource_clients/dataset.py b/src/apify/_memory_storage/resource_clients/dataset.py
index 37a10b1e..8d480ce5 100644
--- a/src/apify/_memory_storage/resource_clients/dataset.py
+++ b/src/apify/_memory_storage/resource_clients/dataset.py
@@ -412,7 +412,7 @@ def _create_from_directory(
                     has_seen_metadata_file = True
 
                     # We have found the dataset's metadata file, build out information based on it
-                    with open(os.path.join(storage_directory, entry.name)) as f:
+                    with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                         metadata = json.load(f)
                         id = metadata['id']
                         name = metadata['name']
@@ -423,7 +423,7 @@ def _create_from_directory(
 
                     continue
 
-                with open(os.path.join(storage_directory, entry.name)) as f:
+                with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                     entry_content = json.load(f)
                 entry_name = entry.name.split('.')[0]
 
diff --git a/src/apify/_memory_storage/resource_clients/key_value_store.py b/src/apify/_memory_storage/resource_clients/key_value_store.py
index 3ca5e2ff..bf9d19b0 100644
--- a/src/apify/_memory_storage/resource_clients/key_value_store.py
+++ b/src/apify/_memory_storage/resource_clients/key_value_store.py
@@ -383,7 +383,7 @@ def _create_from_directory(
             if entry.is_file():
                 if entry.name == '__metadata__.json':
                     # We have found the store metadata file, build out information based on it
-                    with open(os.path.join(storage_directory, entry.name), encoding='utf8') as f:
+                    with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                         metadata = json.load(f)
                         id = metadata['id']
                         name = metadata['name']
@@ -395,7 +395,7 @@ def _create_from_directory(
 
                 if '.__metadata__.' in entry.name:
                     # This is an entry's metadata file, we can use it to create/extend the record
-                    with open(os.path.join(storage_directory, entry.name), encoding='utf8') as f:
+                    with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                         metadata = json.load(f)
 
                         new_record = {
@@ -429,7 +429,7 @@ def _create_from_directory(
                 elif 'application/json' in content_type:
                     try:
                         # Try parsing the JSON ahead of time (not ideal but solves invalid files being loaded into stores)
-                        json.loads(file_content)
+                        json.loads(file_content.decode('utf-8'))
                     except json.JSONDecodeError:
                         # We need to override and then restore the warnings filter so that the warning gets printed out,
                         # Otherwise it would be silently swallowed
diff --git a/src/apify/_memory_storage/resource_clients/request_queue.py b/src/apify/_memory_storage/resource_clients/request_queue.py
index 5b6963d1..ec20f47d 100644
--- a/src/apify/_memory_storage/resource_clients/request_queue.py
+++ b/src/apify/_memory_storage/resource_clients/request_queue.py
@@ -423,7 +423,7 @@ def _create_from_directory(
             if entry.is_file():
                 if entry.name == '__metadata__.json':
                     # We have found the queue's metadata file, build out information based on it
-                    with open(os.path.join(storage_directory, entry.name)) as f:
+                    with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                         metadata = json.load(f)
                         id = metadata['id']
                         name = metadata['name']
@@ -435,7 +435,7 @@ def _create_from_directory(
 
                     continue
 
-                with open(os.path.join(storage_directory, entry.name)) as f:
+                with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                     request = json.load(f)
                     if request.get('orderNo'):
                         request['orderNo'] = Decimal(request.get('orderNo'))
diff --git a/src/apify/_utils.py b/src/apify/_utils.py
index ad8bf2ed..2aa00e71 100644
--- a/src/apify/_utils.py
+++ b/src/apify/_utils.py
@@ -323,7 +323,7 @@ def _is_file_or_bytes(value: Any) -> bool:
 
 def _maybe_parse_body(body: bytes, content_type: str) -> Any:
     if _is_content_type_json(content_type):
-        return json.loads(body)  # Returns any
+        return json.loads(body.decode('utf-8'))  # Returns any
     elif _is_content_type_xml(content_type) or _is_content_type_text(content_type):
         return body.decode('utf-8')
     return body
diff --git a/tests/unit/memory_storage/resource_clients/test_key_value_store.py b/tests/unit/memory_storage/resource_clients/test_key_value_store.py
index da721ad9..c163f28c 100644
--- a/tests/unit/memory_storage/resource_clients/test_key_value_store.py
+++ b/tests/unit/memory_storage/resource_clients/test_key_value_store.py
@@ -161,7 +161,7 @@ async def test_get_and_set_record(tmp_path: Path, key_value_store_client: KeyVal
     assert bytes_record_info['value'].decode('utf-8') == bytes_value.decode('utf-8')
 
     # Test using file descriptor
-    with open(os.path.join(tmp_path, 'test.json'), 'w+') as f:
+    with open(os.path.join(tmp_path, 'test.json'), 'w+', encoding='utf-8') as f:
         f.write('Test')
         with pytest.raises(NotImplementedError, match='File-like values are not supported in local memory storage'):
             await key_value_store_client.set_record('file', f)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index cf4764d9..bd18c2ad 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -253,7 +253,7 @@ async def test__force_remove(tmp_path: Path) -> None:
     assert os.path.exists(test_file_path) is False
 
     # Removes the file if it exists
-    open(test_file_path, 'a').close()
+    open(test_file_path, 'a', encoding='utf-8').close()
     assert os.path.exists(test_file_path) is True
     await _force_remove(test_file_path)
     assert os.path.exists(test_file_path) is False
@@ -323,10 +323,10 @@ async def test__force_rename(tmp_path: Path) -> None:
     # Will remove dst_dir if it exists (also covers normal case)
     # Create the src_dir with a file in it
     await mkdir(src_dir)
-    open(src_file, 'a').close()
+    open(src_file, 'a', encoding='utf-8').close()
     # Create the dst_dir with a file in it
    await mkdir(dst_dir)
-    open(dst_file, 'a').close()
+    open(dst_file, 'a', encoding='utf-8').close()
     assert os.path.exists(src_file) is True
     assert os.path.exists(dst_file) is True
     await _force_rename(src_dir, dst_dir)
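
Note (not part of the patch above): the reason for passing `encoding='utf-8'` explicitly is that `open()` without an `encoding` argument falls back to `locale.getpreferredencoding()`, which is platform dependent (for example cp1252 on many Windows setups), so the same local-storage file could be written and read with different encodings. The following is a minimal sketch of the failure mode these changes guard against; the file name and payload are illustrative only, not taken from the codebase.

# Sketch only: demonstrates why an explicit encoding makes the round trip
# deterministic, independent of the machine's locale.
import json
import locale
import tempfile

# The implicit default used by open() when no encoding is given;
# locale dependent, so not guaranteed to be UTF-8 everywhere.
print(locale.getpreferredencoding(False))

record = {'greeting': 'Ahoj, světe'}  # hypothetical non-ASCII payload

with tempfile.TemporaryDirectory() as tmp:
    path = f'{tmp}/__metadata__.json'

    # Write and read with an explicit encoding on both sides.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(record, f, ensure_ascii=False)

    with open(path, encoding='utf-8') as f:
        assert json.load(f) == record

On Python 3.10+, running the test suite with `python -X warn_default_encoding` surfaces any remaining call sites that rely on the implicit default encoding at runtime (PEP 597's EncodingWarning), complementing the static check added via `flake8-encodings` in `setup.py`.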