diff --git a/frappe/core/doctype/data_import/test_importer.py b/frappe/core/doctype/data_import/test_importer.py index bb939bb53a3..90e22ec9f81 100644 --- a/frappe/core/doctype/data_import/test_importer.py +++ b/frappe/core/doctype/data_import/test_importer.py @@ -52,8 +52,6 @@ def test_data_import_from_file(self): def test_data_validation_semicolon_success(self): import_file = get_import_file("sample_import_file_semicolon") - self.assertIsNotNone(frappe.flags.delimiter_options) - frappe.flags.delimiter_options = ",;" data_import = self.get_importer(doctype_name, import_file, update=True) doc = data_import.get_preview_from_template().get("data", [{}]) @@ -167,7 +165,6 @@ def get_importer_semicolon(self, doctype, import_file, update=False): # deliberatly overwrite default delimiter options here, causing to fail when parsing ; data_import.delimiter_options = "," data_import.insert() - # Commit so that the first import failure does not rollback the Data Import insert. frappe.db.commit() return data_import diff --git a/frappe/core/doctype/file/file.py b/frappe/core/doctype/file/file.py index 992ff98c88e..c1400650c5c 100755 --- a/frappe/core/doctype/file/file.py +++ b/frappe/core/doctype/file/file.py @@ -515,7 +515,7 @@ def unzip(self) -> list["File"]: def exists_on_disk(self): return os.path.exists(self.get_full_path()) - def get_content(self) -> bytes: + def get_content(self, encodings=None) -> bytes | str: if self.is_folder: frappe.throw(_("Cannot get file contents of a Folder")) @@ -531,10 +531,12 @@ def get_content(self) -> bytes: self.validate_file_url() file_path = self.get_full_path() + if encodings is None: + encodings = ["utf-8-sig", "utf-8", "windows-1250", "windows-1252"] # read file with proper encoding with open(file_path, mode="rb") as f: self._content = f.read() - encodings = ["utf-8-sig", "utf-8", "windows-1250", "windows-1252"] + for encoding in encodings: try: # for plain text files diff --git a/frappe/core/doctype/file/test_file.py b/frappe/core/doctype/file/test_file.py index ecd431436f5..6c9b9f5872c 100644 --- a/frappe/core/doctype/file/test_file.py +++ b/frappe/core/doctype/file/test_file.py @@ -1,7 +1,6 @@ # Copyright (c) 2022, Frappe Technologies Pvt. Ltd. and Contributors # License: MIT. See LICENSE import base64 -import json import os import shutil import tempfile @@ -111,7 +110,7 @@ class TestBase64File(FrappeTestCase): def setUp(self): self.attached_to_doctype, self.attached_to_docname = make_test_doc() self.test_content = base64.b64encode(test_content1.encode("utf-8")) - _file: "File" = frappe.get_doc( + _file: frappe.Document = frappe.get_doc( { "doctype": "File", "file_name": "test_base64.txt", @@ -125,7 +124,7 @@ def setUp(self): self.saved_file_url = _file.file_url def test_saved_content(self): - _file = frappe.get_doc("File", {"file_url": self.saved_file_url}) + _file: frappe.Document = frappe.get_doc("File", {"file_url": self.saved_file_url}) content = _file.get_content() self.assertEqual(content, test_content1) @@ -255,6 +254,25 @@ def test_attachment_limit(self): limit_property.delete() frappe.clear_cache(doctype="ToDo") + def test_utf8_bom_content_decoding(self): + utf8_bom_content = test_content1.encode("utf-8-sig") + _file: frappe.Document = frappe.get_doc( + { + "doctype": "File", + "file_name": "utf8bom.txt", + "attached_to_doctype": self.attached_to_doctype1, + "attached_to_name": self.attached_to_docname1, + "content": utf8_bom_content, + "decode": False, + } + ) + _file.save() + saved_file = frappe.get_doc("File", _file.name) + file_content_decoded = saved_file.get_content(encodings=["utf-8"]) + self.assertEqual(file_content_decoded[0], "\ufeff") + file_content_properly_decoded = saved_file.get_content(encodings=["utf-8-sig", "utf-8"]) + self.assertEqual(file_content_properly_decoded, test_content1) + class TestFile(FrappeTestCase): def setUp(self):