fix(formats): avoid CSV dialect autodetection for CSV files generated from Excel

Depends on translate/translate#5267. Fixes #11872.
WeblateOrg · Jun 18, 2024 · 36ff3de · 36ff3de
1 parent 67b2509
commit 36ff3de
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 5 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -77,7 +77,7 @@ dependencies = [
   "social-auth-app-django>=5.4.0,<6.0.0",
   "social-auth-core>=4.5.0,<5.0.0",
   "tesserocr>=2.6.1,<2.8.0",
-  "translate-toolkit>=3.13.0,<3.14",
+  "translate-toolkit>=3.13.1,<3.14",
   "translation-finder>=2.16,<3.0",
   "user-agents>=2.0,<2.3",
   "weblate-language-data>=2024.3",

diff --git a/weblate/formats/external.py b/weblate/formats/external.py
@@ -20,6 +20,8 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
 
+CSV_DIALECT = "unix"
+
 
 class XlsxFormat(CSVUtf8Format):
     name = gettext_lazy("Excel Open XML")
@@ -93,7 +95,7 @@ def parse_store(self, storefile):
 
         output = StringIO()
 
-        writer = csv.writer(output, dialect="unix")
+        writer = csv.writer(output, dialect=CSV_DIALECT)
 
         # value can be None or blank string for cells having formatting only,
         # we need to ignore such columns as that would be treated like "" fields
@@ -116,7 +118,7 @@ def parse_store(self, storefile):
         content = output.getvalue().encode("utf-8")
 
         # Load the file as CSV
-        return super().parse_store(NamedBytesIO(name, content))
+        return super().parse_store(NamedBytesIO(name, content), dialect=CSV_DIALECT)
 
     @staticmethod
     def mimetype() -> str:

diff --git a/weblate/formats/ttkit.py b/weblate/formats/ttkit.py
@@ -1586,13 +1586,17 @@ def get_content_and_filename(storefile):
                 content = handle.read()
         return content, filename
 
-    def parse_store(self, storefile):
+    def parse_store(self, storefile, *, dialect: None | str = None):
         """Parse the store."""
         content, filename = self.get_content_and_filename(storefile)
 
         # Parse file
         store = self.get_store_instance()
-        store.parse(content, sample_length=40000)
+        store.parse(
+            content,
+            sample_length=40000 if dialect is None else None,
+            dialect=dialect,
+        )
         # Did detection of headers work?
         if store.fieldnames != ["location", "source", "target"]:
             return store