Skip to content

Commit

Permalink
fix(formats): avoid CSV autodetection for generated CSV files from Excel
Browse files Browse the repository at this point in the history
  • Loading branch information
nijel committed Jun 18, 2024
1 parent 67b2509 commit 36ff3de
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ dependencies = [
"social-auth-app-django>=5.4.0,<6.0.0",
"social-auth-core>=4.5.0,<5.0.0",
"tesserocr>=2.6.1,<2.8.0",
"translate-toolkit>=3.13.0,<3.14",
"translate-toolkit>=3.13.1,<3.14",
"translation-finder>=2.16,<3.0",
"user-agents>=2.0,<2.3",
"weblate-language-data>=2024.3",
Expand Down
6 changes: 4 additions & 2 deletions weblate/formats/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
if TYPE_CHECKING:
from collections.abc import Callable

CSV_DIALECT = "unix"


class XlsxFormat(CSVUtf8Format):
name = gettext_lazy("Excel Open XML")
Expand Down Expand Up @@ -93,7 +95,7 @@ def parse_store(self, storefile):

output = StringIO()

writer = csv.writer(output, dialect="unix")
writer = csv.writer(output, dialect=CSV_DIALECT)

# value can be None or a blank string for cells having formatting only,
# we need to ignore such columns as that would be treated like "" fields
Expand All @@ -116,7 +118,7 @@ def parse_store(self, storefile):
content = output.getvalue().encode("utf-8")

# Load the file as CSV
return super().parse_store(NamedBytesIO(name, content))
return super().parse_store(NamedBytesIO(name, content), dialect=CSV_DIALECT)

@staticmethod
def mimetype() -> str:
Expand Down
8 changes: 6 additions & 2 deletions weblate/formats/ttkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1586,13 +1586,17 @@ def get_content_and_filename(storefile):
content = handle.read()
return content, filename

def parse_store(self, storefile):
def parse_store(self, storefile, *, dialect: None | str = None):
"""Parse the store."""
content, filename = self.get_content_and_filename(storefile)

# Parse file
store = self.get_store_instance()
store.parse(content, sample_length=40000)
store.parse(
content,
sample_length=40000 if dialect is None else None,
dialect=dialect,
)
# Did detection of headers work?
if store.fieldnames != ["location", "source", "target"]:
return store
Expand Down

0 comments on commit 36ff3de

Please sign in to comment.