Skip to content

Commit

Permalink
fix webdataset filename split
Browse files: browse the repository at this point in the history
  • Loading branch information
Bowser1704 committed Apr 29, 2024
1 parent 22bf538 commit 3782bd5
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/datasets/packaged_modules/webdataset/webdataset.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
import io
import json
import os
from itertools import islice
from typing import Any, Callable, Dict, List

Expand All @@ -24,7 +25,8 @@ def _get_pipeline_from_tar(cls, tar_path, tar_iterator):
current_example = {}
for filename, f in tar_iterator:
if "." in filename:
example_key, field_name = filename.split(".", 1)
example_key, field_name = os.path.splitext(filename)
field_name = field_name.lstrip(".")
if current_example and current_example["__key__"] != example_key:
yield current_example
current_example = {}
Expand Down

0 comments on commit 3782bd5

Please sign in to comment.