Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2639,7 +2639,10 @@ dataset_objects = client.get_dataset_objects(
dataset="YOUR_DATASET_NAME",
version="latest", # default is "latest"
tags=["cat"],
licenses=["MIT"]
licenses=["fastlabel"],
types=["train", "valid"], # choices are "train", "valid", "test" and "none" (Optional)
offset=0, # default is 0 (Optional)
limit=1000, # default is 1000, and must be less than or equal to 1000 (Optional)
)
```

Expand All @@ -2663,6 +2666,9 @@ client.download_dataset_objects(
version="latest", # default is "latest"
tags=["cat"],
types=["train", "valid"], # choices are "train", "valid", "test" and "none" (Optional)
licenses=["fastlabel"],
offset=0, # default is 0 (Optional)
limit=1000, # default is 1000, and must be less than or equal to 1000 (Optional)
)
```

Expand Down
78 changes: 48 additions & 30 deletions fastlabel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from .api import Api
from .exceptions import FastLabelInvalidException
from .query import DatasetObjectGetQuery

logger = logging.getLogger(__name__)
logging.basicConfig(
Expand Down Expand Up @@ -3961,6 +3962,7 @@ def get_dataset_objects(
tags: Optional[List[str]] = None,
licenses: Optional[List[str]] = None,
revision_id: str = None,
types: Optional[List[Union[str, DatasetObjectType]]] = None,
offset: int = 0,
limit: int = 1000,
) -> list:
Expand All @@ -3973,6 +3975,31 @@ def get_dataset_objects(
revision_id is dataset revision (Optional).
Specify only one of revision_id or version.
"""
endpoint = "dataset-objects-v2"
types = [DatasetObjectType.create(type_) for type_ in types or []]
params = self._prepare_params(
dataset=dataset,
version=version,
tags=tags,
licenses=licenses,
revision_id=revision_id,
types=types,
offset=offset,
limit=limit,
)
return self.api.get_request(endpoint, params=params)

def _prepare_params(
self,
dataset: str,
offset: int,
limit: int,
version: str,
revision_id: str,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rikunosuke 4016行目でif文でチェックしているということは、ここもOptionalですかね?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@takuya-shimada
Python の Optional[] は None が来る可能性がある、を指しています。型的には str のみを期待して、4016行目は if revision_id != "": を確認している、という意味になります!

tags: Optional[List[str]],
licenses: Optional[List[str]],
types: Optional[List[DatasetObjectType]],
) -> DatasetObjectGetQuery:
if version and revision_id:
raise FastLabelInvalidException(
"only use specify one of revisionId or version.", 400
Expand All @@ -3981,56 +4008,47 @@ def get_dataset_objects(
raise FastLabelInvalidException(
"Limit must be less than or equal to 1000.", 422
)
endpoint = "dataset-objects-v2"
params = {"dataset": dataset, "offset": offset, "limit": limit}
params: DatasetObjectGetQuery = {
"dataset": dataset,
"offset": offset,
"limit": limit,
}
if revision_id:
params["revisionId"] = revision_id
if version:
params["version"] = version

tags = tags or []
if tags:
params["tags"] = tags
if licenses:
params["licenses"] = licenses
return self.api.get_request(endpoint, params=params)
if types:
params["types"] = [t.value for t in types]
return params

def download_dataset_objects(
self,
dataset: str,
path: str,
version: str = "",
revision_id: str = "",
tags: Optional[List[str]] = None,
licenses: Optional[List[str]] = None,
types: Optional[List[Union[str, DatasetObjectType]]] = None,
offset: int = 0,
limit: int = 1000,
):
endpoint = "dataset-objects-v2/signed-urls"
if limit > 1000:
raise FastLabelInvalidException(
"Limit must be less than or equal to 1000.", 422
)
params = {"dataset": dataset, "offset": offset, "limit": limit}
if version:
params["version"] = version
if tags:
params["tags"] = tags
if types:
try:
types = list(
map(
lambda t: t
if isinstance(t, DatasetObjectType)
else DatasetObjectType(t),
types,
)
)
except ValueError:
raise FastLabelInvalidException(
f"types must be {[k for k in DatasetObjectType.__members__.keys()]}.",
422,
)
params["types"] = [t.value for t in types]
types = [DatasetObjectType.create(type_) for type_ in types or []]
params = self._prepare_params(
dataset=dataset,
offset=offset,
limit=limit,
version=version,
revision_id=revision_id,
tags=tags,
types=types,
licenses=licenses,
)

response = self.api.get_request(endpoint, params=params)

Expand Down
12 changes: 12 additions & 0 deletions fastlabel/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,15 @@ class DatasetObjectType(Enum):
train = "train"
valid = "valid"
test = "test"

@classmethod
def create(cls, value: "str | DatasetObjectType") -> "DatasetObjectType":
    """Coerce ``value`` into a member of this enum.

    Args:
        value: Either an existing member of this enum or the raw value
            of one of its members.

    Returns:
        ``value`` itself when it is already a member; otherwise the member
        whose value equals ``value``.

    Raises:
        ValueError: If ``value`` does not match any member. The message
            lists the valid member names.
    """
    if isinstance(value, cls):
        return value
    try:
        return cls(value)
    except ValueError as err:
        # Chain explicitly so the traceback shows the enum lookup failure
        # as the direct cause of this friendlier error.
        raise ValueError(
            f"Invalid DatasetObjectType: {value}. "
            f"types must be {list(cls.__members__)}"
        ) from err
12 changes: 12 additions & 0 deletions fastlabel/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import List, Optional, TypedDict


class DatasetObjectGetQuery(TypedDict, total=False):
dataset: str
version: str
revisionId: str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ここもOptional

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Python の Optional[] は None が来る可能性がある、を指しています。revisionId は値がある場合は全て str なので、この Optional はつけないが正しいはずです!

tags: Optional[List[str]]
licenses: Optional[List[str]]
types: Optional[List[str]]
offset: int
limit: int