/
imagefolder.py
104 lines (93 loc) 路 1.93 KB
/
imagefolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from typing import List
import datasets
from datasets.tasks import ImageClassification
from ..folder_based_builder import folder_based_builder
logger = datasets.utils.logging.get_logger(__name__)
class ImageFolderConfig(folder_based_builder.FolderBasedBuilderConfig):
"""BuilderConfig for ImageFolder."""
drop_labels: bool = None
drop_metadata: bool = None
class ImageFolder(folder_based_builder.FolderBasedBuilder):
BASE_FEATURE = datasets.Image()
BASE_COLUMN_NAME = "image"
BUILDER_CONFIG_CLASS = ImageFolderConfig
EXTENSIONS: List[str] # definition at the bottom of the script
CLASSIFICATION_TASK = ImageClassification(image_column="image", label_column="label")
# Obtained with:
# ```
# import PIL.Image
# IMAGE_EXTENSIONS = []
# PIL.Image.init()
# for ext, format in PIL.Image.EXTENSION.items():
# if format in PIL.Image.OPEN:
# IMAGE_EXTENSIONS.append(ext[1:])
# ```
# We intentionally do not run this code on launch because:
# (1) Pillow is an optional dependency, so importing Pillow in global namespace is not allowed
# (2) To ensure the list of supported extensions is deterministic
IMAGE_EXTENSIONS = [
".blp",
".bmp",
".dib",
".bufr",
".cur",
".pcx",
".dcx",
".dds",
".ps",
".eps",
".fit",
".fits",
".fli",
".flc",
".ftc",
".ftu",
".gbr",
".gif",
".grib",
".h5",
".hdf",
".png",
".apng",
".jp2",
".j2k",
".jpc",
".jpf",
".jpx",
".j2c",
".icns",
".ico",
".im",
".iim",
".tif",
".tiff",
".jfif",
".jpe",
".jpg",
".jpeg",
".mpg",
".mpeg",
".msp",
".pcd",
".pxr",
".pbm",
".pgm",
".ppm",
".pnm",
".psd",
".bw",
".rgb",
".rgba",
".sgi",
".ras",
".tga",
".icb",
".vda",
".vst",
".webp",
".wmf",
".emf",
".xbm",
".xpm",
]
ImageFolder.EXTENSIONS = IMAGE_EXTENSIONS