/
preprocessor.py
98 lines (89 loc) · 3.49 KB
/
preprocessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from .. import utils as U
from ..imports import *
from ..preprocessor import Preprocessor
class ImagePreprocessor(Preprocessor):
    """
    ```
    Image preprocessing

    Bundles a Keras ImageDataGenerator with the class labels and the
    image-loading settings (target size and color mode) and converts raw
    input (pixel array, image folder, or image file) into a
    (generator, steps) pair.
    ```
    """

    def __init__(self, datagen, classes, target_size=(224, 224), color_mode="rgb"):
        """
        ```
        Args:
          datagen: keras ImageDataGenerator used to build every generator
          classes: class labels; returned unchanged by get_classes
          target_size: forwarded as target_size when images are read from disk
          color_mode: forwarded as color_mode when images are read from disk
        Raises:
          ValueError: if datagen is not an ImageDataGenerator
        ```
        """
        if not isinstance(datagen, keras.preprocessing.image.ImageDataGenerator):
            raise ValueError("datagen must be instance of ImageDataGenerator")
        self.datagen = datagen
        self.c = classes
        self.target_size = target_size
        self.color_mode = color_mode

    def get_preprocessor(self):
        """
        ```
        Returns the wrapped ImageDataGenerator
        ```
        """
        return self.datagen

    def get_classes(self):
        """
        ```
        Returns the class labels supplied at construction
        ```
        """
        return self.c

    def preprocess_test(self, data, batch_size=U.DEFAULT_BS):
        """
        ```
        Alias for preprocess
        ```
        """
        return self.preprocess(data, batch_size=batch_size)

    def preprocess(self, data, batch_size=U.DEFAULT_BS):
        """
        ```
        Receives raw data and returns
        tuple containing the generator and steps
        argument for model.predict.

        Args:
          data: numpy.ndarray of pixel values, path to a folder of images,
                or path to a single image file (str, bytes or os.PathLike)
          batch_size: number of samples per batch; must be at least 1
        Returns:
          tuple of (generator, steps)
        Raises:
          ValueError: if batch_size is not positive, or if data is not an
                      ndarray, an existing folder, or an existing file
          Exception: if a path is given but target_size or color_mode is None
        ```
        """
        # Reject unusable batch sizes up front: zero would otherwise surface as
        # a ZeroDivisionError and negatives as a silently negative step count.
        if batch_size is None or batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # input is an array of pixel values
        if isinstance(data, np.ndarray):
            return self._flow_array(data, batch_size)

        # Only path-like values may reach os.path.*; anything else (list,
        # None, ...) would raise TypeError there instead of the ValueError
        # below. fsdecode also normalises bytes / pathlib.Path to str.
        if isinstance(data, (str, bytes, os.PathLike)):
            path = os.fsdecode(data)
            # input is a folder of images
            if os.path.isdir(path):
                return self._flow_folder(path, batch_size)
            # input is the path to an image file
            if os.path.isfile(path):
                return self._flow_file(path, batch_size)

        raise ValueError("data argument is not valid file, folder, or numpy.ndarray")

    def _flow_array(self, images, batch_size):
        """
        ```
        Builds an unshuffled generator over an in-memory pixel array
        ```
        """
        generator = self.datagen.flow(images, batch_size=batch_size, shuffle=False)
        steps = math.ceil(len(images) / batch_size)
        return (generator, steps)

    def _flow_folder(self, folder, batch_size):
        """
        ```
        Builds an unshuffled generator over the images in a folder (str path)
        ```
        """
        # A trailing separator makes dirname() yield the folder itself, so the
        # parent / name split works with or without a trailing slash.
        if folder[-1] != os.sep:
            folder += os.sep
        leaf = os.path.dirname(folder)
        parent = os.path.dirname(leaf)
        folder_name = os.path.basename(leaf)

        if self.target_size is None or self.color_mode is None:
            raise Exception(
                "To use predict_folder, you must load the data using either "
                + "the images_from_folder function or the images_from_csv function."
            )

        # The generator is rooted at the parent directory and restricted to
        # this one subfolder through the classes argument.
        generator = self.datagen.flow_from_directory(
            parent,
            classes=[folder_name],
            target_size=self.target_size,
            class_mode="categorical",
            batch_size=batch_size,
            shuffle=False,
            interpolation="bicubic",
            color_mode=self.color_mode,
        )
        steps = math.ceil(generator.samples / batch_size)
        return (generator, steps)

    def _flow_file(self, filename, batch_size):
        """
        ```
        Builds a generator that yields the single image stored at filename
        ```
        """
        if self.target_size is None or self.color_mode is None:
            raise Exception(
                "To use predict_filename, you must load the data using either "
                + "the ktrain.vision.images_from_folder function or the "
                + "ktrain.vision.images_from_csv function."
            )
        img = keras.preprocessing.image.load_img(
            filename, target_size=self.target_size, color_mode=self.color_mode
        )
        pixels = keras.preprocessing.image.img_to_array(img)
        # Add a leading axis so the single image forms a batch of one sample.
        batch = np.expand_dims(pixels, axis=0)
        generator = self.datagen.flow(batch, batch_size=batch_size, shuffle=False)
        nsamples = 1
        steps = math.ceil(nsamples / batch_size)
        return (generator, steps)